In [3]:
import polars as pl
import pandas as pd
import plotly
import time

# Step 1: Data Loading

Data Being Used: [Link](https://data.seattle.gov/Public-Safety/Call-Data/33kz-ixgy/about_data)

The contents of the data matter less here, since I'm using it purely for benchmarking. See the link above for details if you're curious.

| Metric | Value |
|---|---|
| File Size | 1.17GB |
| Rows | 5.75 Million |
| Columns | 13 |

In [28]:
# Benchmark: time how long each library takes to read the same CSV file.
path = "data/Call_Data_20240521.csv"

# time.perf_counter() is the clock intended for measuring elapsed intervals:
# it is monotonic and uses the highest-resolution timer available, whereas
# time.time() is wall-clock time and can jump if the system clock is adjusted.
# NOTE(review): polars reads the file first, so the pandas read may benefit
# from a warm OS file cache - confirm whether that matters for this comparison.
start_time = time.perf_counter()
pl_df = pl.read_csv(path)
polars_read = time.perf_counter() - start_time

start_time = time.perf_counter()
pd_df = pd.read_csv(path)
pandas_read = time.perf_counter() - start_time

print(f"Pandas Time to Load: {pandas_read} seconds")
print(f"Polars Time to Load: {polars_read} seconds")

Pandas Time to Load: 9.457268953323364 seconds
Polars Time to Load: 0.6550741195678711 seconds


# Step 2: Basic Aggregations

In [9]:
# Benchmark: time how long display() takes for each DataFrame.
# The intervals here are only a few milliseconds, so use time.perf_counter()
# (monotonic, highest available resolution) instead of wall-clock time.time().
start_time = time.perf_counter()
display(pd_df)
pandas_display = time.perf_counter() - start_time

start_time = time.perf_counter()
display(pl_df)
polars_display = time.perf_counter() - start_time

print(f"Pandas Time to Display: {pandas_display} seconds")
print(f"Polars Time to Display: {polars_display} seconds")

Unnamed: 0,CAD Event Number,Event Clearance Description,Call Type,Priority,Initial Call Type,Final Call Type,Original Time Queued,Arrived Time,Precinct,Sector,Beat,Blurred_Longitude,Blurred_Latitude
0,2013000425769,ASSISTANCE RENDERED,911,4.0,INFORMATIONAL BROADCASTS,--HAZARDS - HAZARDS,11/25/2013 08:23:28 AM,11/25/2013 09:15:55 AM,EAST,EDWARD,E3,-122.327097,47.611479
1,2011000280134,PHYSICAL ARREST MADE,911,1.0,ASLT - PERSON SHOT OR SHOT AT,"--WEAPON, PERSON WITH - GUN",08/25/2011 11:08:55 PM,08/25/2011 11:13:58 PM,WEST,MARY,M2,-122.335373,47.612136
2,2010000139203,PHYSICAL ARREST MADE,ONVIEW,7.0,SUSPICIOUS STOP - OFFICER INITIATED ONVIEW,--ASSIST OTHER AGENCY - STATE AGENCY,04/29/2010 11:08:48 AM,04/29/2010 11:08:48 AM,NORTH,BOY,B3,-122.347002,47.660979
3,2014000034191,ASSISTANCE RENDERED,"TELEPHONE OTHER, NOT 911",3.0,TRAFFIC - BO SIGNALS AND DOWN SIGNS,--HAZARDS - HAZARDS,01/31/2014 04:39:56 PM,01/31/2014 04:47:26 PM,WEST,DAVID,D2,0.000000,0.000000
4,2016000459409,UNABLE TO LOCATE INCIDENT OR COMPLAINANT,911,2.0,THEFT (DOES NOT INCLUDE SHOPLIFT OR SVCS),--SUSPICIOUS CIRCUM. - SUSPICIOUS PERSON,12/23/2016 04:52:14 AM,12/23/2016 05:03:38 AM,WEST,DAVID,D1,-122.343482,47.612170
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5753800,2019000287778,REPORT WRITTEN (NO ARREST),ONVIEW,3.0,AUTO RECOVERY,--AUTOMOBILES - RECOVERY (THEFT),08/04/2019 02:19:49 PM,08/04/2019 02:19:49 PM,SOUTHWEST,FRANK,F3,-122.345464,47.523139
5753801,2019000392161,DUPLICATED OR CANCELLED BY RADIO,"TELEPHONE OTHER, NOT 911",3.0,AUTO RECOVERY,AUTO RECOVERY,10/21/2019 09:21:07 AM,10/21/2019 11:54:52 AM,SOUTHWEST,FRANK,F3,-122.321614,47.525611
5753802,2019000239109,OTHER REPORT MADE,ONVIEW,7.0,TRAFFIC STOP - OFFICER INITIATED ONVIEW,--TRAFFIC - MOVING VIOLATION,06/30/2019 06:46:26 PM,06/30/2019 06:46:26 PM,SOUTHWEST,WILLIAM,W2,-122.386804,47.560216
5753803,2019000060428,REPORT WRITTEN (NO ARREST),911,3.0,AUTO RECOVERY,--AUTOMOBILES - RECOVERY (THEFT),02/17/2019 04:51:17 PM,02/17/2019 05:41:45 PM,SOUTHWEST,FRANK,F3,-122.321600,47.526461


CAD Event Number,Event Clearance Description,Call Type,Priority,Initial Call Type,Final Call Type,Original Time Queued,Arrived Time,Precinct,Sector,Beat,Blurred_Longitude,Blurred_Latitude
i64,str,str,i64,str,str,str,str,str,str,str,f64,f64
2013000425769,"""ASSISTANCE RENDERED""","""911""",4,"""INFORMATIONAL BROADCASTS""","""--HAZARDS - HAZARDS""","""11/25/2013 08:23:28 AM""","""11/25/2013 09:15:55 AM""","""EAST""","""EDWARD""","""E3""",-122.327097,47.611479
2011000280134,"""PHYSICAL ARREST MADE""","""911""",1,"""ASLT - PERSON SHOT OR SHOT AT""","""--WEAPON, PERSON WITH - GUN""","""08/25/2011 11:08:55 PM""","""08/25/2011 11:13:58 PM""","""WEST""","""MARY""","""M2""",-122.335373,47.612136
2010000139203,"""PHYSICAL ARREST MADE""","""ONVIEW""",7,"""SUSPICIOUS STOP - OFFICER INIT…","""--ASSIST OTHER AGENCY - STATE …","""04/29/2010 11:08:48 AM""","""04/29/2010 11:08:48 AM""","""NORTH""","""BOY""","""B3""",-122.347002,47.660979
2014000034191,"""ASSISTANCE RENDERED""","""TELEPHONE OTHER, NOT 911""",3,"""TRAFFIC - BO SIGNALS AND DOWN …","""--HAZARDS - HAZARDS""","""01/31/2014 04:39:56 PM""","""01/31/2014 04:47:26 PM""","""WEST""","""DAVID""","""D2""",0.0,0.0
2016000459409,"""UNABLE TO LOCATE INCIDENT OR C…","""911""",2,"""THEFT (DOES NOT INCLUDE SHOPLI…","""--SUSPICIOUS CIRCUM. - SUSPICI…","""12/23/2016 04:52:14 AM""","""12/23/2016 05:03:38 AM""","""WEST""","""DAVID""","""D1""",-122.343482,47.61217
…,…,…,…,…,…,…,…,…,…,…,…,…
2019000287778,"""REPORT WRITTEN (NO ARREST)""","""ONVIEW""",3,"""AUTO RECOVERY""","""--AUTOMOBILES - RECOVERY (THEF…","""08/04/2019 02:19:49 PM""","""08/04/2019 02:19:49 PM""","""SOUTHWEST""","""FRANK""","""F3""",-122.345464,47.523139
2019000392161,"""DUPLICATED OR CANCELLED BY RAD…","""TELEPHONE OTHER, NOT 911""",3,"""AUTO RECOVERY""","""AUTO RECOVERY""","""10/21/2019 09:21:07 AM""","""10/21/2019 11:54:52 AM""","""SOUTHWEST""","""FRANK""","""F3""",-122.321614,47.525611
2019000239109,"""OTHER REPORT MADE""","""ONVIEW""",7,"""TRAFFIC STOP - OFFICER INITIAT…","""--TRAFFIC - MOVING VIOLATION""","""06/30/2019 06:46:26 PM""","""06/30/2019 06:46:26 PM""","""SOUTHWEST""","""WILLIAM""","""W2""",-122.386804,47.560216
2019000060428,"""REPORT WRITTEN (NO ARREST)""","""911""",3,"""AUTO RECOVERY""","""--AUTOMOBILES - RECOVERY (THEF…","""02/17/2019 04:51:17 PM""","""02/17/2019 05:41:45 PM""","""SOUTHWEST""","""FRANK""","""F3""",-122.3216,47.526461


Pandas Time to Display: 0.012462854385375977 seconds
Polars Time to Display: 0.005150794982910156 seconds


In [27]:
# Benchmark: mean "Priority" per "Call Type", computed once with each library.
# The aggregation results are intentionally discarded; only elapsed time is kept.
# time.perf_counter() is used because it is monotonic and high-resolution,
# which is what interval measurement needs (time.time() is wall-clock time).
start_time = time.perf_counter()
pd_df.groupby("Call Type")["Priority"].mean()
pandas_agg = time.perf_counter() - start_time

start_time = time.perf_counter()
pl_df.group_by("Call Type").agg(pl.mean("Priority"))
polars_agg = time.perf_counter() - start_time

print(f"Pandas Time to Aggregate: {pandas_agg} seconds")
# Label fixed: this value is the polars aggregation time, not a display time.
print(f"Polars Time to Aggregate: {polars_agg} seconds")

Pandas Time to Aggregate: 0.25269126892089844 seconds
Polars Time to Display: 0.20731639862060547 seconds


In [32]:
# Select the rows whose Priority equals 4; as the cell's last expression,
# the filtered frame is what the notebook renders.
pd_df[pd_df["Priority"].eq(4)]

Unnamed: 0,CAD Event Number,Event Clearance Description,Call Type,Priority,Initial Call Type,Final Call Type,Original Time Queued,Arrived Time,Precinct,Sector,Beat,Blurred_Longitude,Blurred_Latitude
0,2013000425769,ASSISTANCE RENDERED,911,4.0,INFORMATIONAL BROADCASTS,--HAZARDS - HAZARDS,11/25/2013 08:23:28 AM,11/25/2013 09:15:55 AM,EAST,EDWARD,E3,-122.327097,47.611479
41,2009000325098,ASSISTANCE RENDERED,"TELEPHONE OTHER, NOT 911",4.0,NOISE - GENERAL,--DISTURBANCE - NOISE,09/14/2009 02:45:22 AM,09/14/2009 04:34:32 AM,EAST,EDWARD,E1,-1.000000,-1.000000
50,2011000247334,ASSISTANCE RENDERED,ONVIEW,4.0,REQUEST TO WATCH,--PREMISE CHECKS - REQUEST TO WATCH,07/30/2011 04:44:40 AM,07/30/2011 04:44:40 AM,WEST,QUEEN,Q3,-122.358020,47.623934
63,2015000121376,ASSISTANCE RENDERED,911,4.0,NOISE - GENERAL,--DISTURBANCE - NOISE,04/14/2015 08:30:59 AM,04/14/2015 08:43:28 AM,WEST,DAVID,D1,-122.345003,47.613656
86,2014000149467,CITATION ISSUED (CRIMINAL OR NON-CRIMINAL),"TELEPHONE OTHER, NOT 911",4.0,PARKING VIOLATION (EXCEPT ABANDONED CAR),--TRAFFIC - PARKING VIOL (EXCEPT ABANDONED CAR),05/14/2014 06:39:17 AM,,NORTH,UNION,U2,-122.319786,47.658489
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5753555,2012000121288,ASSISTANCE RENDERED,ONVIEW,4.0,REQUEST TO WATCH,--PREMISE CHECKS - REQUEST TO WATCH,04/21/2012 04:02:21 AM,04/21/2012 04:02:21 AM,NORTH,JOHN,J2,-122.372557,47.682350
5753730,2013000221658,REPORT WRITTEN (NO ARREST),911,4.0,PROPERTY - FOUND,--PROPERTY - FOUND PROPERTY,06/25/2013 03:46:53 PM,06/25/2013 04:16:19 PM,NORTH,BOY,B3,-122.324023,47.662248
5753736,2009000196023,ASSISTANCE RENDERED,911,4.0,NOISE - GENERAL,--DISTURBANCE - NOISE,06/07/2009 01:25:43 AM,06/07/2009 03:19:46 AM,WEST,DAVID,D3,-1.000000,-1.000000
5753788,2016000357341,ASSISTANCE RENDERED,911,4.0,FIREWORKS (NO HAZARD),--MISCHIEF OR NUISANCE - GENERAL,10/02/2016 02:14:19 PM,10/02/2016 03:30:00 PM,SOUTHWEST,WILLIAM,W3,-122.383152,47.545568
