In [98]:
import pandas as pd
import terality as te
from codetiming import Timer
import random

In [99]:
file_path = "C:\\\\Users\\\\bhask\\\\Google Drive\\\\datasets\\\\Chicago_taxi_trips\\\\"
timer = Timer(name="Terality Tests")

In [100]:
# Disabling Terality cache for testing
cache_disabler = te.disable_cache()
cache_disabler.__enter__()

In [101]:
def get_slice_range(limit):
    start = random.randint(1, limit)
    end = random.randint(start, start + 1000)
    return start, end

In [102]:
# A list of dataframe columns renamed by converting to lowercase and replacing spaces with '_'
columns = [
'trip_id',
'taxi_id',
'trip_start_timestamp',
'trip_end_timestamp',
'trip_seconds',
'trip_miles',
'pickup_census_tract',
'dropoff_census_tract',
'pickup_community_area',
'dropoff_community_area',
'fare',
'tips',
'tolls',
'extras',
'trip_total',
'payment_type',
'company',
'pickup_centroid_latitude',
'pickup_centroid_longitude',
'pickup_centroid_location',
'dropoff_centroid_latitude',
'dropoff_centroid_longitude',
'dropoff_centroid__location'
]

In [103]:
# Loading data into dataframes using Pandas
timer.start()
taxi_trips_pdf = pd.read_csv(file_path + "Taxi_Trips_2019_2020.csv")
timer.stop()

Elapsed time: 162.9772 seconds


162.97720410001057

In [104]:
# Loading data into dataframes using Terality
timer.start()
taxi_trips_tdf = te.read_csv(file_path + "Taxi_Trips_2019_2020.csv")
timer.stop()

C:\\Users\\bhask\\Google Drive\\datasets\\Chicago_taxi_trips\\Taxi_Trips_2019_2020.csv: 100%|█| 8.48G/8.48G [25:37<00:0


Elapsed time: 1587.6724 seconds


1587.6723926999985

In [105]:
timer.start()
taxi_trips_pdf.head()
timer.stop()

Elapsed time: 0.0161 seconds


0.01609559998905752

In [106]:
timer.start()
taxi_trips_tdf.head()
timer.stop()

Elapsed time: 4.4659 seconds


4.465903899996192

In [107]:
# Rename the columns of the Pandas dataframe
timer.start()
taxi_trips_pdf.columns = columns
timer.stop()

Elapsed time: 0.0260 seconds


0.02601940000022296

In [108]:
# Rename the columns of the Terality dataframe
timer.start()
taxi_trips_tdf.columns = columns
timer.stop()

Elapsed time: 2.8640 seconds


2.863973100000294

In [109]:
# Row count before cleanup - using Pandas
timer.start()
print("Pandas DF Row Count before: % d" % taxi_trips_pdf.shape[0])
timer.stop()

Pandas DF Row Count before:  20366397
Elapsed time: 0.0026 seconds


0.002559200001996942

In [110]:
# Remove all the rows with NaN values using Pandas. There is plenty of data even otherwise
timer.start()
taxi_trips_pdf.dropna(how='any',axis=0,inplace=True)
timer.stop()

Elapsed time: 13.7942 seconds


13.794167400003062

In [111]:
# Row count after cleanup - using Pandas
timer.start()
row_count = taxi_trips_pdf.shape[0]
print("Pandas DF Row Count after: % d" % row_count)
timer.stop()

Pandas DF Row Count after:  12199631
Elapsed time: 0.0014 seconds


0.0014073000056669116

In [112]:
# Row count before cleanup - using Terality
timer.start()
print("Terality DF Row Count after: % d" % taxi_trips_tdf.shape[0])
timer.stop()

Terality DF Row Count after:  20366397
Elapsed time: 1.3118 seconds


1.3117552999901818

In [113]:
# Remove all the rows with NaN values using Terality. There is plenty of data even otherwise
timer.start()
taxi_trips_tdf.dropna(how='any',axis=0,inplace=True)
timer.stop()

Elapsed time: 9.8463 seconds


9.846295900002588

In [114]:
# Row count after cleanup - using Terality
timer.start()
print("Terality DF Row Count after: % d" % taxi_trips_tdf.shape[0])
timer.stop()

Terality DF Row Count after:  12199631
Elapsed time: 0.7833 seconds


0.7832590000034543

In [115]:
# Adding a new column to store datetime version of string datetimes - Pandas
timer.start()
taxi_trips_pdf['trip_start_dt'] = pd.to_datetime(taxi_trips_pdf['trip_start_timestamp'], infer_datetime_format=True)
taxi_trips_pdf['trip_end_dt'] = pd.to_datetime(taxi_trips_pdf['trip_end_timestamp'], infer_datetime_format=True)
taxi_trips_pdf['trip_start_ym'] = taxi_trips_pdf['trip_start_dt'].dt.to_period('M')
taxi_trips_pdf['trip_end_ym'] = taxi_trips_pdf['trip_end_dt'].dt.to_period('M')
timer.stop()

Elapsed time: 12.7713 seconds


12.771258900000248

In [116]:
taxi_trips_pdf.head()

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
16,e066e483f0fc2a583895d24c9dec9864a6608052,4794c36af4c1852d90b5c8cf5f9f6cd38a8b9c1b68cf48...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,600.0,0.0,17031080000.0,17031840000.0,8.0,32.0,...,41.891972,-87.612945,POINT (-87.6129454143 41.8919715078),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
19,5431595d222a3e00a9e4b3e93c928c306dc71be3,86b0677bb9bcda0454863c70a616d1adc45129237848d1...,01/01/2019 12:00:00 AM,01/01/2019 12:00:00 AM,120.0,0.3,17031840000.0,17031320000.0,32.0,32.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),2019-01-01,2019-01-01 00:00:00,2019-01,2019-01
20,509e870adce2a514a4287e1a305fe4ddabc59836,38f6145c9a2b848dc1baa16fd91087e606b12bcb8757a9...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,0.8,17031080000.0,17031080000.0,8.0,8.0,...,41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
22,31004535c12d4372c165e34386cde0521a0a72e8,ee8a2f86372e99ecf87d44e13a20ebda7db8ae69d5b444...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,1.0,17031080000.0,17031080000.0,8.0,8.0,...,41.890922,-87.618868,POINT (-87.6188683546 41.8909220259),41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
23,da18ec450c1fed02fbbc675faa621f8ac50ab5dd,0013da5489fe976daf4f4a7d246073ecf2caed9b12b3be...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,420.0,0.8,17031080000.0,17031080000.0,8.0,8.0,...,41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01


In [117]:
# Adding a new column to store datetime version of string datetimes - Terality
timer.start()
taxi_trips_tdf['trip_start_dt'] = te.to_datetime(taxi_trips_tdf['trip_start_timestamp'], infer_datetime_format=True)
taxi_trips_tdf['trip_end_dt'] = te.to_datetime(taxi_trips_tdf['trip_end_timestamp'], infer_datetime_format=True)
taxi_trips_tdf['trip_start_ym'] = taxi_trips_tdf['trip_start_dt'].dt.to_period('M')
taxi_trips_tdf['trip_end_ym'] = taxi_trips_tdf['trip_end_dt'].dt.to_period('M')
timer.stop()

Elapsed time: 20.9454 seconds


20.945380599994678

In [118]:
taxi_trips_tdf.head()
# timer.stop()


Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
16,e066e483f0fc2a583895d24c9dec9864a6608052,4794c36af4c1852d90b5c8cf5f9f6cd38a8b9c1b68cf48...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,600.0,0.0,17031080000.0,17031840000.0,8.0,32.0,...,41.891972,-87.612945,POINT (-87.6129454143 41.8919715078),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
19,5431595d222a3e00a9e4b3e93c928c306dc71be3,86b0677bb9bcda0454863c70a616d1adc45129237848d1...,01/01/2019 12:00:00 AM,01/01/2019 12:00:00 AM,120.0,0.3,17031840000.0,17031320000.0,32.0,32.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),2019-01-01,2019-01-01 00:00:00,2019-01,2019-01
20,509e870adce2a514a4287e1a305fe4ddabc59836,38f6145c9a2b848dc1baa16fd91087e606b12bcb8757a9...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,0.8,17031080000.0,17031080000.0,8.0,8.0,...,41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
22,31004535c12d4372c165e34386cde0521a0a72e8,ee8a2f86372e99ecf87d44e13a20ebda7db8ae69d5b444...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,1.0,17031080000.0,17031080000.0,8.0,8.0,...,41.890922,-87.618868,POINT (-87.6188683546 41.8909220259),41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01
23,da18ec450c1fed02fbbc675faa621f8ac50ab5dd,0013da5489fe976daf4f4a7d246073ecf2caed9b12b3be...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,420.0,0.8,17031080000.0,17031080000.0,8.0,8.0,...,41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),2019-01-01,2019-01-01 00:15:00,2019-01,2019-01


In [119]:
# Describe - Pandas
timer.start()
taxi_trips_pdf.describe()

Unnamed: 0,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,pickup_centroid_latitude,pickup_centroid_longitude,dropoff_centroid_latitude,dropoff_centroid_longitude
count,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0
mean,780.6322,2.882408,17031410000.0,17031380000.0,26.28359,23.19189,12.79652,1.78176,0.0007331033,0.8851525,15.60699,41.89265,-87.65359,41.89144,-87.64459
std,1207.102,5.479885,343631.7,334161.3,18.8116,16.34532,61.89516,2.826719,0.3181317,16.09252,64.59551,0.03113462,0.07520429,0.02604625,0.05721153
min,0.0,0.0,17031010000.0,17031010000.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,41.65022,-87.90304,41.66376,-87.90304
25%,340.0,0.54,17031080000.0,17031080000.0,8.0,8.0,5.75,0.0,0.0,0.0,7.0,41.88099,-87.63784,41.88099,-87.6409
50%,514.0,1.08,17031320000.0,17031280000.0,28.0,28.0,7.25,0.9,0.0,0.0,9.0,41.8853,-87.63186,41.8853,-87.63186
75%,823.0,1.9,17031840000.0,17031840000.0,32.0,32.0,10.25,2.0,0.0,1.0,12.5,41.89322,-87.62197,41.89503,-87.62197
max,86400.0,995.9,17031980000.0,17031980000.0,77.0,77.0,9800.26,400.0,960.68,9327.76,9800.3,42.02122,-87.53071,42.02122,-87.53071


In [120]:
timer.stop()

Elapsed time: 5.1323 seconds


5.132343800010858

In [121]:
# Describe - Terality
timer.start()
taxi_trips_tdf.describe()

Unnamed: 0,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,pickup_centroid_latitude,pickup_centroid_longitude,dropoff_centroid_latitude,dropoff_centroid_longitude
count,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0,12199630.0
mean,780.6322,2.882408,17031410000.0,17031380000.0,26.28359,23.19189,12.79652,1.78176,0.0007331033,0.8851525,15.60699,41.89265,-87.65359,41.89144,-87.64459
std,1207.102,5.479885,343631.7,334161.3,18.8116,16.34532,61.89516,2.826719,0.3181317,16.09252,64.59551,0.03113462,0.07520429,0.02604625,0.05721153
min,0.0,0.0,17031010000.0,17031010000.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,41.65022,-87.90304,41.66376,-87.90304
25%,340.0,0.54,17031080000.0,17031080000.0,8.0,8.0,5.75,0.0,0.0,0.0,7.0,41.88099,-87.63784,41.88099,-87.6409
50%,514.0,1.08,17031320000.0,17031280000.0,28.0,28.0,7.25,0.9,0.0,0.0,9.0,41.8853,-87.63186,41.8853,-87.63186
75%,823.0,1.9,17031840000.0,17031840000.0,32.0,32.0,10.25,2.0,0.0,1.0,12.5,41.89322,-87.62197,41.89503,-87.62197
max,86400.0,995.9,17031980000.0,17031980000.0,77.0,77.0,9800.26,400.0,960.68,9327.76,9800.3,42.02122,-87.53071,42.02122,-87.53071


In [122]:
timer.stop()

Elapsed time: 32.7246 seconds


32.72464509999554

In [123]:
# Query 1: Number of trips grouped by taxi - Using Pandas
timer.start()
taxi_trips_pdf.groupby('taxi_id').count()['trip_id']


taxi_id
0008de7a146802839c9e6059f482d292ebdae13c5c31dd6e5983a80882e2a5dbcd6ea098c2fcd56f34ce02645eb94c6b39512e9304837746d4e289b6236c2c5b    3372
001330b81e23412049f9c3eff5b6e972a91afe59c9aa36ef29747881b4bf803adfd02b1a71cca5cb8c2214275330aa4520df34d7848100414acb4b4cad642e62    3072
0013da5489fe976daf4f4a7d246073ecf2caed9b12b3be70eff37b63ef9ca4102c972145ddd8537811752d51b222b4618dfbae451b966d7a3ad9afa4ec878a6e    4093
0041f8f0c91881c1e1913f2548522495fe3c4c719aa67fd2483ebbcc22e0aa1bfff2cddb32c35cde3ad9766b0e8d44c04ff5dfdcf1035462ba81a13e3ba6b8f9    3431
0044e6c0d091476299b99345501f756b23632a96cbaf40e872fbf14f976410d3f938aacc643ed608b2aa42809222d4458e1aab5e0848e9b952f35616785c3a36    2104
                                                                                                                                    ... 
ffd70d111c7da2e5eff27ad900c8eb94d74fe286bb7aa58029ee2baac3f3b310dea5da2a6a77a366b7b6d4b104b91ef75e7959ac6b1f558be7844417cbe3e12f     603
ffda53354c610fd3af1aee46d723028a4

In [124]:
timer.stop()

Elapsed time: 8.2759 seconds


8.27592949999962

In [125]:
# Query 1: Number of trips grouped by taxi - Using Terality
timer.start()
taxi_trips_pdf.groupby('taxi_id').count()['trip_id']



taxi_id
0008de7a146802839c9e6059f482d292ebdae13c5c31dd6e5983a80882e2a5dbcd6ea098c2fcd56f34ce02645eb94c6b39512e9304837746d4e289b6236c2c5b    3372
001330b81e23412049f9c3eff5b6e972a91afe59c9aa36ef29747881b4bf803adfd02b1a71cca5cb8c2214275330aa4520df34d7848100414acb4b4cad642e62    3072
0013da5489fe976daf4f4a7d246073ecf2caed9b12b3be70eff37b63ef9ca4102c972145ddd8537811752d51b222b4618dfbae451b966d7a3ad9afa4ec878a6e    4093
0041f8f0c91881c1e1913f2548522495fe3c4c719aa67fd2483ebbcc22e0aa1bfff2cddb32c35cde3ad9766b0e8d44c04ff5dfdcf1035462ba81a13e3ba6b8f9    3431
0044e6c0d091476299b99345501f756b23632a96cbaf40e872fbf14f976410d3f938aacc643ed608b2aa42809222d4458e1aab5e0848e9b952f35616785c3a36    2104
                                                                                                                                    ... 
ffd70d111c7da2e5eff27ad900c8eb94d74fe286bb7aa58029ee2baac3f3b310dea5da2a6a77a366b7b6d4b104b91ef75e7959ac6b1f558be7844417cbe3e12f     603
ffda53354c610fd3af1aee46d723028a4

In [126]:
timer.stop()

Elapsed time: 7.9469 seconds


7.946876800007885

In [127]:
# Query 2: Number of trips grouped by taxi and year/month - Using Pandas
timer.start()
taxi_trips_pdf.groupby(['taxi_id', 'trip_start_ym']).count()['trip_id']

taxi_id                                                                                                                           trip_start_ym
0008de7a146802839c9e6059f482d292ebdae13c5c31dd6e5983a80882e2a5dbcd6ea098c2fcd56f34ce02645eb94c6b39512e9304837746d4e289b6236c2c5b  2019-01          230
                                                                                                                                  2019-02          248
                                                                                                                                  2019-03          212
                                                                                                                                  2019-04          202
                                                                                                                                  2019-05          270
                                                                                                     

In [128]:
timer.stop()

Elapsed time: 8.9333 seconds


8.93328769999789

In [129]:
# Query 2: Number of trips grouped by taxi and year/month - Using Terality
timer.start()
taxi_trips_tdf.groupby(['taxi_id', 'trip_start_ym']).count()['trip_id']

taxi_id                                                                                                                           trip_start_ym
0008de7a146802839c9e6059f482d292ebdae13c5c31dd6e5983a80882e2a5dbcd6ea098c2fcd56f34ce02645eb94c6b39512e9304837746d4e289b6236c2c5b  2019-01          230
                                                                                                                                  2019-02          248
                                                                                                                                  2019-03          212
                                                                                                                                  2019-04          202
                                                                                                                                  2019-05          270
                                                                                                     

In [130]:
timer.stop()

Elapsed time: 21.3977 seconds


21.397688299999572

In [131]:
# Query 3: Number of trips by Taxi Company - using Pandas
timer.start()
taxi_trips_pdf.groupby('company').count()['trip_id'].sort_values(ascending=False).head(10)


company
Taxi Affiliation Services            2769009
Chicago Carriage Cab Corp            1506931
Flash Cab                            1220891
Sun Taxi                             1046724
Medallion Leasin                      932280
City Service                          906870
Star North Management LLC             742670
Blue Ribbon Taxi Association Inc.     581908
Choice Taxi Association               400096
Taxicab Insurance Agency, LLC         340868
Name: trip_id, dtype: int64

In [132]:
timer.stop()

Elapsed time: 5.5310 seconds


5.5309644999942975

In [133]:
# Query 3: Number of trips by Taxi Company - using Terality
timer.start()
taxi_trips_tdf.groupby('company').count()['trip_id'].sort_values(ascending=False).head(10)


company
Taxi Affiliation Services            2769009
Chicago Carriage Cab Corp            1506931
Flash Cab                            1220891
Sun Taxi                             1046724
Medallion Leasin                      932280
City Service                          906870
Star North Management LLC             742670
Blue Ribbon Taxi Association Inc.     581908
Choice Taxi Association               400096
Taxicab Insurance Agency, LLC         340868
Name: trip_id, dtype: int64

In [134]:
timer.stop()

Elapsed time: 23.4792 seconds


23.479232700003195

In [135]:
# Query 4: Sort by Taxi Company - using Pandas
timer.start()
taxi_trips_pdf.sort_values(by="company")
timer.stop()

Elapsed time: 13.7101 seconds


13.710100900003454

In [136]:
# Query 4: Sort by Taxi Company - using Terality
timer.start()
taxi_trips_tdf.sort_values(by="company")
timer.stop()

Elapsed time: 24.0933 seconds


24.093318900006125

In [137]:
range = get_slice_range(row_count)
print("Start: %d, End: %d" % range)


Start: 3349112, End: 3349614


In [138]:
# Query 5: Slicing using random numbers for start and end - using Pandas
timer.start()
taxi_trips_pdf[range[0]:range[1]]

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
5518823,6546c749f2ee8d1fbf961a638a38a191988f0483,7d72fbc3bf81a1d1cc1e5b812096828df769a74ed08e13...,05/01/2019 12:00:00 PM,05/01/2019 12:00:00 PM,180.0,0.00,1.703184e+10,1.703184e+10,32.0,32.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:00:00,2019-05-01 12:00:00,2019-05,2019-05
5518825,5a41ecd1d467e6f8e3cafe9544c42461f97f02f3,059bda2b6b156207134204beed08bb2b9d3c3643c60e1c...,05/01/2019 12:00:00 PM,05/01/2019 12:15:00 PM,660.0,1.40,1.703108e+10,1.703132e+10,8.0,32.0,...,41.902788,-87.626146,POINT (-87.6261455896 41.9027880476),41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),2019-05-01 12:00:00,2019-05-01 12:15:00,2019-05,2019-05
5518826,5a624313d6a193a3522a4a50d2bbe68b6754d04c,5e3077a68e0439a1168a98d848e9ae3fd8c74f61bc4323...,05/01/2019 12:00:00 PM,05/01/2019 12:15:00 PM,660.0,1.10,1.703132e+10,1.703184e+10,32.0,32.0,...,41.884987,-87.620993,POINT (-87.6209929134 41.8849871918),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:00:00,2019-05-01 12:15:00,2019-05,2019-05
5518827,5afa5a13c6c3ba2ca2c44cde9886ff3e4bd62281,54b032d1527c44cbca520e9af4e89090945868e46dd774...,05/01/2019 12:00:00 PM,05/01/2019 12:30:00 PM,1860.0,17.20,1.703198e+10,1.703108e+10,76.0,8.0,...,41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),2019-05-01 12:00:00,2019-05-01 12:30:00,2019-05,2019-05
5518830,662421ff994c16eaa80426ed523d2ef5770e1df9,a940425dd1a398a41024ee7bb70bbbf66515799b941674...,05/01/2019 12:00:00 PM,05/01/2019 12:00:00 PM,300.0,1.10,1.703108e+10,1.703108e+10,8.0,8.0,...,41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),2019-05-01 12:00:00,2019-05-01 12:00:00,2019-05,2019-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5519517,37870628d394ae4668ce89d4fdaf9340a7dca9dd,21c91ae089317069d5b6e18187c887882b0dcce86d9302...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,420.0,0.90,1.703108e+10,1.703184e+10,8.0,32.0,...,41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05
5519520,389cbd4668b040ae05a61b522062c03bac074c98,2d49dd422b09d66e3e2da3120413ee76b8e28e19fd2e21...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,360.0,1.00,1.703184e+10,1.703183e+10,32.0,28.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.879067,-87.657005,POINT (-87.657005027 41.8790669938),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05
5519521,38c4a98e6705a3c30c18e6cf3b05fb9f53a540ed,847cf962bd6f62040673e6c24c24940aeb2d7fdaa54677...,05/01/2019 12:15:00 PM,05/01/2019 12:30:00 PM,1080.0,1.40,1.703132e+10,1.703184e+10,32.0,32.0,...,41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:15:00,2019-05-01 12:30:00,2019-05,2019-05
5519523,38fe56fec319fcd3898a01ab7651feb9ddf02d14,1b2a60b8aca7f106dc2ddcd51b43e9448be7920ca7a24d...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,360.0,0.70,1.703108e+10,1.703108e+10,8.0,8.0,...,41.891972,-87.612945,POINT (-87.6129454143 41.8919715078),41.895033,-87.619711,POINT (-87.6197106717 41.8950334495),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05


In [139]:
timer.stop()

Elapsed time: 0.0486 seconds


0.04860460000054445

In [140]:
# Query 5: Slicing using random numbers for start and end - using Terality
timer.start()
taxi_trips_tdf[range[0]:range[1]]

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
5518823,6546c749f2ee8d1fbf961a638a38a191988f0483,7d72fbc3bf81a1d1cc1e5b812096828df769a74ed08e13...,05/01/2019 12:00:00 PM,05/01/2019 12:00:00 PM,180.0,0.00,1.703184e+10,1.703184e+10,32.0,32.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:00:00,2019-05-01 12:00:00,2019-05,2019-05
5518825,5a41ecd1d467e6f8e3cafe9544c42461f97f02f3,059bda2b6b156207134204beed08bb2b9d3c3643c60e1c...,05/01/2019 12:00:00 PM,05/01/2019 12:15:00 PM,660.0,1.40,1.703108e+10,1.703132e+10,8.0,32.0,...,41.902788,-87.626146,POINT (-87.6261455896 41.9027880476),41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),2019-05-01 12:00:00,2019-05-01 12:15:00,2019-05,2019-05
5518826,5a624313d6a193a3522a4a50d2bbe68b6754d04c,5e3077a68e0439a1168a98d848e9ae3fd8c74f61bc4323...,05/01/2019 12:00:00 PM,05/01/2019 12:15:00 PM,660.0,1.10,1.703132e+10,1.703184e+10,32.0,32.0,...,41.884987,-87.620993,POINT (-87.6209929134 41.8849871918),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:00:00,2019-05-01 12:15:00,2019-05,2019-05
5518827,5afa5a13c6c3ba2ca2c44cde9886ff3e4bd62281,54b032d1527c44cbca520e9af4e89090945868e46dd774...,05/01/2019 12:00:00 PM,05/01/2019 12:30:00 PM,1860.0,17.20,1.703198e+10,1.703108e+10,76.0,8.0,...,41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),2019-05-01 12:00:00,2019-05-01 12:30:00,2019-05,2019-05
5518830,662421ff994c16eaa80426ed523d2ef5770e1df9,a940425dd1a398a41024ee7bb70bbbf66515799b941674...,05/01/2019 12:00:00 PM,05/01/2019 12:00:00 PM,300.0,1.10,1.703108e+10,1.703108e+10,8.0,8.0,...,41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),2019-05-01 12:00:00,2019-05-01 12:00:00,2019-05,2019-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5519517,37870628d394ae4668ce89d4fdaf9340a7dca9dd,21c91ae089317069d5b6e18187c887882b0dcce86d9302...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,420.0,0.90,1.703108e+10,1.703184e+10,8.0,32.0,...,41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05
5519520,389cbd4668b040ae05a61b522062c03bac074c98,2d49dd422b09d66e3e2da3120413ee76b8e28e19fd2e21...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,360.0,1.00,1.703184e+10,1.703183e+10,32.0,28.0,...,41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),41.879067,-87.657005,POINT (-87.657005027 41.8790669938),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05
5519521,38c4a98e6705a3c30c18e6cf3b05fb9f53a540ed,847cf962bd6f62040673e6c24c24940aeb2d7fdaa54677...,05/01/2019 12:15:00 PM,05/01/2019 12:30:00 PM,1080.0,1.40,1.703132e+10,1.703184e+10,32.0,32.0,...,41.877406,-87.621972,POINT (-87.6219716519 41.8774061234),41.880994,-87.632746,POINT (-87.6327464887 41.8809944707),2019-05-01 12:15:00,2019-05-01 12:30:00,2019-05,2019-05
5519523,38fe56fec319fcd3898a01ab7651feb9ddf02d14,1b2a60b8aca7f106dc2ddcd51b43e9448be7920ca7a24d...,05/01/2019 12:15:00 PM,05/01/2019 12:15:00 PM,360.0,0.70,1.703108e+10,1.703108e+10,8.0,8.0,...,41.891972,-87.612945,POINT (-87.6129454143 41.8919715078),41.895033,-87.619711,POINT (-87.6197106717 41.8950334495),2019-05-01 12:15:00,2019-05-01 12:15:00,2019-05,2019-05


In [141]:
timer.stop()

Elapsed time: 5.4200 seconds


5.420034500013571

In [142]:
# Query 6: Selecting a single row from an unindexed dataframe - using Pandas
timer.start()
taxi_trips_pdf[taxi_trips_pdf['trip_id'] == 'd3e437e3af9c691e6a9fe1f9802605d989605fdb']

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
13905408,d3e437e3af9c691e6a9fe1f9802605d989605fdb,ac66734513cb7344a5501fa22f825c940448a4041e0302...,10/30/2019 05:30:00 PM,10/30/2019 06:00:00 PM,1396.0,1.14,17031080000.0,17031080000.0,8.0,8.0,...,41.902788,-87.626146,POINT (-87.6261455896 41.9027880476),41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),2019-10-30 17:30:00,2019-10-30 18:00:00,2019-10,2019-10


In [143]:
timer.stop()

Elapsed time: 0.8219 seconds


0.8218952000024728

In [144]:
# Query 6: Selecting a single row from an unindexed dataframe - using Terality
timer.start()
taxi_trips_tdf[taxi_trips_tdf['trip_id'] == 'd3e437e3af9c691e6a9fe1f9802605d989605fdb']

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
13905408,d3e437e3af9c691e6a9fe1f9802605d989605fdb,ac66734513cb7344a5501fa22f825c940448a4041e0302...,10/30/2019 05:30:00 PM,10/30/2019 06:00:00 PM,1396.0,1.14,17031080000.0,17031080000.0,8.0,8.0,...,41.902788,-87.626146,POINT (-87.6261455896 41.9027880476),41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),2019-10-30 17:30:00,2019-10-30 18:00:00,2019-10,2019-10


In [145]:
timer.stop()

Elapsed time: 7.7843 seconds


7.78432530000282

In [146]:
# Query 7: Range queries using isin() - Using Pandas
timer.start()
taxi_trips_pdf[taxi_trips_pdf['company'].isin(['Taxi Affiliation Services', 'Blue Diamond', 'Chicago Taxicab', 'Nova Taxi Affiliation Llc', 'U Taxicab'])]

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,...,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid__location,trip_start_dt,trip_end_dt,trip_start_ym,trip_end_ym
20,509e870adce2a514a4287e1a305fe4ddabc59836,38f6145c9a2b848dc1baa16fd91087e606b12bcb8757a9...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,0.8,1.703108e+10,1.703108e+10,8.0,8.0,...,41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),2019-01-01 00:00:00,2019-01-01 00:15:00,2019-01,2019-01
22,31004535c12d4372c165e34386cde0521a0a72e8,ee8a2f86372e99ecf87d44e13a20ebda7db8ae69d5b444...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,1.0,1.703108e+10,1.703108e+10,8.0,8.0,...,41.890922,-87.618868,POINT (-87.6188683546 41.8909220259),41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),2019-01-01 00:00:00,2019-01-01 00:15:00,2019-01,2019-01
30,601ea33fd396dadf3c05667ce8badbab55968930,72cd7073f26565b4a99646e795893b87b06e8086ad9a49...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,180.0,0.0,1.703108e+10,1.703108e+10,8.0,8.0,...,41.895033,-87.619711,POINT (-87.6197106717 41.8950334495),41.890922,-87.618868,POINT (-87.6188683546 41.8909220259),2019-01-01 00:00:00,2019-01-01 00:15:00,2019-01,2019-01
35,d1c76d15cec1a2ee58b408aff06e303aa86dd37f,d38890256d8a1e8146f0b15dc23e3b2a140b4fa3834698...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,300.0,0.0,1.703108e+10,1.703108e+10,8.0,8.0,...,41.892508,-87.626215,POINT (-87.6262149064 41.8925077809),41.898332,-87.620763,POINT (-87.6207628651 41.8983317935),2019-01-01 00:00:00,2019-01-01 00:15:00,2019-01,2019-01
36,e933481970a4efe76e013706d4056104dbf3a599,3506f62ee1c70c02fe00b742679684b319d5b712182f82...,01/01/2019 12:00:00 AM,01/01/2019 12:15:00 AM,360.0,0.0,1.703108e+10,1.703108e+10,8.0,8.0,...,41.892042,-87.631864,POINT (-87.6318639497 41.8920421365),41.892073,-87.628874,POINT (-87.6288741572 41.8920726347),2019-01-01 00:00:00,2019-01-01 00:15:00,2019-01,2019-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20365970,8b2fb9ed7cb74a7645c95538373111ac90be222a,7476a493f34aa1075570e89104cdaf3aaf5c0fd4679382...,12/31/2020 08:45:00 PM,12/31/2020 09:00:00 PM,1200.0,17.1,1.703198e+10,1.703128e+10,76.0,28.0,...,41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),41.879255,-87.642649,POINT (-87.642648998 41.8792550844),2020-12-31 20:45:00,2020-12-31 21:00:00,2020-12,2020-12
20366091,91fddaa11017287697ed65e5b6fb6399ea4f7ac4,5f88daef22c65f61d051338809a96d6bc7f471c4e2cd73...,12/31/2020 09:30:00 PM,12/31/2020 09:30:00 PM,120.0,0.2,1.703132e+10,1.703132e+10,32.0,32.0,...,41.884987,-87.620993,POINT (-87.6209929134 41.8849871918),41.884987,-87.620993,POINT (-87.6209929134 41.8849871918),2020-12-31 21:30:00,2020-12-31 21:30:00,2020-12,2020-12
20366290,debd80beca876e44a7d485a2fd2fd66816f8fc2b,42574f4a000f78c9ccbf2cfc282f31617096a9fea4a0d5...,12/31/2020 10:45:00 PM,12/31/2020 11:00:00 PM,180.0,0.0,1.703198e+10,1.703198e+10,76.0,76.0,...,41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),2020-12-31 22:45:00,2020-12-31 23:00:00,2020-12,2020-12
20366292,e9fc3c32d17dd7dcd54cabaca1781d4684b8832d,be2af1b95b937dfe54bf9a4f4331745a673529a7edfab2...,12/31/2020 10:45:00 PM,12/31/2020 11:00:00 PM,480.0,2.8,1.703198e+10,1.703198e+10,76.0,76.0,...,41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),41.979071,-87.903040,POINT (-87.9030396611 41.9790708201),2020-12-31 22:45:00,2020-12-31 23:00:00,2020-12,2020-12


In [147]:
timer.stop()

Elapsed time: 2.2615 seconds


2.2614646000001812

In [148]:
# Query 7: Range queries using isin() - Using Terality
timer.start()
taxi_trips_pdf['company'].isin(['Taxi Affiliation Services', 'Blue Diamond', 'Chicago Taxicab', 'Nova Taxi Affiliation Llc', 'U Taxicab'])

16          False
19          False
20           True
22           True
23          False
            ...  
20366129    False
20366142    False
20366290     True
20366292     True
20366309     True
Name: company, Length: 12199631, dtype: bool

In [149]:
timer.stop()

Elapsed time: 0.5406 seconds


0.5405892999988282