In [84]:
import pandas as pd
import numpy as np

from datetime import datetime

In [109]:
zones_df = pd.read_json("data/taxizones.json", orient="index")

In [110]:
zones_df

Unnamed: 0,id,name,boro,cX,cY,raw_X,raw_Y
1,1,Newark Airport,EWR,145.388391,528.664351,-74.174000,40.691831
2,2,Jamaica Bay,Queens,764.634396,687.534736,-73.826483,40.624196
3,3,Allerton/Pelham Gardens,Bronx,727.322333,122.409068,-73.847422,40.864474
4,4,Alphabet City,Manhattan,496.483468,453.629476,-73.976968,40.723752
5,5,Arden Heights,Staten Island,119.579529,855.392339,-74.188484,40.552659
...,...,...,...,...,...,...,...
259,259,Woodlawn/Wakefield,Bronx,718.781423,43.554036,-73.852215,40.897932
260,260,Woodside,Queens,622.395710,405.462003,-73.906306,40.744235
261,261,World Trade Center,Manhattan,432.234798,487.982836,-74.013024,40.709139
262,262,Yorkville East,Manhattan,550.510228,331.079800,-73.946648,40.775853


In [115]:
def cleaner(df, category):
    new_df = pd.DataFrame()
    if (category == "green"):
        # formatting times
        new_df["pu_time"] = pd.to_datetime(df["lpep_pickup_datetime"], format="%m/%d/%Y %I:%M:%S %p")
        new_df["do_time"] = pd.to_datetime(df["lpep_dropoff_datetime"], format="%m/%d/%Y %I:%M:%S %p")

        # calculating duration
        new_df["duration"] = (new_df["do_time"] - new_df["pu_time"]).dt.total_seconds() / 60 # minutes

        # pulling over other info
        new_df["pu_zone"] = df["PULocationID"]
        new_df["do_zone"] = df["DOLocationID"]
        new_df["distance"] =  df["trip_distance"]

        # calculating total price
        new_df["total_lesstip"] = df["total_amount"] - df["tip_amount"]

        # type = green
        new_df["type"] = "green"

        # pulling in PU, DO data
        new_df = new_df.merge(zones_df, left_on="pu_zone", right_on="id", how="left").drop(["id", "cX", "cY"], axis=1)
        new_df = new_df.rename(columns={"name": "pu_nbhd", "boro": "pu_boro", "raw_X" : "pu_X",  "raw_Y" : "pu_Y"})

        new_df = new_df.merge(zones_df, left_on="do_zone", right_on="id", how="left").drop(["id", "cX", "cY"], axis=1)
        new_df = new_df.rename(columns={"name": "do_nbhd", "boro": "do_boro", "raw_X" : "do_X",  "raw_Y" : "do_Y"})

        # dropping NAs
        new_df = new_df.dropna()
        
    if (category == "yellow"):
        # formatting times
        new_df["pu_time"] = pd.to_datetime(df["tpep_pickup_datetime"], format="%m/%d/%Y %I:%M:%S %p")
        new_df["do_time"] = pd.to_datetime(df["tpep_dropoff_datetime"], format="%m/%d/%Y %I:%M:%S %p")

        # calculating duration
        new_df["duration"] = (new_df["do_time"] - new_df["pu_time"]).dt.total_seconds() / 60 # minutes

        # pulling over other info
        new_df["pu_zone"] = df["PULocationID"]
        new_df["do_zone"] = df["DOLocationID"]
        new_df["distance"] =  df["trip_distance"]

        # calculating total price
        new_df["total_lesstip"] = df["total_amount"] - df["tip_amount"]

        # type = yellow
        new_df["type"] = "yellow"

        # pulling in PU, DO data
        new_df = new_df.merge(zones_df, left_on="pu_zone", right_on="id", how="left").drop(["id", "cX", "cY"], axis=1)
        new_df = new_df.rename(columns={"name": "pu_nbhd", "boro": "pu_boro", "raw_X" : "pu_X",  "raw_Y" : "pu_Y"})

        new_df = new_df.merge(zones_df, left_on="do_zone", right_on="id", how="left").drop(["id", "cX", "cY"], axis=1)
        new_df = new_df.rename(columns={"name": "do_nbhd", "boro": "do_boro", "raw_X" : "do_X",  "raw_Y" : "do_Y"})

        # dropping NAs
        new_df = new_df.dropna()

    return new_df

In [125]:
green_2017_df = cleaner(\
    pd.read_csv("data/trip_data/2017_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2017_df = cleaner(\
    pd.read_csv("data/trip_data/2017_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [134]:
green_2018_df = cleaner(
    pd.read_csv("data/trip_data/2018_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2018_df = cleaner(\
    pd.read_csv("data/trip_data/2018_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [136]:
green_2019_df = cleaner(
    pd.read_csv("data/trip_data/2019_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2019_df = cleaner(\
    pd.read_csv("data/trip_data/2019_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [141]:
green_2020_df = cleaner(
    pd.read_csv("data/trip_data/2020_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2020_df = cleaner(\
    pd.read_csv("data/trip_data/2020_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [144]:
green_2021_df = cleaner(
    pd.read_csv("data/trip_data/2021_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2021_df = cleaner(\
    pd.read_csv("data/trip_data/2021_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [147]:
green_2022_df = cleaner(
    pd.read_csv("data/trip_data/2022_green_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type', 'ehail_fee', 'trip_type'], axis=1),\
    "green")

yellow_2022_df = cleaner(\
    pd.read_csv("data/trip_data/2022_yellow_tripdata.csv").drop(['VendorID', 'passenger_count', 'RatecodeID', 'store_and_fwd_flag', 'payment_type'], axis=1),\
    "yellow")

In [149]:
yellow_2022_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2022-02-09 00:00:00,2022-02-09 00:42:46,42.766667,162,40,11.24,40.80,yellow,Midtown East,Manhattan,-73.972356,40.756688,Carroll Gardens,Brooklyn,-73.995956,40.679199
1,2022-02-09 00:00:04,2022-02-09 00:16:01,15.950000,138,236,8.19,36.10,yellow,LaGuardia Airport,Queens,-73.873278,40.774352,Upper East Side North,Manhattan,-73.957012,40.780436
2,2022-02-09 00:00:04,2022-02-09 00:20:42,20.633333,138,234,10.58,36.05,yellow,LaGuardia Airport,Queens,-73.873278,40.774352,Union Sq,Manhattan,-73.990458,40.740337
3,2022-02-09 00:00:05,2022-02-09 00:10:54,10.816667,249,230,2.70,14.30,yellow,West Village,Manhattan,-74.002875,40.734576,Times Sq/Theatre District,Manhattan,-73.984196,40.759818
4,2022-02-09 00:00:06,2022-02-09 00:08:39,8.550000,234,229,3.03,13.80,yellow,Union Sq,Manhattan,-73.990458,40.740337,Sutton Place/Turtle Bay North,Manhattan,-73.965146,40.756729
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106084,2022-02-09 23:59:51,2022-02-10 00:05:42,5.850000,230,141,1.26,10.30,yellow,Times Sq/Theatre District,Manhattan,-73.984196,40.759818,Lenox Hill West,Manhattan,-73.959635,40.766948
106085,2022-02-09 23:59:53,2022-02-10 00:15:43,15.833333,48,152,5.53,21.80,yellow,Clinton East,Manhattan,-73.989845,40.762253,Manhattanville,Manhattan,-73.953782,40.817975
106086,2022-02-09 23:59:53,2022-02-10 00:05:30,5.616667,114,79,1.04,9.80,yellow,Greenwich Village South,Manhattan,-73.997380,40.728340,East Village,Manhattan,-73.985937,40.727620
106087,2022-02-09 23:59:57,2022-02-10 00:12:28,12.516667,244,74,6.04,19.80,yellow,Washington Heights South,Manhattan,-73.941399,40.841709,East Harlem North,Manhattan,-73.937351,40.801170


In [148]:
green_2022_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2022-02-09 00:06:08,2022-02-09 00:13:59,7.850000,49,62,1.30,8.80,green,Clinton Hill,Brooklyn,-73.962363,40.687967,Crown Heights South,Brooklyn,-73.948789,40.666540
1,2022-02-09 00:07:40,2022-02-09 00:26:11,18.516667,260,134,7.77,25.30,green,Woodside,Queens,-73.906306,40.744235,Kew Gardens,Queens,-73.828713,40.708051
2,2022-02-09 00:07:49,2022-02-09 00:12:58,5.150000,134,134,0.90,6.80,green,Kew Gardens,Queens,-73.828713,40.708051,Kew Gardens,Queens,-73.828713,40.708051
3,2022-02-09 00:08:38,2022-02-09 00:13:29,4.850000,129,129,0.93,12.30,green,Jackson Heights,Queens,-73.885317,40.757312,Jackson Heights,Queens,-73.885317,40.757312
4,2022-02-09 00:09:16,2022-02-09 00:11:40,2.400000,127,127,0.55,5.30,green,Inwood,Manhattan,-73.919306,40.866081,Inwood,Manhattan,-73.919306,40.866081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2504,2022-02-09 23:55:31,2022-02-10 00:21:40,26.150000,74,125,11.67,38.05,green,East Harlem North,Manhattan,-73.937351,40.801170,Hudson Sq,Manhattan,-74.007486,40.726290
2505,2022-02-09 23:57:00,2022-02-10 00:09:00,12.000000,74,238,2.51,11.05,green,East Harlem North,Manhattan,-73.937351,40.801170,Upper West Side North,Manhattan,-73.973049,40.791705
2506,2022-02-09 23:58:20,2022-02-10 00:05:22,7.033333,74,41,1.38,8.30,green,East Harlem North,Manhattan,-73.937351,40.801170,Central Harlem,Manhattan,-73.951292,40.804334
2507,2022-02-09 23:58:23,2022-02-10 00:02:00,3.616667,75,75,0.58,7.30,green,East Harlem South,Manhattan,-73.945750,40.790011,East Harlem South,Manhattan,-73.945750,40.790011


In [146]:
yellow_2021_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2021-02-10 00:00:01,2021-02-10 00:11:41,11.666667,166,74,1.90,11.30,yellow,Morningside Heights,Manhattan,-73.961764,40.809457,East Harlem North,Manhattan,-73.937351,40.801170
1,2021-02-10 00:00:02,2021-02-10 00:16:00,15.966667,74,197,11.86,56.03,yellow,East Harlem North,Manhattan,-73.937351,40.801170,Richmond Hill,Queens,-73.830924,40.694542
2,2021-02-10 00:00:08,2021-02-10 00:17:53,17.750000,132,138,11.61,34.30,yellow,JFK Airport,Queens,-73.786533,40.646985,LaGuardia Airport,Queens,-73.873278,40.774352
3,2021-02-10 00:00:13,2021-02-10 00:13:46,13.550000,140,24,3.70,16.80,yellow,Lenox Hill East,Manhattan,-73.954739,40.765484,Bloomingdale,Manhattan,-73.965479,40.801970
4,2021-02-10 00:00:18,2021-02-10 00:09:39,9.350000,249,163,3.06,14.30,yellow,West Village,Manhattan,-74.002875,40.734576,Midtown North,Manhattan,-73.977569,40.764421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57850,2021-02-10 23:59:32,2021-02-11 00:19:34,20.033333,237,82,7.75,27.80,yellow,Upper East Side South,Manhattan,-73.965634,40.768615,Elmhurst,Queens,-73.877118,40.739496
57851,2021-02-10 23:59:35,2021-02-11 00:15:03,15.466667,140,202,4.18,18.30,yellow,Lenox Hill East,Manhattan,-73.954739,40.765484,Roosevelt Island,Manhattan,-73.949951,40.761900
57852,2021-02-10 23:59:37,2021-02-11 00:29:26,29.816667,138,123,17.81,50.30,yellow,LaGuardia Airport,Queens,-73.873278,40.774352,Homecrest,Brooklyn,-73.964334,40.599954
57853,2021-02-10 23:59:57,2021-02-11 00:07:46,7.816667,161,50,1.30,11.30,yellow,Midtown Center,Manhattan,-73.977698,40.758028,Clinton West,Manhattan,-73.995135,40.766238


In [145]:
green_2021_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2021-02-10 00:00:52,2021-02-10 00:28:13,27.350000,169,226,13.24,39.30,green,Mount Hope,Bronx,-73.905122,40.849058,Sunnyside,Queens,-73.924673,40.737699
1,2021-02-10 00:01:00,2021-02-10 00:14:00,13.000000,71,61,2.59,24.42,green,East Flatbush/Farragut,Brooklyn,-73.937966,40.644288,Crown Heights North,Brooklyn,-73.939287,40.674469
2,2021-02-10 00:03:00,2021-02-10 00:35:00,32.000000,220,148,14.26,62.70,green,Spuyten Duyvil/Kingsbridge,Bronx,-73.910665,40.882403,Lower East Side,Manhattan,-73.990896,40.718938
3,2021-02-10 00:12:23,2021-02-10 00:26:21,13.966667,75,213,6.52,20.80,green,East Harlem South,Manhattan,-73.945750,40.790011,Soundview/Castle Hill,Bronx,-73.838358,40.823325
4,2021-02-10 00:15:00,2021-02-10 00:32:00,17.000000,212,244,6.79,36.00,green,Soundview/Bruckner,Bronx,-73.869680,40.827902,Washington Heights South,Manhattan,-73.941399,40.841709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3207,2021-02-10 23:45:51,2021-02-11 00:01:38,15.783333,134,82,3.61,16.30,green,Kew Gardens,Queens,-73.828713,40.708051,Elmhurst,Queens,-73.877118,40.739496
3208,2021-02-10 23:50:28,2021-02-10 23:56:10,5.700000,75,238,1.27,7.80,green,East Harlem South,Manhattan,-73.945750,40.790011,Upper West Side North,Manhattan,-73.973049,40.791705
3209,2021-02-10 23:51:07,2021-02-10 23:58:33,7.433333,42,152,0.99,7.80,green,Central Harlem North,Manhattan,-73.940772,40.818258,Manhattanville,Manhattan,-73.953782,40.817975
3210,2021-02-10 23:51:50,2021-02-10 23:58:23,6.550000,43,263,1.44,11.05,green,Central Park,Manhattan,-73.965554,40.782478,Yorkville West,Manhattan,-73.951010,40.778766


In [143]:
yellow_2020_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2020-02-12 00:00:00,2020-02-12 00:16:43,16.716667,125,25,3.67,18.3,yellow,Hudson Sq,Manhattan,-74.007486,40.726290,Boerum Hill,Brooklyn,-73.986114,40.685634
1,2020-02-12 00:00:00,2020-02-12 00:06:08,6.133333,233,140,1.30,10.3,yellow,UN/Turtle Bay South,Manhattan,-73.970451,40.749919,Lenox Hill East,Manhattan,-73.954739,40.765484
3,2020-02-12 00:00:01,2020-02-12 00:22:18,22.283333,90,49,5.75,24.3,yellow,Flatiron,Manhattan,-73.996972,40.742279,Clinton Hill,Brooklyn,-73.962363,40.687967
4,2020-02-12 00:00:01,2020-02-12 00:20:44,20.716667,234,24,5.53,23.3,yellow,Union Sq,Manhattan,-73.990458,40.740337,Bloomingdale,Manhattan,-73.965479,40.801970
5,2020-02-12 00:00:02,2020-02-12 00:11:22,11.333333,79,144,1.70,13.3,yellow,East Village,Manhattan,-73.985937,40.727620,Little Italy/NoLiTa,Manhattan,-73.996919,40.720889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236423,2020-02-12 23:59:57,2020-02-13 00:06:19,6.366667,230,186,0.93,9.8,yellow,Times Sq/Theatre District,Manhattan,-73.984196,40.759818,Penn Station/Madison Sq West,Manhattan,-73.992438,40.748497
236424,2020-02-12 23:59:57,2020-02-13 00:37:19,37.366667,138,11,25.24,67.3,yellow,LaGuardia Airport,Queens,-73.873278,40.774352,Bath Beach,Brooklyn,-74.007488,40.604273
236425,2020-02-12 23:59:58,2020-02-13 00:09:52,9.900000,107,234,1.13,11.8,yellow,Gramercy,Manhattan,-73.984052,40.736824,Union Sq,Manhattan,-73.990458,40.740337
236426,2020-02-12 23:59:59,2020-02-13 00:35:04,35.083333,132,257,26.60,70.8,yellow,JFK Airport,Queens,-73.786533,40.646985,Windsor Terrace,Brooklyn,-73.977983,40.653612


In [142]:
green_2020_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2020-02-12 00:00:28,2020-02-12 00:12:14,11.766667,244,239,5.27,20.05,green,Washington Heights South,Manhattan,-73.941399,40.841709,Upper West Side South,Manhattan,-73.978633,40.783962
1,2020-02-12 00:00:40,2020-02-12 00:09:08,8.466667,82,129,1.50,9.30,green,Elmhurst,Queens,-73.877118,40.739496,Jackson Heights,Queens,-73.885317,40.757312
2,2020-02-12 00:00:42,2020-02-12 00:09:14,8.533333,82,70,1.87,9.80,green,Elmhurst,Queens,-73.877118,40.739496,East Elmhurst,Queens,-73.868396,40.763352
3,2020-02-12 00:00:42,2020-02-12 00:15:00,14.300000,65,224,5.18,21.55,green,Downtown Brooklyn/MetroTech,Brooklyn,-73.986086,40.695337,Stuy Town/Peter Cooper Village,Manhattan,-73.976598,40.731821
4,2020-02-12 00:01:00,2020-02-12 00:27:00,26.000000,97,55,12.67,41.56,green,Fort Greene,Brooklyn,-73.974882,40.690787,Coney Island,Brooklyn,-73.987943,40.576961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14959,2020-02-12 23:59:19,2020-02-13 00:09:48,10.483333,244,127,3.23,12.80,green,Washington Heights South,Manhattan,-73.941399,40.841709,Inwood,Manhattan,-73.919306,40.866081
14960,2020-02-12 23:59:25,2020-02-13 00:15:19,15.900000,82,92,6.16,20.80,green,Elmhurst,Queens,-73.877118,40.739496,Flushing,Queens,-73.828859,40.761102
14961,2020-02-12 23:59:38,2020-02-13 00:13:34,13.933333,152,48,4.85,21.05,green,Manhattanville,Manhattan,-73.953782,40.817975,Clinton East,Manhattan,-73.989845,40.762253
14962,2020-02-12 23:59:55,2020-02-13 00:12:05,12.166667,41,239,2.89,15.55,green,Central Harlem,Manhattan,-73.951292,40.804334,Upper West Side South,Manhattan,-73.978633,40.783962


In [137]:
yellow_2019_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2019-02-13 00:00:00,2019-02-13 00:25:11,25.183333,132,42,18.75,58.56,yellow,JFK Airport,Queens,-73.786533,40.646985,Central Harlem North,Manhattan,-73.940772,40.818258
1,2019-02-13 00:00:00,2019-02-13 09:20:37,560.616667,238,239,0.97,9.80,yellow,Upper West Side North,Manhattan,-73.973049,40.791705,Upper West Side South,Manhattan,-73.978633,40.783962
2,2019-02-13 00:00:00,2019-02-13 15:19:39,919.650000,90,43,3.95,27.30,yellow,Flatiron,Manhattan,-73.996972,40.742279,Central Park,Manhattan,-73.965554,40.782478
3,2019-02-13 00:00:00,2019-02-13 22:31:22,1351.366667,161,148,3.22,19.80,yellow,Midtown Center,Manhattan,-73.977698,40.758028,Lower East Side,Manhattan,-73.990896,40.718938
4,2019-02-13 00:00:03,2019-02-13 00:23:08,23.083333,129,48,5.80,24.80,yellow,Jackson Heights,Queens,-73.885317,40.757312,Clinton East,Manhattan,-73.989845,40.762253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267485,2019-02-13 23:59:57,2019-02-14 00:16:09,16.200000,237,79,3.98,18.30,yellow,Upper East Side South,Manhattan,-73.965634,40.768615,East Village,Manhattan,-73.985937,40.727620
267486,2019-02-13 23:59:58,2019-02-14 00:05:26,5.466667,164,170,0.50,9.30,yellow,Midtown South,Manhattan,-73.985157,40.748575,Murray Hill,Manhattan,-73.978492,40.747746
267487,2019-02-13 23:59:58,2019-02-14 00:11:11,11.216667,234,230,1.80,12.80,yellow,Union Sq,Manhattan,-73.990458,40.740337,Times Sq/Theatre District,Manhattan,-73.984196,40.759818
267488,2019-02-13 23:59:58,2019-02-14 00:18:33,18.583333,234,41,5.42,21.80,yellow,Union Sq,Manhattan,-73.990458,40.740337,Central Harlem,Manhattan,-73.951292,40.804334


In [138]:
green_2019_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2019-02-13 00:00:10,2019-02-13 00:07:57,7.783333,42,41,1.65,9.3,green,Central Harlem North,Manhattan,-73.940772,40.818258,Central Harlem,Manhattan,-73.951292,40.804334
1,2019-02-13 00:00:16,2019-02-13 00:18:37,18.350000,75,169,6.47,21.8,green,East Harlem South,Manhattan,-73.945750,40.790011,Mount Hope,Bronx,-73.905122,40.849058
2,2019-02-13 00:00:19,2019-02-13 00:03:24,3.083333,181,25,0.77,5.8,green,Park Slope,Brooklyn,-73.981414,40.670374,Boerum Hill,Brooklyn,-73.986114,40.685634
3,2019-02-13 00:00:36,2019-02-13 00:23:10,22.566667,41,185,8.98,27.8,green,Central Harlem,Manhattan,-73.951292,40.804334,Pelham Parkway,Bronx,-73.854394,40.854405
4,2019-02-13 00:00:45,2019-02-13 00:05:15,4.500000,82,82,0.75,6.3,green,Elmhurst,Queens,-73.877118,40.739496,Elmhurst,Queens,-73.877118,40.739496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21293,2019-02-13 23:58:35,2019-02-14 00:15:04,16.483333,80,129,5.30,20.3,green,East Williamsburg,Brooklyn,-73.936793,40.715370,Jackson Heights,Queens,-73.885317,40.757312
21294,2019-02-13 23:58:57,2019-02-14 00:12:42,13.750000,7,82,3.16,13.8,green,Astoria,Queens,-73.919694,40.761493,Elmhurst,Queens,-73.877118,40.739496
21295,2019-02-13 23:59:28,2019-02-13 23:59:56,0.466667,82,82,0.00,3.8,green,Elmhurst,Queens,-73.877118,40.739496,Elmhurst,Queens,-73.877118,40.739496
21296,2019-02-13 23:59:42,2019-02-14 00:00:16,0.566667,74,74,1.60,3.8,green,East Harlem North,Manhattan,-73.937351,40.801170,East Harlem North,Manhattan,-73.937351,40.801170


In [135]:
yellow_2018_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2018-02-14 23:59:59,2018-02-15 00:09:52,9.883333,189,61,2.10,10.8,yellow,Prospect Heights,Brooklyn,-73.967587,40.677636,Crown Heights North,Brooklyn,-73.939287,40.674469
1,2018-02-14 23:59:59,2018-02-15 00:13:57,13.966667,229,113,2.80,13.3,yellow,Sutton Place/Turtle Bay North,Manhattan,-73.965146,40.756729,Greenwich Village North,Manhattan,-73.994305,40.732579
2,2018-02-14 23:59:59,2018-02-15 00:04:33,4.566667,50,230,0.73,6.3,yellow,Clinton West,Manhattan,-73.995135,40.766238,Times Sq/Theatre District,Manhattan,-73.984196,40.759818
3,2018-02-14 23:59:59,2018-02-15 00:13:23,13.400000,138,226,5.64,19.3,yellow,LaGuardia Airport,Queens,-73.873278,40.774352,Sunnyside,Queens,-73.924673,40.737699
4,2018-02-14 23:59:58,2018-02-15 00:46:08,46.166667,100,188,8.30,33.8,yellow,Garment District,Manhattan,-73.988786,40.753513,Prospect-Lefferts Gardens,Brooklyn,-73.947442,40.658744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
321732,2018-02-14 00:00:00,2018-02-14 00:06:12,6.200000,107,162,1.75,8.3,yellow,Gramercy,Manhattan,-73.984052,40.736824,Midtown East,Manhattan,-73.972356,40.756688
321733,2018-02-14 00:00:00,2018-02-14 00:05:11,5.183333,234,100,1.00,7.3,yellow,Union Sq,Manhattan,-73.990458,40.740337,Garment District,Manhattan,-73.988786,40.753513
321734,2018-02-14 00:00:00,2018-02-14 00:18:07,18.116667,164,7,4.69,18.3,yellow,Midtown South,Manhattan,-73.985157,40.748575,Astoria,Queens,-73.919694,40.761493
321735,2018-02-14 00:00:00,2018-02-14 00:18:19,18.316667,142,79,5.44,19.8,yellow,Lincoln Square East,Manhattan,-73.981533,40.773634,East Village,Manhattan,-73.985937,40.727620


In [133]:
green_2018_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2018-02-14 00:00:00,2018-02-14 00:06:56,6.933333,37,37,1.58,8.8,green,Bushwick South,Brooklyn,-73.922240,40.694994,Bushwick South,Brooklyn,-73.922240,40.694994
1,2018-02-14 00:00:00,2018-02-14 00:17:59,17.983333,41,3,9.08,27.8,green,Central Harlem,Manhattan,-73.951292,40.804334,Allerton/Pelham Gardens,Bronx,-73.847422,40.864474
2,2018-02-14 00:00:02,2018-02-14 00:24:01,23.983333,66,80,4.62,19.8,green,DUMBO/Vinegar Hill,Brooklyn,-73.985702,40.702259,East Williamsburg,Brooklyn,-73.936793,40.715370
3,2018-02-14 00:00:04,2018-02-14 00:13:20,13.266667,49,80,2.92,13.8,green,Clinton Hill,Brooklyn,-73.962363,40.687967,East Williamsburg,Brooklyn,-73.936793,40.715370
4,2018-02-14 00:00:04,2018-02-14 00:11:02,10.966667,82,160,2.30,11.3,green,Elmhurst,Queens,-73.877118,40.739496,Middle Village,Queens,-73.880051,40.718337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29330,2018-02-14 23:59:29,2018-02-15 00:08:17,8.800000,255,80,1.41,8.8,green,Williamsburg (North Side),Brooklyn,-73.957418,40.718804,East Williamsburg,Brooklyn,-73.936793,40.715370
29331,2018-02-14 23:59:38,2018-02-15 00:17:10,17.533333,42,239,3.38,15.8,green,Central Harlem North,Manhattan,-73.940772,40.818258,Upper West Side South,Manhattan,-73.978633,40.783962
29332,2018-02-14 23:59:45,2018-02-15 00:01:29,1.733333,74,41,0.32,-4.8,green,East Harlem North,Manhattan,-73.937351,40.801170,Central Harlem,Manhattan,-73.951292,40.804334
29333,2018-02-14 23:59:45,2018-02-15 00:01:29,1.733333,74,41,0.32,4.8,green,East Harlem North,Manhattan,-73.937351,40.801170,Central Harlem,Manhattan,-73.951292,40.804334


In [139]:
yellow_2017_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
1,2017-02-15 00:00:00,2017-02-15 00:13:46,13.766667,237,226,4.10,15.8,yellow,Upper East Side South,Manhattan,-73.965634,40.768615,Sunnyside,Queens,-73.924673,40.737699
2,2017-02-15 00:00:00,2017-02-15 00:00:49,0.816667,148,148,2.10,3.8,yellow,Lower East Side,Manhattan,-73.990896,40.718938,Lower East Side,Manhattan,-73.990896,40.718938
3,2017-02-15 00:00:00,2017-02-15 00:24:08,24.133333,164,129,6.55,23.3,yellow,Midtown South,Manhattan,-73.985157,40.748575,Jackson Heights,Queens,-73.885317,40.757312
4,2017-02-15 00:00:00,2017-02-15 00:05:21,5.350000,97,17,1.21,7.3,yellow,Fort Greene,Brooklyn,-73.974882,40.690787,Bedford,Brooklyn,-73.949905,40.691507
5,2017-02-15 00:00:00,2017-02-15 00:10:10,10.166667,142,41,3.23,12.8,yellow,Lincoln Square East,Manhattan,-73.981533,40.773634,Central Harlem,Manhattan,-73.951292,40.804334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347209,2017-02-15 23:59:57,2017-02-16 00:12:55,12.966667,68,238,4.27,15.3,yellow,East Chelsea,Manhattan,-73.999917,40.748427,Upper West Side North,Manhattan,-73.973049,40.791705
347210,2017-02-15 23:59:58,2017-02-16 00:25:01,25.050000,231,42,9.20,29.8,yellow,TriBeCa/Civic Center,Manhattan,-74.007880,40.717773,Central Harlem North,Manhattan,-73.940772,40.818258
347211,2017-02-15 23:59:58,2017-02-16 00:01:30,1.533333,75,263,0.70,5.3,yellow,East Harlem South,Manhattan,-73.945750,40.790011,Yorkville West,Manhattan,-73.951010,40.778766
347212,2017-02-15 23:59:58,2017-02-16 00:10:02,10.066667,230,166,3.45,12.8,yellow,Times Sq/Theatre District,Manhattan,-73.984196,40.759818,Morningside Heights,Manhattan,-73.961764,40.809457


In [140]:
green_2017_df

Unnamed: 0,pu_time,do_time,duration,pu_zone,do_zone,distance,total_lesstip,type,pu_nbhd,pu_boro,pu_X,pu_Y,do_nbhd,do_boro,do_X,do_Y
0,2017-02-15 00:00:00,2017-02-15 00:00:00,0.000000,247,247,15.19,76.8,green,West Concourse,Bronx,-73.924409,40.828988,West Concourse,Bronx,-73.924409,40.828988
1,2017-02-15 00:00:00,2017-02-15 00:00:00,0.000000,95,134,2.19,15.8,green,Forest Hills,Queens,-73.847669,40.721432,Kew Gardens,Queens,-73.828713,40.708051
2,2017-02-15 00:00:00,2017-02-15 00:15:45,15.750000,7,173,3.10,14.8,green,Astoria,Queens,-73.919694,40.761493,North Corona,Queens,-73.863038,40.752579
3,2017-02-15 00:00:02,2017-02-15 00:05:50,5.800000,255,112,1.15,7.8,green,Williamsburg (North Side),Brooklyn,-73.957418,40.718804,Greenpoint,Brooklyn,-73.949538,40.729507
4,2017-02-15 00:00:03,2017-02-15 00:07:38,7.583333,75,238,1.26,8.8,green,East Harlem South,Manhattan,-73.945750,40.790011,Upper West Side North,Manhattan,-73.973049,40.791705
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35054,2017-02-15 23:59:40,2017-02-16 00:23:35,23.916667,256,89,9.03,28.8,green,Williamsburg (South Side),Brooklyn,-73.959905,40.710880,Flatbush/Ditmas Park,Brooklyn,-73.960968,40.637900
35055,2017-02-15 23:59:48,2017-02-16 00:01:48,2.000000,145,112,0.66,5.3,green,Long Island City/Hunters Point,Queens,-73.948891,40.745379,Greenpoint,Brooklyn,-73.949538,40.729507
35056,2017-02-15 23:59:51,2017-02-16 00:09:41,9.833333,130,122,2.58,11.3,green,Jamaica,Queens,-73.793980,40.704369,Hollis,Queens,-73.761137,40.710639
35057,2017-02-15 23:59:57,2017-02-16 00:13:09,13.200000,129,56,2.20,12.3,green,Jackson Heights,Queens,-73.885317,40.757312,Corona,Queens,-73.858845,40.741407


In [174]:
dict_dfs = {
    'yellow_2017_df' : yellow_2017_df,
    'green_2017_df' : green_2017_df,
    'yellow_2018_df' : yellow_2018_df,
    'green_2018_df' : green_2018_df,
    'yellow_2019_df' : yellow_2019_df,
    'green_2019_df' : green_2019_df,
    'yellow_2020_df' : yellow_2020_df,
    'green_2020_df' : green_2020_df,
    'yellow_2021_df' : yellow_2021_df,
    'green_2021_df' : green_2021_df,
    'yellow_2022_df' : yellow_2022_df,
    'green_2022_df' : green_2022_df
}

summarized_df = pd.DataFrame(list(dict_dfs.keys()))
summarized_df = summarized_df.rename(columns={0: "dataframe"})

In [191]:
for idx, row in summarized_df.iterrows():
    row['size'] = dict_dfs[row['dataframe']

KeyError: 0

In [190]:
summarized_df

Unnamed: 0,dataframe
0,yellow_2017_df
1,green_2017_df
2,yellow_2018_df
3,green_2018_df
4,yellow_2019_df
5,green_2019_df
6,yellow_2020_df
7,green_2020_df
8,yellow_2021_df
9,green_2021_df
