In [1]:
import pandas as pd
import numpy as np
import folium
import geopandas as gp
import numexpr as ne


## Pull out the suppresed trips and assign them to census tracts

* Trips are suppressed to community areas when there is less than 3 trips for OD-15min 
* Assign those trips randomly to the census tracts within the community area that the suppressed trip is assigned to

In [2]:
tods = [1,2,3,4,5]
years = [2018,2019,2020]

In [10]:
agg = {'SCALED_SUP_PRIVATE_TRIPS':'sum', 'SCALED_SUP_SHARED_TRIPS':'sum', 'Pickup Community Area':'first', 'Dropoff Community Area':'first'}

In [11]:
#df2 = pd.read_hdf('D:/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Chicago_TNC_Trips_20.H5', where = 'YEAR == ' + str(year), key = 'Weekday_' + str(tod))


In [12]:
#df2

In [14]:
store = pd.HDFStore('D:/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Chicago_TNC_Trips_20.H5')

In [15]:
#df = pd.read_hdf('C:/Workspace/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Chicago_TNC_Trips_20.H5', where = ['YEAR == 2019'], key = 'Weekday_' + str(1))

In [16]:
df = store.select(where = ['YEAR == 2018'], key = 'Weekday_' + str(1))

In [18]:
df['MINUTE'] = df['Trip Start Timestamp'].dt.minute


In [29]:
df[df['MONTH'] == 11]['Trip Start Timestamp'].min()

Timestamp('2018-11-01 01:00:00')

In [7]:
conflation = pd.read_csv('D:/TNC-Demand-Model/Inputs/Chicago Community Areas/Community_Area_to_Census_Tract.csv')
conflation['COMMUNITY_AREA'] = conflation.area_num_1.astype(float)
store = pd.HDFStore('D:/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Chicago_TNC_Trips_20.H5')

for year in years:  
    if year == 2018:
        months = [11,12]
    elif year == 2020:
        months = [1,2]
    else:
        months = [1,2,3,4,5,6,7,8,9,10,11,12]
        
    print('Working on year ' + str(year))
    df5 = pd.DataFrame()
    
    for month in months:
        print('Working on month ' + str(month))
        df4 = pd.DataFrame()
        
        for tod in tods:
            print('Working on tod ' + str(tod))

            df3 = pd.DataFrame()
            df2 = pd.DataFrame()
            df = pd.DataFrame()
            df = store.select(where = ['YEAR == ' + str(year)], key = 'Weekday_' + str(tod))

            print('Filtering the Data!')
           # df = df[df['MONTH'].isin(months)]
            df['DAY'] = df['Trip Start Timestamp'].dt.day

            print(str(len(df[(~np.isnan(df['Pickup Community Area']))&(np.isnan(df['Pickup Census Tract']))&(~np.isnan(df['Dropoff Community Area'])) & (np.isnan(df['Dropoff Census Tract']))])) +  ' Trip Records with Suppressed Origin and Destination out of ' + str(len(df)) )
            print(str(len(df[((np.isnan(df['Pickup Community Area']))&(~np.isnan(df['Pickup Census Tract'])))|((np.isnan(df['Dropoff Community Area'])) & (np.isnan(df['Dropoff Census Tract'])))])) +  ' Trip Records Outside of Chicago but within cook county out of ' + str(len(df)) )
            print(str(len(df[((~np.isnan(df['Pickup Community Area']))&(~np.isnan(df['Pickup Census Tract'])))&((~np.isnan(df['Dropoff Community Area'])) & (~np.isnan(df['Dropoff Census Tract'])))])) +  ' Trip Records with both trip ends within Chicago out of ' + str(len(df)) )
            print(str(len(df[(np.isnan(df['Pickup Community Area']))|(np.isnan(df['Dropoff Community Area']))])) +  ' Trip Records with one trip end outside of Cook county out of ' + str(len(df)) )


            #select out the trips that have community area data and are missing census tract data 
            df['SUP_PRIVATE_TRIPS'] = np.where(df['Shared Trip Authorized'] == False, 1, 0)
            df['SUP_SHARED_TRIPS'] = np.where(df['Shared Trip Authorized'] == True, 1, 0)
            df = df[(np.isnan(df['Pickup Census Tract']))|(np.isnan(df['Dropoff Census Tract']))]
            df = df[~np.isnan(df['Pickup Community Area'])&(~np.isnan(df['Dropoff Community Area']))]


            df = df.groupby(by = ['Pickup Community Area', 'Dropoff Community Area', 'YEAR','MONTH','DAY'], as_index = False).sum()
            df = df.groupby(by = ['Pickup Community Area', 'Dropoff Community Area','YEAR','MONTH'], as_index = False).mean()

            print('There are ' +str(df.SUP_PRIVATE_TRIPS.sum() + df.SUP_SHARED_TRIPS.sum()) + ' Average Weekday Trips that are Suppressed!')

            #make a column to iterate through
            df['OD_PAIRS'] = df['Pickup Community Area'].astype(str) + '_' + df['Dropoff Community Area'].astype(str)

            #iterate through each of the suppressed Community Areas
            print('Working on assigning suppressed trips!')
            for od in df['OD_PAIRS'].unique():

                #select out the trips that are originating from the given community area
                od_trips = df[df['OD_PAIRS'] == od]

                #conflate the community area to the census tract centroids that fall within it
                df2 = od_trips[['SUP_PRIVATE_TRIPS', 'SUP_SHARED_TRIPS', 'Pickup Community Area', 'Dropoff Community Area']].merge(conflation[['GEOID','COMMUNITY_AREA']], how = 'left', left_on = 'Pickup Community Area' , right_on = 'COMMUNITY_AREA')
                df2 = df2.merge(conflation[['GEOID','area_num_1']], how = 'left', left_on = 'Dropoff Community Area' , right_on = 'area_num_1', suffixes = ('_PICKUP','_DROPOFF'))

                df2['SCALAR'] = np.random.dirichlet(np.ones(len(df2)))
                df2['SCALED_SUP_PRIVATE_TRIPS'] = df2['SUP_PRIVATE_TRIPS']*df2['SCALAR']
                df2['SCALED_SUP_SHARED_TRIPS'] = df2['SUP_SHARED_TRIPS']*df2['SCALAR']
                df3 = df3.append(df2)
            df3['TOD'] = tod       
            
            df4 = df3.append(df3)
        df4['MONTH'] = month
        print(str(df4.SCALED_SUP_PRIVATE_TRIPS.sum()))
        print(str(df4.SCALED_SUP_SHARED_TRIPS.sum()))
        
        df5 = df4.append(df4)
    df5['YEAR'] = year    
       
    df6 = df5.append(df5)
    

grouped = df6[['SCALED_SUP_PRIVATE_TRIPS','SCALED_SUP_SHARED_TRIPS', 'GEOID_PICKUP','GEOID_DROPOFF', 'MONTH','YEAR','Pickup Community Area', 'Dropoff Community Area']].groupby(by = ['GEOID_PICKUP', 'GEOID_DROPOFF','MONTH','YEAR'], as_index = False).agg(agg)        




print('Everyting is Complete!')

Working on year 2018
Working on month 11
Working on tod 1
Filtering the Data!
563808 Trip Records with Suppressed Origin and Destination out of 1800773
138037 Trip Records Outside of Chicago but within cook county out of 1800773
978219 Trip Records with both trip ends within Chicago out of 1800773
258746 Trip Records with one trip end outside of Cook county out of 1800773
There are 31928.0321587769 Average Weekday Trips that are Suppressed!
Working on assigning suppressed trips!
Working on tod 2
Filtering the Data!
330100 Trip Records with Suppressed Origin and Destination out of 1558829
89564 Trip Records Outside of Chicago but within cook county out of 1558829
1035235 Trip Records with both trip ends within Chicago out of 1558829
193494 Trip Records with one trip end outside of Cook county out of 1558829
There are 20861.294404417393 Average Weekday Trips that are Suppressed!
Working on assigning suppressed trips!
Working on tod 3
Filtering the Data!
835235 Trip Records with Suppresse

KeyboardInterrupt: 

# Testing the new process suppressed trips script

In [2]:
conflation = pd.read_csv('D:/TNC-Demand-Model/Inputs/Chicago Community Areas/Community_Area_to_Census_Tract.csv')
conflation['COMMUNITY_AREA'] = conflation.area_num_1.astype(float)
store = pd.HDFStore('D:/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Chicago_TNC_Trips_20.H5')

In [3]:
agg = {'SUP_PRIVATE_TRIPS':'sum', 'SUP_SHARED_TRIPS':'sum', 'Pickup Community Area':'first', 'Dropoff Community Area':'first','Trip Seconds':'mean','Trip Miles':'mean', 'Fare':'mean', 'Tip':'mean', 'Additional Charges':'mean', 'Trip Total':'mean','Trips Pooled':'sum'}


In [4]:
df = store.select(where = ['YEAR == 2018', 'MONTH == 11'], key = 'Weekday_3')

In [5]:
df['DAY'] = df['Trip Start Timestamp'].dt.day
df['MINUTE'] = df['Trip Start Timestamp'].dt.minute


df = df[~np.isnan(df['Pickup Community Area'])&(~np.isnan(df['Dropoff Community Area']))]
df2 = df[(np.isnan(df['Pickup Census Tract']))|(np.isnan(df['Dropoff Census Tract']))]
df2['SUP_PRIVATE_TRIPS'] = np.where(df2['Shared Trip Authorized'] == False, 1, 0)
df2['SUP_SHARED_TRIPS'] = np.where(df2['Shared Trip Authorized'] == True, 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [6]:
#select out the trips that have community area data and are missing census tract data 
df = df[~np.isnan(df['Pickup Community Area'])&(~np.isnan(df['Dropoff Community Area']))]
df2 = df[(np.isnan(df['Pickup Census Tract']))|(np.isnan(df['Dropoff Census Tract']))]
df2['SUP_PRIVATE_TRIPS'] = np.where(df2['Shared Trip Authorized'] == False, 1, 0)
df2['SUP_SHARED_TRIPS'] = np.where(df2['Shared Trip Authorized'] == True, 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [7]:
#aggregate to a 15 minute window because that is what the privacy masking allows...aggregating suppressed (df2) and unsuppressed trips (df)
# I do a first aggregation for the unsuppressed trips because I only care about keeping the Pickup Cenus Tracts and Dropoff Census Tracts
df = df.groupby(by = ['Pickup Census Tract', 'Dropoff Census Tract', 'YEAR','MONTH','DAY','HOUR','MINUTE'], as_index = False).first()
df2 = df2.groupby(by = ['Pickup Community Area', 'Dropoff Community Area', 'YEAR','MONTH','DAY','HOUR','MINUTE'], as_index = False).agg(agg)


In [8]:
#make a column to iterate through
df2['OD_PAIRS'] = df2['Pickup Community Area'].astype(str) + '_' + df2['Dropoff Community Area'].astype(str)

#conflate the community area to the census tract centroids that fall within it
df2 = df2.merge(conflation[['GEOID','COMMUNITY_AREA']], how = 'left', left_on = 'Pickup Community Area' , right_on = 'COMMUNITY_AREA')
df2 = df2.merge(conflation[['GEOID','COMMUNITY_AREA']], how = 'left', left_on = 'Dropoff Community Area' , right_on = 'COMMUNITY_AREA', suffixes = ('_PICKUP','_DROPOFF'))


In [9]:
od_drop = df2.merge(df,how = 'inner', left_on = ['YEAR','MONTH','DAY','HOUR','MINUTE','GEOID_PICKUP','GEOID_DROPOFF'], right_on = ['YEAR','MONTH','DAY','HOUR','MINUTE','Pickup Census Tract','Dropoff Census Tract'])
od_drop['OD'] = od_drop['GEOID_PICKUP'].astype(str) +'_' + od_drop['GEOID_DROPOFF'].astype(str)
df2['OD'] = df2['GEOID_PICKUP'].astype(str) +'_' + df2['GEOID_DROPOFF'].astype(str)

df2 = df2[~df2['OD'].isin(od_drop['OD'])]

In [31]:
df2[df2["OD_PAIRS"] == '9.0_17.0']

Unnamed: 0,YEAR,MONTH,DAY,HOUR,MINUTE,SUP_PRIVATE_TRIPS,SUP_SHARED_TRIPS,Pickup Community Area,Dropoff Community Area,Trip Seconds,...,Fare,Tip,Additional Charges,Trip Total,Trips Pooled,OD_PAIRS,GEOID_PICKUP,COMMUNITY_AREA_PICKUP,GEOID_DROPOFF,COMMUNITY_AREA_DROPOFF
14921895,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14921928,2018,11,2,13,30,1,0,9.0,17.0,973.0,...,10.0,0.0,2.5,12.5,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14921961,2018,11,2,13,45,1,0,9.0,17.0,1232.0,...,10.0,0.0,2.5,12.5,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14921994,2018,11,6,10,45,0,1,9.0,17.0,820.0,...,10.0,0.0,0.0,10.0,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922027,2018,11,7,10,45,1,0,9.0,17.0,1088.0,...,12.5,0.0,2.5,15.0,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922060,2018,11,9,11,30,0,1,9.0,17.0,1028.0,...,10.0,0.0,2.5,12.5,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922093,2018,11,12,13,15,0,1,9.0,17.0,1085.0,...,5.0,0.0,2.5,7.5,2.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922126,2018,11,14,13,15,0,1,9.0,17.0,851.0,...,7.5,0.0,2.5,10.0,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922159,2018,11,16,13,0,0,1,9.0,17.0,1155.0,...,15.0,0.0,0.0,15.0,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0
14922192,2018,11,19,11,30,0,1,9.0,17.0,1080.0,...,10.0,0.0,2.5,12.5,1.0,9.0_17.0,17031090100,9.0,17031170900,17.0


In [34]:
conflation[conflation['COMMUNITY_AREA']==9.0]

Unnamed: 0.1,Unnamed: 0,area_num_1,GEOID,COMMUNITY_AREA
782,76,9,17031090300,9.0
783,76,9,17031090200,9.0
784,76,9,17031090100,9.0


In [50]:
conflation[conflation['COMMUNITY_AREA']==17.0]['GEOID']

137    17031170900
138    17031170600
139    17031170700
140    17031170800
141    17031171000
142    17031171100
143    17031170400
144    17031170300
145    17031170200
146    17031170500
147    17031170100
Name: GEOID, dtype: int64

In [10]:
od_trips = df2[(df2["OD_PAIRS"] == '9.0_17.0')&(df2["DAY"] == 1)&(df2["HOUR"] == 9)&(df2["MINUTE"] == 45)]

od_trips

Unnamed: 0,YEAR,MONTH,DAY,HOUR,MINUTE,SUP_PRIVATE_TRIPS,SUP_SHARED_TRIPS,Pickup Community Area,Dropoff Community Area,Trip Seconds,...,Tip,Additional Charges,Trip Total,Trips Pooled,OD_PAIRS,GEOID_PICKUP,COMMUNITY_AREA_PICKUP,GEOID_DROPOFF,COMMUNITY_AREA_DROPOFF,OD
14921873,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170900,17.0,17031090300_17031170900
14921874,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170600,17.0,17031090300_17031170600
14921875,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170700,17.0,17031090300_17031170700
14921876,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170800,17.0,17031090300_17031170800
14921877,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031171000,17.0,17031090300_17031171000
14921878,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031171100,17.0,17031090300_17031171100
14921879,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170400,17.0,17031090300_17031170400
14921880,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170300,17.0,17031090300_17031170300
14921881,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170200,17.0,17031090300_17031170200
14921882,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170500,17.0,17031090300_17031170500


In [54]:
od_trips = df2[(df2["OD_PAIRS"] == '9.0_17.0')&(df2["DAY"] == 1)&(df2["HOUR"] == 9)&(df2["MINUTE"] == 45)]

od_trips

Unnamed: 0,YEAR,MONTH,DAY,HOUR,MINUTE,SUP_PRIVATE_TRIPS,SUP_SHARED_TRIPS,Pickup Community Area,Dropoff Community Area,Trip Seconds,...,Fare,Tip,Additional Charges,Trip Total,Trips Pooled,OD_PAIRS,GEOID_PICKUP,COMMUNITY_AREA_PICKUP,GEOID_DROPOFF,COMMUNITY_AREA_DROPOFF
351,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170900,17.0
352,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170600,17.0
353,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170700,17.0
354,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170800,17.0
355,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031171000,17.0
356,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031171100,17.0
357,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170400,17.0
358,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170300,17.0
359,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170200,17.0
360,2018,11,1,9,45,0,1,9.0,17.0,838.0,...,12.5,0.0,0.0,12.5,1.0,9.0_17.0,17031090300,9.0,17031170500,17.0


In [51]:
od_drop[(od_drop['Pickup Census Tract'] == 17031090200)&(od_drop['Dropoff Census Tract']== 17031170500)]

Unnamed: 0,YEAR,MONTH,DAY,HOUR,MINUTE,SUP_PRIVATE_TRIPS,SUP_SHARED_TRIPS,Pickup Community Area_x,Dropoff Community Area_x,Trip Seconds_x,...,Trip Total_y,Shared Trip Authorized,Trips Pooled_y,Pickup Centroid Latitude,Pickup Centroid Longitude,Pickup Centroid Location,Dropoff Centroid Latitude,Dropoff Centroid Longitude,Dropoff Centroid Location,DOW


In [5]:




            #iterate through each of the suppressed Community Area pairings
            print('Working on assigning suppressed trips!')



            for day in df2.DAY.unique():
                df5 = pd.DataFrame()
                print('Working on Day ' + str(day))
                #od_trips = od_trips.query("DAY == " + str(day))
                #od_trips = od_trips[od_trips["DAY"] == day]
                df_select1 = df2.loc[df2["DAY"] == day]

                for hour in df_select1.HOUR.unique():
                    df4 = pd.DataFrame()

                    #od_trips = od_trips.query("HOUR == " + str(hour))
                    #od_trips = od_trips[od_trips["HOUR"] == hour]
                    df_select2 = df_select1.loc[df_select1["HOUR"] == hour]

                    for minute in df_select2.MINUTE.unique():
                        df3 = pd.DataFrame()
                        df_select3 = df_select2.loc[df_select2["MINUTE"] == minute]
                        #od_trips = od_trips.query("MINUTE == " + str(minute))
                        #od_trips = od_trips[od_trips["MINUTE"] == minute]

                        for od in df_select3['OD_PAIRS'].unique():
                            #od_trips = df2.loc[(df2["OD_PAIRS"] == od)&(df2["DAY"] == day)&(df2["HOUR"] == hour)&(df2["MINUTE"] == minute)]


                            #select out the trips that are originating from the given community area within the given fifteen minute window



                            #drop the census tract pairs that have unsuppressed trips because they do not need to be assigned any unsuppressed trips


                            #od_trips['DROP_FLAG'] = od_trips.apply(lambda row: assign_suppressed_trips(row, df, od, day, hour, minute), axis = 1)

                            #od_trips = od_trips[od_trips['DROP_FLAG'] == False]
                            od_trips['SCALAR'] = np.random.dirichlet(np.ones(len(od_trips)))
                            
                            print(od_trips)
                            
                            od_trips['SUP_PRIVATE_TRIPS'] = od_trips['SUP_PRIVATE_TRIPS']*od_trips['SCALAR']
                            od_trips['SUP_SHARED_TRIPS'] = od_trips['SUP_SHARED_TRIPS']*od_trips['SCALAR']
                            df3 = df3.append(od_trips)

                        df4 = df4.append(df3)


                    df5 = df5.append(df4)

                df6 = df6.append(df5)

            df6['TOD'] = tod
            #aggregating here to reduce the size of the table as the script moves along
            df6 = df6.groupby(by = ['GEOID_PICKUP', 'GEOID_DROPOFF', 'YEAR','MONTH','DAY','TOD'], as_index = False).agg(agg)

            df7 = df7.append(df6)

        df8 = df8.append(df7)

    df9 = df9.append(df8)

df9 = df9.groupby(by = ['GEOID_PICKUP', 'GEOID_DROPOFF', 'YEAR','MONTH','DAY','TOD'], as_index = False).agg(agg)

NameError: name 'conflation_path' is not defined

## Total Number of Suppressed Trips

In [None]:
df_all.SCALED_SUP_PRIVATE_TRIPS.sum() + df_all.SCALED_SUP_SHARED_TRIPS.sum()

## Aggregate the data to an average weekday by TOD

In [12]:
grouped.to_csv('C:/Workspace/TNC-Demand-Model/Inputs/Chicago Ride-Hailing/Monthly Suppressed Trips.csv')

## Visualize the Suppressed Trips

In [13]:
grouped['TOTAL_SUP_TRIPS'] = grouped.SCALED_SUP_PRIVATE_TRIPS + grouped.SCALED_SUP_SHARED_TRIPS

In [14]:
origin = grouped[['GEOID_PICKUP', 'TOTAL_SUP_TRIPS']].groupby(by = 'GEOID_PICKUP', as_index = False).sum()
dest = grouped[['GEOID_DROPOFF', 'TOTAL_SUP_TRIPS']].groupby(by = 'GEOID_DROPOFF', as_index = False).sum()

In [15]:
geo = gp.read_file('C:/Workspace/TNC-Demand-Model/Inputs/Census Shapefiles/Chicago Tracts/geo_export_558aad9f-98d8-4dd5-a6b1-c1730155d596.shp')

In [16]:
origin[origin['GEOID_PICKUP'] == 17031081403]

Unnamed: 0,GEOID_PICKUP,TOTAL_SUP_TRIPS
137,17031081403,116.343629


In [17]:
origin['GEOID_PICKUP'] = origin.GEOID_PICKUP.astype(float)
dest['GEOID_DROPOFF'] = dest.GEOID_DROPOFF.astype(float)

In [18]:
geo['geoid10'] = geo.geoid10.astype(float)

In [19]:
centroids = pd.read_csv('C:/Workspace/TNC-Demand-Model/otp/points.csv')
centroids = gp.GeoDataFrame(centroids)

In [20]:
geo[geo['geoid10'] == 17031081403]

Unnamed: 0,commarea,commarea_n,countyfp10,geoid10,name10,namelsad10,notes,statefp10,tractce10,geometry
165,8,8.0,31,17031080000.0,814.03,Census Tract 814.03,Small unpopulated area in CA 32,17,81403,"POLYGON ((-87.60953 41.89096, -87.60484 41.891..."


In [21]:
m = folium.Map([41.8781, -87.6298], zoom_start=11)
    
    
# Add the color for the chloropleth:
folium.Choropleth(
 geo_data=geo,
 name= "Suppressed Pickups",
 data=origin,
 columns = ['GEOID_PICKUP', 'TOTAL_SUP_TRIPS'],
 key_on='feature.properties.geoid10',
 fill_color='BuGn',
 fill_opacity=0.6,
 line_opacity=0.2,
 legend_name='Average Weekday Pickups',
 highlight = True
).add_to(m)

folium.Choropleth(
 geo_data=geo,
 name= "Suppressed Dropoffs",
 data=dest,
 columns = ['GEOID_DROPOFF', 'TOTAL_SUP_TRIPS'],
 key_on='feature.properties.geoid10',
 fill_color='BuGn',
 fill_opacity=0.6,
 line_opacity=0.2,
 legend_name='Average Weekday Dropoffs',
 highlight = True
).add_to(m)

feature_group = folium.FeatureGroup(name='Census Tract Centroids', show = False)


for tract2 in centroids.GEOID:
    row = centroids[centroids['GEOID'] == tract2]
    folium.CircleMarker([row['Y'], row['X']], popup = str(int(row['GEOID'].values[0])), radius = 1, fill = True, fill_color = 'grey', color = 'grey').add_to(feature_group)

m.add_child(feature_group)


folium.LayerControl().add_to(m)

m.save('C:/Workspace/TNC-Demand-Model/Data Exploration/Suppressed Ridehailing Maps/Suppressed_Trips.html')

print('Everything is Complete!')

Everything is Complete!
