In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']

In [3]:
#Read data

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

#get geogrpahies and priority population in the same file
geo_pp_cwks = pd.merge(geo_cwks, pp_perc, on = 'taz', how = 'left') 

#transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

In [4]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "CTRAMP\main")

# input household and person data
person_file = _join(household_model_dir, 'personData_' + str(iteration) + '.csv')
household_file = _join(household_model_dir, 'householdData_' + str(iteration) + '.csv')

person_file = _join(household_model_dir, 'personData_' + str(iteration) + '.csv')
person = pd.read_csv(person_file)

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
hh = hh.rename(columns = {'taz': 'home_zone'})

# taz to RDM zones, super districts, county
#geo_cwks = pd.read_csv(_join(cwks, "geographies.csv")) #columns taz, rdm_zones, super_district, county

# taz to priority population
#pp_perc = pd.read_excel(_join(cwks, "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
# transbay_od = pd.read_csv(_join(cwks, "transbay_od.csv")) #columns = transbay_o, transbay_d

# outputs of CT-RAMP model for tour file
ind_tour = pd.read_csv(_join(household_model_dir, 'indivTourData_' + str(iteration) + '.csv'))
jnt_tour = pd.read_csv(_join(household_model_dir, 'jointTourData_' + str(iteration) + '.csv'))

print("total joint tours:", len(jnt_tour))
print("total inm tours:", len(ind_tour))

jnt_tour['tours'] = 'joint'
ind_tour['tours'] = 'inm'

person = person.rename(columns={'person_num':'PNUM','sampleRate':'sample_rate'})
ind_tour = ind_tour.rename(columns={'person_num':'PNUM','sampleRate':'sample_rate'})
jnt_tour = jnt_tour.rename(columns={'sampleRate':'sample_rate'})

tour0 = jnt_tour[['hh_id','tour_id','tour_participants','tour_mode', 'tour_purpose']]
tour0['JTOUR_ID'] = tour0['tour_id'] + 1
tour0['num'] = tour0['tour_participants'].apply(lambda x: len(list(x.split(" "))))

c = pd.DataFrame(tour0.tour_participants.str.split(" ").to_list()).stack().reset_index(name="PNUM")

tour1 = tour0.loc[tour0.index.repeat(tour0.num)]
tour1['PNUM'] = c['PNUM']
tour1['PNUM'] = tour1['PNUM'].astype(int)
tour1 = tour1.merge(person[['hh_id','person_id','PNUM']], how='left', on = ['hh_id','PNUM'])
tour1.head()

tour1 = tour1[["hh_id","person_id","tour_id","PNUM",'JTOUR_ID']]
tour1_2 = tour1.merge(jnt_tour, how = 'left', on = ["hh_id","tour_id"])
tour1_2.drop(["tour_composition", "tour_participants"], axis=1, inplace=True)

tour2 = ind_tour.copy()
tour2 = tour2.drop(["atWork_freq", "person_type"], axis = 1)
tour2['JTOUR_ID']=0

#Create the tour roster
out_tourdata = pd.concat([tour1_2, tour2])
out_tourdata['TOURID'] = out_tourdata.fillna('')['tour_category'].apply(str) + "." + out_tourdata.fillna('')['tour_purpose'].apply(str) + "." + out_tourdata.fillna('')['tour_id'].apply(str)
out_tourdata['TourType'] = 'Closed'
out_tourdata = out_tourdata.sort_values(by=['hh_id','person_id','start_hour','end_hour'])

#add transbay_od to final tours
out_tourdata = pd.merge(out_tourdata, transbay_od, left_on= ['orig_taz', 'dest_taz'], right_on = ['transbay_o', 'transbay_d'], how = 'left')
out_tourdata['transbay_od'] = out_tourdata['transbay_od'].fillna(0)

out_tourdata = out_tourdata.drop(columns = ['transbay_o', 'transbay_d'])
print(out_tourdata['transbay_od'].value_counts())

#add geographies to final tours
out_tourdata = pd.merge(out_tourdata, geo_cwks, left_on = ['orig_taz'], right_on = ['taz'], how = 'left')
out_tourdata = out_tourdata.rename(columns = {'rdm_zones':'orig_rdm_zones', 
                                            'super_district': 'orig_super_dist',
                                            'county': 'orig_county'})
del out_tourdata['taz']

out_tourdata = pd.merge(out_tourdata, geo_cwks, left_on = ['dest_taz'], right_on = ['taz'], how = 'left')
out_tourdata = out_tourdata.rename(columns = {'rdm_zones':'dest_rdm_zones', 
                                            'super_district': 'dest_super_dist',
                                            'county': 'dest_county'})

del out_tourdata['taz']
out_tourdata.columns

#adding home zone
out_tourdata = pd.merge(out_tourdata, hh, on = 'hh_id', how = 'left')

#add prioirty population
out_tourdata = pd.merge(out_tourdata, pp_perc, left_on = ['home_zone'], right_on = ['taz'], how = 'left')
print("NAs in PP Share:",  out_tourdata['pp_share'].isna().sum())

del out_tourdata['taz']

print("total tours:", len(out_tourdata))
print(out_tourdata['tours'].value_counts())

print("Sum of NAs: ", out_tourdata.isna().sum())

total joint tours: 31635
total inm tours: 1492486
0.0    1520844
1.0      52051
Name: transbay_od, dtype: int64
NAs in PP Share: 0
total tours: 1572895
inm      1492486
joint      80409
Name: tours, dtype: int64
Sum of NAs:  hh_id                    0
person_id            17874
tour_id                  0
PNUM                     0
JTOUR_ID                 0
tour_category            0
tour_purpose             0
orig_taz                 0
orig_walk_segment        0
dest_taz                 0
dest_walk_segment        0
start_hour               0
end_hour                 0
tour_mode                0
num_ob_stops             0
num_ib_stops             0
avAvailable              0
dcLogsum                 0
sample_rate              0
origTaxiWait             0
destTaxiWait             0
origSingleTNCWait        0
destSingleTNCWait        0
origSharedTNCWait        0
destSharedTNCWait        0
tours                    0
TOURID                   0
TourType                 0
transbay_od        

In [5]:
# add periods

In [6]:
#out_tourdata = prepare_tour_roster_df(model_outputs_dir, #
#        cwks_folder, # folder which has crosswalks
#        iteration)

In [7]:
time_period_mapping

{0: 'ev',
 1: 'ev',
 2: 'ev',
 3: 'ea',
 4: 'ea',
 5: 'ea',
 6: 'am',
 7: 'am',
 9: 'am',
 10: 'md',
 11: 'md',
 12: 'md',
 13: 'md',
 14: 'md',
 15: 'pm',
 16: 'pm',
 17: 'pm',
 18: 'pm',
 19: 'ev',
 20: 'ev',
 21: 'ev',
 22: 'ev',
 23: 'ev'}

In [8]:
out_tourdata['Period'] = out_tourdata['start_hour'].map(time_period_mapping)
out_tourdata['Period'].value_counts()

am    478483
md    447012
pm    268537
ev     58069
ea     55883
Name: Period, dtype: int64

In [9]:
#data inputs 
df_tours = out_tourdata.copy()
df_tours['tours'] = 1

### Regional Mode Share

In [10]:
#df_region_share = df_tours.groupby(['tour_mode', 'tour_purpose'])['tours'].sum().reset_index()

In [11]:
#regional mode share
# df_region_share['Value'] = df_region_share['tours']/df_region_share['tours'].sum()
# df_region_share = df_region_share.rename(columns={'tour_mode': 'Mode', 'tour_purpose': 'Purpose'})
# df_region_share = df_region_share[['Mode', 'Purpose', 'Value']]

# df_region_share['Concept_ID'] = concept_id
# df_region_share['Metric_ID'] = 'A3.3'
# df_region_share['Metric_name'] = 'Mode Shares'
# df_region_share['Submetric'] = 'A3.3.1'
# df_region_share['Description'] = 'Regional mode share'
# df_region_share['Population'] = 'Whole Population'
# df_region_share['Period'] = ''
# df_region_share['Geography'] = 'Region'
# df_region_share['Zone_ID'] = 'Megaregion'
# df_region_share['Origin_zone'] = ''
# df_region_share['Dest_zone'] = ''
# df_region_share['Dest_zone'] = ''
# df_region_share['Units'] = 'Percentage share'
# df_region_share['Total_Increment'] = 'Increment'

### Regional mode share by period

In [12]:
df_region_period = df_tours.groupby(['tour_mode', 'Period', 'tour_purpose'])['tours'].sum().reset_index()

df_region_period['Value'] = df_region_period['tours']/df_region_period['tours'].sum()
df_region_period = df_region_period.rename(columns={'tour_mode': 'Mode', 'tour_purpose': 'Purpose'})
df_region_period = df_region_period[['Mode', 'Period', 'Purpose', 'Value']]

df_region_period['Concept_ID'] = concept_id
df_region_period['Metric_ID'] = 'A3.3'
df_region_period['Metric_name'] = 'Mode Shares'
df_region_period['Submetric'] = 'A3.3.1'
df_region_period['Description'] = 'Regional mode share by time period'
df_region_period['Population'] = 'Whole Population'
df_region_period['Geography'] = 'Region'
df_region_period['Zone_ID'] = 'Megaregion'
df_region_period['Origin_zone'] = ''
df_region_period['Dest_zone'] = ''
df_region_period['Units'] = 'Percentage share'
df_region_period['Total_Increment'] = 'Increment'

In [15]:
df_region_period

Unnamed: 0,Mode,Period,Purpose,Value,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Geography,Zone_ID,Origin_zone,Dest_zone,Units,Total_Increment
0,1,am,atwork_business,3.715642e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
1,1,am,atwork_eat,1.496196e-03,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
2,1,am,atwork_maint,2.981688e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
3,1,am,eatout,7.821197e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
4,1,am,othdiscr,4.789049e-03,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,9,pm,university,3.104014e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
657,9,pm,work_high,3.822677e-06,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
658,9,pm,work_low,7.645353e-07,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
659,9,pm,work_med,7.645353e-07,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment


## Mode Share in Transbay split

In [16]:
df_tb = df_tours.groupby(['transbay_od', 'tour_mode', 'Period', 'tour_purpose'])['tours'].sum().reset_index()

df_tb = df_tb.loc[df_tb['transbay_od'] == 1]

df_tb['Value'] = df_tb['tours']/df_tb['tours'].sum()
df_tb = df_tb.rename(columns={'tour_mode': 'Mode', 'tour_purpose': 'Purpose', 'transbay_od' : 'Zone_ID'})
df_tb = df_tb[['Mode', 'Period', 'Purpose', 'Value']]

df_tb['Concept_ID'] = concept_id
df_tb['Metric_ID'] = 'A3.3'
df_tb['Metric_name'] = 'Mode Shares'
df_tb['Submetric'] = 'A3.3.2'
df_tb['Description'] = 'Regional mode share by time period in transbay region'
df_tb['Population'] = 'Whole Population'
df_tb['Geography'] = 'Transbay region'
#df_region_period['Zone_ID'] =
df_tb['Origin_zone'] = ''
df_tb['Dest_zone'] = ''
df_tb['Units'] = 'Percentage share'
df_tb['Total_Increment'] = 'Increment'

In [17]:
df_tb = df_tb.loc[df_tb['transbay_od'] == 1]

In [18]:
df_tb['Value'] = df_tb['tours']/df_tb['tours'].sum()
df_tb = df_tb.rename(columns={'tour_mode': 'Mode', 'tour_purpose': 'Purpose', 'transbay_od' : 'Zone_ID'})
df_tb = df_tb[['Mode', 'Period', 'Purpose', 'Value']]

In [19]:
df_tb['Concept_ID'] = concept_id
df_tb['Metric_ID'] = 'A3.3'
df_tb['Metric_name'] = 'Mode Shares'
df_tb['Submetric'] = 'A3.3.2'
df_tb['Description'] = 'Regional mode share by time period'
df_tb['Population'] = 'Whole Population'
df_tb['Geography'] = 'Transbay region'
#df_region_period['Zone_ID'] =
df_tb['Origin_zone'] = ''
df_tb['Dest_zone'] = ''
df_tb['Units'] = 'Percentage share'
df_tb['Total_Increment'] = 'Increment'

In [20]:
df_region_period

Unnamed: 0,Mode,Period,Purpose,Value,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Geography,Zone_ID,Origin_zone,Dest_zone,Units,Total_Increment
0,1,am,atwork_business,3.715642e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
1,1,am,atwork_eat,1.496196e-03,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
2,1,am,atwork_maint,2.981688e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
3,1,am,eatout,7.821197e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
4,1,am,othdiscr,4.789049e-03,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,9,pm,university,3.104014e-04,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
657,9,pm,work_high,3.822677e-06,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
658,9,pm,work_low,7.645353e-07,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment
659,9,pm,work_med,7.645353e-07,BaseYear2015,A3.3,Mode Shares,A3.3.1,Regional mode share by time period,Whole Population,Region,Megaregion,,,Percentage share,Increment


### Mode Share by County

In [21]:
df_cnty = df_tours.groupby(['orig_county', 'dest_county', 'tour_mode', 'Period', 'tour_purpose'])['tours'].sum().reset_index()

df_cnty['Value'] = df_cnty['tours']/df_cnty['tours'].sum()
df_cnty = df_cnty.rename(columns={'tour_mode': 'Mode', 
                                  'tour_purpose': 'Purpose', 
                                  'orig_county' : 'Origin_zone',
                                  'dest_county' : 'Dest_zone'})
df_cnty = df_cnty[['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Value']]

df_cnty['Concept_ID'] = concept_id
df_cnty['Metric_ID'] = 'A3.3'
df_cnty['Metric_name'] = 'Mode Shares'
df_cnty['Submetric'] = 'A3.3.3'
df_cnty['Description'] = 'Regional mode share by time period and origin and destination county'
df_cnty['Population'] = 'Whole Population'
df_cnty['Geography'] = 'County'
df_cnty['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
df_cnty['Units'] = 'Percentage share'
df_cnty['Total_Increment'] = 'Increment'

In [23]:
df_cnty

Unnamed: 0,Origin_zone,Dest_zone,Mode,Period,Purpose,Value
0,1,1,1,am,atwork_business,7.645353e-06
1,1,1,1,am,atwork_eat,3.134595e-05
2,1,1,1,am,atwork_maint,6.116283e-06
3,1,1,1,am,eatout,1.146803e-05
4,1,1,1,am,othdiscr,6.039829e-05
...,...,...,...,...,...,...
35876,9,9,9,pm,othdiscr,7.645353e-07
35877,9,9,9,pm,othmaint,1.529071e-06
35878,9,9,9,pm,shopping,2.293606e-06
35879,9,9,9,pm,social,7.645353e-07


### Mode Share by RDM Zones

In [25]:
df_rdm = df_tours.groupby(['orig_rdm_zones', 'dest_rdm_zones', 'tour_mode', 'Period', 'tour_purpose'])['tours'].sum().reset_index()

df_rdm['Value'] = df_rdm['tours']/df_rdm['tours'].sum()
df_rdm = df_rdm.rename(columns={'tour_mode': 'Mode', 
                                'tour_purpose': 'Purpose', 
                                'orig_rdm_zones' : 'Origin_zone',
                                'dest_rdm_zones' : 'Dest_zone'})
df_rdm = df_rdm[['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Value']]

df_rdm['Concept_ID'] = concept_id
df_rdm['Metric_ID'] = 'A3.3'
df_rdm['Metric_name'] = 'Mode Shares'
df_rdm['Submetric'] = 'A3.3.4'
df_rdm['Description'] = 'Regional mode share by time period and origin and destination RDM Zones'
df_rdm['Population'] = 'Whole Population'
df_rdm['Geography'] = 'RDM'
df_rdm['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
df_rdm['Units'] = 'Percentage share'
df_rdm['Total_Increment'] = 'Increment'

### Mode Share by Super District

In [28]:
df_sd = df_tours.groupby(['orig_super_dist', 'dest_super_dist', 'tour_mode', 'Period', 'tour_purpose'])['tours'].sum().reset_index()

df_sd['Value'] = df_sd['tours']/df_sd['tours'].sum()

df_sd = df_sd.rename(columns={'tour_mode': 'Mode', 
                              'tour_purpose': 'Purpose', 
                              'orig_super_dist' : 'Origin_zone',
                              'dest_super_dist' : 'Dest_zone'})
df_sd = df_sd[['Origin_zone', 'Dest_zone', 'Mode', 'Period', 'Purpose', 'Value']]

df_sd['Concept_ID'] = concept_id
df_sd['Metric_ID'] = 'A3.3'
df_sd['Metric_name'] = 'Mode Shares'
df_sd['Submetric'] = 'A3.3.5'
df_sd['Description'] = 'Regional mode share by time period and origin and destination super districts'
df_sd['Population'] = 'Whole Population'
df_sd['Geography'] = 'Super district'
df_sd['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
df_sd['Units'] = 'Percentage share'
df_sd['Total_Increment'] = 'Increment'

In [31]:
#df_shares = pd.concat([df_region_period, df_cnty, df_rdm, df_sd], ignore_index=False)

In [32]:
#df_shares.to_csv(_join())

In [1]:
#with pd.ExcelWriter(os.path.join(summary_outputs, 'concept-BY15.xlsx'), engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
#    df_region_period.to_excel(writer, sheet_name='A3.3.1', startcol=0, index=False)
#    df_tb.to_excel(writer, sheet_name='A3.3.2', startcol=0, index=False)
#    df_cnty.to_excel(writer, sheet_name='A3.3.3', startcol=0, index=False)
#    df_rdm.to_excel(writer, sheet_name='A3.3.4', startcol=0, index=False)
#    df_sd.to_excel(writer, sheet_name='A3.3.5', startcol=0, index=False)

In [None]:
#tab1 = df_tours.groupby(['tour_mode', 'transbay_od','tour_purpose', 'pp_share'])['flag'].count().reset_index()
#tab2 = df_tours.groupby(['tour_mode', 'start_hour'])['flag'].count().reset_index()
#tab3 = df_tours.groupby(['tour_mode', 'orig_county', 'dest_county'])['flag'].count().reset_index()
#tab4 = df_tours.groupby(['tour_mode', 'orig_super_dist', 'dest_super_dist'])['flag'].count().reset_index()
#tab4 = df_tours.groupby(['tour_mode', 'orig_rdm_zones', 'dest_rdm_zones'])['flag'].count().reset_index()
#tab1