In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the run configuration; every path and lookup table used below is read
# from config.yaml in the notebook's working directory.
with open('config.yaml', 'r') as config_file:
    params = yaml.safe_load(config_file)

# short aliases for the os.path helpers
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# paths and run identifiers
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

# lookup tables that translate raw model codes into reporting categories
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
acc_egg_modes = params['access_egress_modes']

# `time_periods` and `period` both hold the configured 'periods' entry;
# `period` is kept because later cells refer to it by that name.
time_periods = params['periods']
period = params['periods']

In [3]:
# Outputs of the CT-RAMP model (tour and trip files) live under <model_dir>/main.
household_model_dir = _join(model_outputs_dir, "main")

# Input household and person data for the configured iteration.
# Path components are passed to os.path.join separately instead of embedding a
# Windows-only '\\' separator, so the notebook also runs on POSIX hosts.
person_file = _join(ctramp_dir, 'main', 'personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main', 'householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

# Only the household id and its TAZ are needed; the TAZ becomes the home zone.
hh = pd.read_csv(household_file, usecols=['hh_id', 'taz'])
hh = hh.rename(columns={'taz': 'home_zone'})

# TAZ to RDM zones, super districts, county
# columns: taz, rdm_zones, super_district, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv"))

# TAZ to priority population
# columns: taz, pp_share
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx"))

# transbay OD pairs
# columns: transbay_o, transbay_d
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv"))

# Demand matrices and skims produced by the model run.
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
nmt_skims_dir = _join(skims_dir, "active")

# Walk speed used to convert between walk distance and walk time
# (the conversions below multiply/divide by 60, i.e. speed per hour vs minutes).
walk_speed = params['walk_speed']

# Column order of the final performance-measure table.
perf_measure_columns = params['final_columns']

In [5]:
# Build the trip roster from the CT-RAMP outputs, passing in the household home
# zones, priority-population shares, transbay OD pairs, geography crosswalk and
# the Link21 purpose mapping loaded above.
df_trips = create_trip_roster(
    ctramp_dir,
    hh,
    pp_perc,
    transbay_od,
    geo_cwks,
    link21_purp_mapping,
    iteration,
)

NAs in PP Share: 0


In [7]:
# Translate the raw departure hour and trip mode code into the reporting
# categories defined in the configuration.
departure_period = df_trips['depart_hour'].map(time_period_mapping)
trip_mode_category = df_trips['trip_mode'].map(mode_cat_mapping)

df_trips['Period'] = departure_period
df_trips['Mode'] = trip_mode_category

### Adding Walk distance to pure walking trips

In [8]:
# Pure walking trips (trip_mode code 4).
walk_trips = df_trips[df_trips['trip_mode'] == 4]

# Read the OD walk distance from the non-motorised skim. The matrix is copied
# into memory, so the OMX file handle can (and should) be closed right away
# instead of being left open for the lifetime of the kernel.
nm_skims = omx.open_file(_join(nmt_skims_dir, 'nonmotskm.omx'))
try:
    nm_dist = array2df(np.array(nm_skims['DISTWALK']), cols=['orig', 'dest', 'walk_dist'])
finally:
    nm_skims.close()

# Left join keeps every walk trip even when its OD pair has no skim entry.
walk_trips = pd.merge(walk_trips, nm_dist, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
walk_trips['walk_time'] = (walk_trips['walk_dist'] / walk_speed) * 60  # convert to minutes

### Adding walk access (wacc) + walk egress time on transit trips

In [11]:
# Keep only the trips whose trip_mode code is 6, 7 or 8
# (presumably the walk/PNR/KNR transit modes - confirm against the mode mapping).
transit_trip_modes = [6, 7, 8]
is_transit_trip = df_trips['trip_mode'].isin(transit_trip_modes)
df_trn = df_trips.loc[is_transit_trip]

##### Create OMX file with walk time between OD pairs for all 25 skims

In [None]:
# Write the per-period OMX files holding walk access time between OD pairs.
# `time_periods` is passed rather than `period`: both hold the configured
# period list on a fresh kernel, but a later cell reuses `period` as its loop
# variable, which would leave a single period string here on re-execution.
create_rail_wacc_od_pairs(transit_demand_dir, transit_skims_dir, time_periods, acc_egg_modes)

#### Add walk access time to each Rail inclusive trip

In [10]:
def _rail_trips_with_walk_access(trips, rail_od_file, wacc_file, core):
    """Attach rail OD flag and walk access time for one skim core.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip records with 'orig_taz' and 'dest_taz' columns.
    rail_od_file, wacc_file : open OMX files
        The per-period rail OD file and rail walk-access file.
    core : str
        Name of the matrix core to read from both files.

    Returns
    -------
    pandas.DataFrame
        The trips whose OD pair has rail_od > 0 and a walk-access entry, with
        the 'rail_od' and 'wacc' columns added (inner joins drop the rest).
    """
    rail_od = skim_core_to_df(rail_od_file, core)
    rail_od = rail_od[rail_od['rail_od'] > 0]
    walk_access = skim_core_to_df(wacc_file, core, cols=['orig', 'dest', 'wacc'])

    od_join = dict(left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='inner')
    matched = pd.merge(trips, rail_od, **od_join)
    return pd.merge(matched, walk_access, **od_join)


# (Mode label, inbound filter, skim core).
# inbound filter: None = all trips, True = inbound == 1 (returning home),
# False = inbound != 1 (leaving home).
RAIL_ACCESS_SEGMENTS = [
    ('WALK_TRANSIT', None, 'WLK_TRN_WLK'),
    ('PNR_TRANSIT', True, 'WLK_TRN_PNR'),
    ('PNR_TRANSIT', False, 'PNR_TRN_WLK'),
    ('KNR_TRANSIT', True, 'WLK_TRN_KNR'),
    ('KNR_TRANSIT', False, 'KNR_TRN_WLK'),
]

df_temp = []

# The loop variable is deliberately not called `period`, so the configured
# `period` list is not overwritten by the last period string.
for time_period in time_periods:
    print(f'processing - {time_period}')

    period_tag = time_period.upper()
    period_trips = df_trn[df_trn['Period'] == time_period]

    # Both OMX files are closed once the period is processed; the original
    # loop leaked two open handles per period.
    rail_od_file = omx.open_file(_join(transit_demand_dir, "rail_od_v9_trim_" + period_tag + ".omx"))
    try:
        wacc_file = omx.open_file(_join(transit_demand_dir, "rail_wacc_od_v9_trim_" + period_tag + ".omx"))
        try:
            segments = []
            for mode_label, inbound_only, core in RAIL_ACCESS_SEGMENTS:
                segment_trips = period_trips[period_trips['Mode'] == mode_label]
                if inbound_only is not None:
                    is_inbound = segment_trips['inbound'] == 1
                    segment_trips = segment_trips[is_inbound] if inbound_only else segment_trips[~is_inbound]
                segments.append(
                    _rail_trips_with_walk_access(segment_trips, rail_od_file, wacc_file, core)
                )
        finally:
            wacc_file.close()
    finally:
        rail_od_file.close()

    # Same row order as before: walk, PNR inbound/outbound, KNR inbound/outbound.
    df_temp.append(pd.concat(segments, ignore_index=True))

# One frame for all periods (index restarts within each period, as before).
df_trn_rail = pd.concat(df_temp)

processing - am
processing - md
processing - pm
processing - ev
processing - ea


In [67]:
# Rescale the skim walk access value by 1/100 (presumably stored in hundredths
# of a minute - confirm). Not idempotent: re-running this cell divides again.
df_trn_rail['wacc'] = df_trn_rail['wacc'].div(100)

In [68]:
# Walk access time (minutes) x walk speed (per hour) / 60 -> distance in miles.
df_trn_rail['walk_dist'] = df_trn_rail['wacc'].mul(walk_speed).div(60)

In [69]:
# Identifier, trip weight and walk access time of the rail trips.
rail_walk_columns = ['hh_id', 'person_id', 'trips', 'wacc']
df_trn_walk = df_trn_rail[rail_walk_columns]

In [70]:
# Identifier, trip weight and walk time of the pure walking trips.
pure_walk_columns = ['hh_id', 'person_id', 'trips', 'walk_time']
df_walk = walk_trips[pure_walk_columns]

In [71]:
# Stack rail walk-access records on top of the pure walk records.
# ignore_index=True already yields a fresh 0..n-1 index.
df_walk = pd.concat(
    [df_trn_walk, df_walk],
    ignore_index=True,
)

In [72]:
# Each record carries only one of the two time columns after the concat;
# treat the missing one as zero minutes.
for time_col in ('wacc', 'walk_time'):
    df_walk[time_col] = df_walk[time_col].fillna(0)

In [73]:
# Total walking time per record: walk access time plus pure walk time.
df_walk['total_walk_time'] = df_walk['wacc'].add(df_walk['walk_time'])

In [74]:
# Records without a person_id (presumably joint-tour trips - confirm).
has_no_person = df_walk['person_id'].isnull()
df_walk_jnt = df_walk[has_no_person]

In [76]:
# Records without a person_id that walk more than 30 minutes, and their summed trip weight.
jnt_over_30_min = df_walk_jnt['total_walk_time'].gt(30)
df_walk_jnt_gt30 = df_walk_jnt[jnt_over_30_min]
df_walk_jnt_gt30['trips'].sum()

493.33333333333337

In [80]:
# Records with a person_id: add up the walking time per person.
# 'trips' is part of the group key so the trip weight survives the aggregation.
df_walk_inm = (
    df_walk[df_walk['person_id'].notna()]
    .groupby(['person_id', 'trips'])['total_walk_time']
    .sum()
    .reset_index()
)

In [81]:
# Persons whose summed walking time exceeds 30 minutes, and their summed weight.
inm_over_30_min = df_walk_inm['total_walk_time'].gt(30)
df_walk_inm_gt30 = df_walk_inm[inm_over_30_min]
df_walk_inm_gt30['trips'].sum()

691233.3333333337

In [83]:
# Empty results table with the final performance-measure schema.
df_wk = pd.DataFrame(
    columns=perf_measure_columns,
)

In [84]:
# B2.2.1: weighted count of people walking for more than 30 minutes
# (records with a person_id plus records without one).
# DataFrame.append was removed in pandas 2.0, so the single-row table is built
# directly; this also makes the cell safe to re-run without duplicating the row.
b221_record = {
    'Concept_ID': concept_id,
    'Metric_ID': 'B2.2',
    'Metric_name': 'Active Mode',
    'Submetric': 'B2.2.1',
    'Description': 'Count of people walking for more than 30 minutes',
    'Population': 'Whole Population',
    'Period': '',
    'Geography': 'Regional',
    'Zone_ID': '',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Value': df_walk_inm_gt30['trips'].sum() + df_walk_jnt_gt30['trips'].sum(),
    'Units': 'Count',
    'Total_Increment': '',
}
df_wk = pd.DataFrame([b221_record], columns=perf_measure_columns)

In [85]:
# Display the assembled B2.2.1 record for a visual check.
df_wk

Unnamed: 0,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Period,Geography,Zone_ID,Origin_zone,Dest_zone,Purpose,Value,Units,Total_Increment
0,BaseYear2015,B2.2,Active Mode,B2.2.1,Count of people walking for more than 30 minutes,Whole Population,,Regional,,,,,691726.666667,Count,


#### Trip length frequency distributions (TLFDs) of walk distance

In [86]:
# Inspect the rail trips with their attached walk access time and distance.
# NOTE(review): this renders the whole frame; df_trn_rail.head() would keep the output small.
df_trn_rail

Unnamed: 0,hh_id,person_id,person_num,tour_id,stop_id,inbound,tour_purpose,orig_purpose,dest_purpose,orig_taz,...,link21_trip_purp,Period,Mode,orig_x,dest_x,rail_od,orig_y,dest_y,wacc,walk_dist
0,1282915,3406290.0,1.0,11,-1,0,atwork_eat,Work,atwork,2302,...,work,am,WALK_TRANSIT,2302,607,1.0,2302,607,7.911990,0.395599
1,1356790,3547605.0,1.0,11,-1,0,atwork_eat,Work,atwork,2201,...,work,am,WALK_TRANSIT,2201,607,1.0,2201,607,19.585566,0.979278
2,1372915,3582807.0,1.0,11,1,0,atwork_maint,othmaint,atwork,444,...,othmaint,am,WALK_TRANSIT,444,61,1.0,444,61,12.486686,0.624334
3,1408517,3661287.0,1.0,0,-1,0,work_low,Home,work,444,...,work,am,WALK_TRANSIT,444,61,1.0,444,61,12.486686,0.624334
4,1615841,4265646.0,2.0,11,-1,0,atwork_maint,Work,atwork,674,...,work,am,WALK_TRANSIT,674,607,1.0,674,607,15.145755,0.757288
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2282,1135470,3099279.0,1.0,0,-1,0,work_low,Home,work,3083,...,work,ea,KNR_TRANSIT,3083,1820,1.0,3083,1820,10.528389,0.526419
2283,2596221,7085165.0,2.0,0,-1,0,university,Home,university,3104,...,school,ea,KNR_TRANSIT,3104,339,1.0,3104,339,6.949893,0.347495
2284,1043156,2863911.0,1.0,0,-1,0,work_high,Home,work,3277,...,work,ea,KNR_TRANSIT,3277,2415,1.0,3277,2415,9.869334,0.493467
2285,1085728,2968520.0,1.0,0,-1,0,work_low,Home,work,3301,...,work,ea,KNR_TRANSIT,3301,88,1.0,3301,88,7.910476,0.395524


In [25]:
# Walk access time (minutes) x walk speed (per hour) / 60 -> distance in miles.
# 'wacc' has already been divided by 100 in an earlier cell, so the extra
# "/ 100" that used to be here made distances 100x too small when the notebook
# is run top to bottom.
df_trn_rail['walk_dist'] = df_trn_rail['wacc'] * walk_speed / 60

In [26]:
# Summary statistics of the walk distance attached to the rail trips.
df_trn_rail['walk_dist'].describe()

count    58940.000000
mean         0.548976
std          0.323604
min          0.001135
25%          0.278810
50%          0.525060
75%          0.748136
max          1.492653
Name: walk_dist, dtype: float64

In [29]:
# List the columns available on the rail trip table for the summaries below.
df_trn_rail.columns

Index(['hh_id', 'tour_id', 'stop_id', 'inbound', 'tour_purpose',
       'orig_purpose', 'dest_purpose', 'orig_taz', 'dest_taz', 'depart_hour',
       'trip_mode', 'tour_mode', 'tour_category', 'tours', 'transbay_od',
       'orig_rdm_zones', 'orig_super_dist', 'orig_county', 'dest_rdm_zones',
       'dest_super_dist', 'dest_county', 'home_zone', 'pp_share',
       'new_dest_purp', 'new_orig_purp', 'link21_tour_purp',
       'link21_orig_purp', 'link21_dest_purp', 'link21_trip_purp', 'trips',
       'Period', 'Mode', 'orig_x', 'dest_x', 'rail_od', 'orig_y', 'dest_y',
       'wacc', 'walk_dist'],
      dtype='object')

In [31]:
# Show the output schema (the 'final_columns' entry of the configuration).
perf_measure_columns

['Concept_ID',
 'Metric_ID',
 'Metric_name',
 'Submetric',
 'Description',
 'Population',
 'Period',
 'Geography',
 'Zone_ID',
 'Origin_zone',
 'Dest_zone',
 'Purpose',
 'Value',
 'Units',
 'Total_Increment']

In [None]:
# B2.1 - Active mode access to rail: total walk distance on rail trips,
# summarised by period for each reporting geography.
#
# This cell replaces a never-executed template that could not run: it grouped
# the `df_temp` list instead of the rail trip table, used an undefined
# `columns` variable and a stale `period`, misspelled schema columns
# ('Orig_zone', 'Zone_Id'), concatenated empty lists and overwrote
# `time_periods`.
#
# NOTE(review): every geography uses the unweighted sum of 'walk_dist',
# following the regional "Total walk distance" description; the template used
# .mean() for the sub-regional geographies - confirm which is intended, and
# whether the 'trips' weight should be applied.


def _b21_walk_distance(trips, zone_cols, geography, submetric, zone_id='', by_purpose=False):
    """Sum rail-access walk distance by period and format it as B2.1 rows.

    Parameters
    ----------
    trips : pandas.DataFrame
        Rail trips with 'Period', 'walk_dist' and the grouping columns.
    zone_cols : sequence of str
        Either empty, or (origin column, destination column) to group by.
    geography : str
        Value written to the 'Geography' column.
    submetric : str
        Value written to the 'Submetric' column.
    zone_id : str, optional
        Value written to the 'Zone_ID' column.
    by_purpose : bool, optional
        Also group by 'link21_trip_purp' and report it as 'Purpose'.

    Returns
    -------
    pandas.DataFrame
        One row per group, with exactly the `perf_measure_columns` columns.
    """
    group_cols = ['Period']
    if by_purpose:
        group_cols.append('link21_trip_purp')
    group_cols.extend(zone_cols)

    summary = trips.groupby(group_cols)['walk_dist'].sum().reset_index()

    renames = {'walk_dist': 'Value', 'link21_trip_purp': 'Purpose'}
    if zone_cols:
        renames[zone_cols[0]] = 'Origin_zone'
        renames[zone_cols[1]] = 'Dest_zone'
    summary = summary.rename(columns=renames)

    constants = {
        'Concept_ID': concept_id,
        'Metric_ID': 'B2.1',
        'Metric_name': 'Active mode access to rail',
        'Submetric': submetric,
        'Description': 'Active mode access to rail: Total walk distance',
        'Population': 'Whole Population',
        'Geography': geography,
        'Zone_ID': zone_id,
        'Units': 'miles',
        'Total_Increment': '',
    }
    for column, value in constants.items():
        summary[column] = value

    # Dimensions that were not part of the grouping are reported as blank.
    for column in ('Purpose', 'Origin_zone', 'Dest_zone'):
        if column not in summary.columns:
            summary[column] = ''

    return summary[perf_measure_columns]


# regional value, by period and trip purpose
reg_df = _b21_walk_distance(df_trn_rail, (), 'Regional', 'B2.1.1',
                            zone_id='Megaregion', by_purpose=True)

# RDM zones
rdm_df = _b21_walk_distance(df_trn_rail, ('orig_rdm_zones', 'dest_rdm_zones'),
                            'RDM', 'B2.1.3')

# super district
sd_df = _b21_walk_distance(df_trn_rail, ('orig_super_dist', 'dest_super_dist'),
                           'Superdistrict', 'B2.1.4')

# county
county_df = _b21_walk_distance(df_trn_rail, ('orig_county', 'dest_county'),
                               'County', 'B2.1.5')

# transbay region: only trips flagged as transbay OD pairs
tb_df = _b21_walk_distance(df_trn_rail[df_trn_rail['transbay_od'] == 1], (),
                           'Transbay', 'B2.1.6', zone_id='Megaregion')

# TODO: priority-population summary. The template never computed it, so an
# empty table with the final schema is kept as a placeholder.
pp_df = pd.DataFrame(columns=perf_measure_columns)