In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the run configuration; every setting below comes from config.yaml.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

# Short aliases for the os.path helpers used to build paths.
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# --- run identifiers ---
concept_id = params['concept_id']
iteration = params['iteration']

# --- directories ---
model_outputs_dir = params['model_dir']
ctramp_dir = params['ctramp_dir']
skims_dir = _join(model_outputs_dir, "skims")
preprocess_dir = _join(ctramp_dir, '_pre_process_files')
# Both names refer to the same configured summary directory.
summary_outputs = summary_dir = params['summary_dir']

# --- lookup tables and column lists ---
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
acc_egg_modes = params['access_egress_modes']
perf_measure_columns = params['final_columns']

# `time_periods` and `period` both hold the configured list of periods.
time_periods = params['periods']
period = params['periods']

In [3]:
# Sub-folders of the model output tree: demand matrices and skims.
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir, nmt_skims_dir = (
    _join(skims_dir, sub_folder) for sub_folder in ("transit", "active")
)

# Walking speed from the config; used below as distance / speed * 60 to get minutes.
walk_speed = params['walk_speed']

In [4]:
# Trip roster written by the pre-processing step
trip_roster_path = _join(preprocess_dir, 'trip_roster.parquet')
df_trips = pd.read_parquet(trip_roster_path)

### Adding Walk distance to pure walking trips

In [5]:
# Select trips with trip_mode code 4.
# NOTE(review): code 4 is assumed to mean "walk" from the section heading — confirm.
walk_trips = df_trips[df_trips['trip_mode'] == 4]

# Read the walk-distance matrix into memory and close the OMX file straight away;
# the handle was previously left open for the life of the kernel.
with omx.open_file(_join(nmt_skims_dir, 'nonmotskm.omx')) as nm_skims:
    walk_dist_matrix = np.array(nm_skims['DISTWALK'])
nm_dist = array2df(walk_dist_matrix, cols=['orig', 'dest', 'walk_dist'])

# Attach the OD walk distance to every walk trip; trips without a match keep NaN (left join).
walk_trips = pd.merge(
    walk_trips,
    nm_dist,
    left_on=['orig_taz', 'dest_taz'],
    right_on=['orig', 'dest'],
    how='left',
)
walk_trips['walk_time'] = (walk_trips['walk_dist'] / walk_speed) * 60  # convert to minutes

In [6]:
# Sanity check: largest walk time and walk distance among the walk trips.
tuple(walk_trips[column].max() for column in ('walk_time', 'walk_dist'))

(60.459200478431676, 3.022960023921584)

### Adding walk access (wacc) time + walk egress time on transit trips

In [7]:
# df_trips comes from the pre-processed roster loaded above (the create_trip_roster
# call that used to rebuild it here is disabled).
# Keep only the trips whose trip_mode code is 6, 7 or 8.
transit_mode_codes = [6, 7, 8]
is_transit_trip = df_trips['trip_mode'].isin(transit_mode_codes)
df_trn = df_trips.loc[is_transit_trip]

##### Create OMX file with walk time between OD pairs for all 25 skims

In [8]:
# Read the period list straight from the config so this call does not depend on
# module-level names that other cells may rebind; re-running the cell then still
# passes the full list of periods.
create_rail_wacc_od_pairs(preprocess_dir, transit_skims_dir, params['periods'], acc_egg_modes)

Period:  am
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  md
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  pm
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ev
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ea
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR


#### Add walk access time to each rail-inclusive trip

In [9]:
# (trip 'Mode', inbound filter, skim core)
#   inbound filter: None  -> use every trip of that mode
#                   True  -> trips with inbound == 1 (returning home)
#                   False -> trips with inbound != 1 (leaving home)
RAIL_WALK_CORES = [
    ('WALK_TRANSIT', None, 'WLK_TRN_WLK'),
    ('PNR_TRANSIT', True, 'WLK_TRN_PNR'),
    ('PNR_TRANSIT', False, 'PNR_TRN_WLK'),
    ('KNR_TRANSIT', True, 'WLK_TRN_KNR'),
    ('KNR_TRANSIT', False, 'KNR_TRN_WLK'),
]


def _attach_rail_walk_access(trips, od_skims, wacc_skims, core):
    """Keep the trips whose OD pair uses rail in `core` and attach its 'wacc' value.

    Parameters
    ----------
    trips : DataFrame with 'orig_taz' and 'dest_taz' columns.
    od_skims : open OMX file with the rail OD matrices; the table returned by
        skim_core_to_df is expected to carry a 'rail_od' column.
    wacc_skims : open OMX file with the walk access/egress matrices.
    core : name of the matrix core read from both files.

    Returns
    -------
    DataFrame: `trips` inner-joined to the OD pairs with rail_od > 0 and then to
    the walk access table, both on (orig_taz, dest_taz) == (orig, dest).
    """
    taz_join = dict(left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='inner')

    rail_od = skim_core_to_df(od_skims, core)
    rail_od = rail_od[rail_od['rail_od'] > 0]
    rail_wacc = skim_core_to_df(wacc_skims, core, cols=['orig', 'dest', 'wacc'])  # walk access/egress

    trips_on_rail = pd.merge(trips, rail_od, **taz_join)
    return pd.merge(trips_on_rail, rail_wacc, **taz_join)


df_temp = []

# The loop variable is deliberately not called `period`, so that the configured
# period list stored under that name is not overwritten.
for time_period in time_periods:
    print(f'processing - {time_period}')

    od_path = _join(preprocess_dir, "rail_od_v9_trim_" + time_period.upper() + ".omx")
    wacc_path = _join(preprocess_dir, "rail_wacc_od_v9_trim_" + time_period.upper() + ".omx")
    period_trips = df_trn[df_trn['Period'] == time_period]

    # Both skim files are closed as soon as this period has been processed.
    with omx.open_file(od_path) as od_skims, omx.open_file(wacc_path) as wacc_skims:
        period_pieces = []
        for trip_mode_name, inbound_only, core in RAIL_WALK_CORES:
            mode_trips = period_trips[period_trips['Mode'] == trip_mode_name]
            if inbound_only is not None:
                is_inbound = mode_trips['inbound'] == 1
                mode_trips = mode_trips[is_inbound] if inbound_only else mode_trips[~is_inbound]
            period_pieces.append(
                _attach_rail_walk_access(mode_trips, od_skims, wacc_skims, core)
            )

    # Same row order as before: walk, PNR inbound, PNR outbound, KNR inbound, KNR outbound.
    df_temp.append(pd.concat(period_pieces, ignore_index=True))

df_trn_rail = pd.concat(df_temp)

processing - am
processing - md
processing - pm
processing - ev
processing - ea


In [10]:
# Rebuild the rail-trip table from the per-period pieces before scaling, so that
# re-running this cell does not divide 'wacc' by 100 a second time.
df_trn_rail = pd.concat(df_temp)
# NOTE(review): the division assumes the skim stores the value multiplied by 100 — confirm.
df_trn_rail['wacc'] = df_trn_rail['wacc'] / 100
df_trn_rail['walk_dist'] = df_trn_rail['wacc'] * walk_speed / 60  # convert to miles
df_trn_walk = df_trn_rail[['hh_id', 'person_id', 'trips', 'wacc', 'walk_dist']]

# Stack the rail access records on top of the pure walk trips.
# ignore_index=True already yields a fresh 0..n-1 index, so no reset_index is needed.
df_walk = walk_trips[['hh_id', 'person_id', 'trips', 'walk_time', 'walk_dist']]
df_walk = pd.concat([df_trn_walk, df_walk], ignore_index=True)

# Rail records carry no 'walk_time' and walk trips carry no 'wacc'; treat the gaps as zero.
df_walk['wacc'] = df_walk['wacc'].fillna(0)
df_walk['walk_time'] = df_walk['walk_time'].fillna(0)

df_walk['acc_wt_time'] = df_walk['wacc'] + df_walk['walk_time']

# Split on whether a person_id is present. Take explicit copies so that columns added
# to either part later are written to an independent frame rather than to a slice.
has_no_person = df_walk['person_id'].isna()
df_walk_jnt = df_walk.loc[has_no_person].copy()
df_walk_inm = df_walk.loc[~has_no_person].copy()

In [11]:
# Copy the per-record walk time and distance into columns prefixed by record type.
# The values are taken as-is; the multiplication by 'trips' stays disabled.
df_walk_jnt = df_walk_jnt.assign(
    jt_walk_time=df_walk_jnt['acc_wt_time'],
    jt_walk_dist=df_walk_jnt['walk_dist'],
)
df_walk_inm = df_walk_inm.assign(
    inm_walk_time=df_walk_inm['acc_wt_time'],
    inm_walk_dist=df_walk_inm['walk_dist'],
)

In [12]:
# Inspect the df_walk rows that have no person_id (pandas truncates the display).
df_walk_jnt

Unnamed: 0,hh_id,person_id,trips,wacc,walk_dist,walk_time,acc_wt_time,jt_walk_time,jt_walk_dist
171,2022672,,2.0,19.159294,0.957965,0.000000,19.159294,19.159294,0.957965
172,2021657,,2.0,19.159294,0.957965,0.000000,19.159294,19.159294,0.957965
377,1916533,,3.0,17.599564,0.879978,0.000000,17.599564,17.599564,0.879978
693,2240843,,2.0,12.143672,0.607184,0.000000,12.143672,12.143672,0.607184
843,2790734,,2.0,18.057547,0.902877,0.000000,18.057547,18.057547,0.902877
...,...,...,...,...,...,...,...,...,...
5899582,1616586,,2.0,0.000000,1.637362,32.747232,32.747232,32.747232,1.637362
5899583,1616421,,2.0,0.000000,0.893177,17.863532,17.863532,17.863532,0.893177
5899584,1616546,,2.0,0.000000,0.810215,16.204306,16.204306,16.204306,0.810215
5899585,1616422,,2.0,0.000000,0.907117,18.142350,18.142350,18.142350,0.907117


In [13]:
# Sum walk time and distance per household and 'trips' value.
# The columns are selected with a list: selecting several GroupBy columns with a bare
# tuple (['a', 'b'] without the inner brackets) is rejected by newer pandas versions.
df_walk_jnt_wt = (
    df_walk_jnt
    .groupby(['hh_id', 'trips'])[['jt_walk_time', 'jt_walk_dist']]
    .sum()
    .reset_index()
    .rename(columns={'trips': 'jt_trips'})
)

In [14]:
# Sum walk time and distance per household, person and 'trips' value.
# The columns are selected with a list: selecting several GroupBy columns with a bare
# tuple (['a', 'b'] without the inner brackets) is rejected by newer pandas versions.
df_walk_inm_wt = (
    df_walk_inm
    .groupby(['hh_id', 'person_id', 'trips'])[['inm_walk_time', 'inm_walk_dist']]
    .sum()
    .reset_index()
    .rename(columns={'trips': 'inm_trips'})
)

In [15]:
# Outer join on household: keeps households that appear in only one of the two tables.
final_df_walk = df_walk_inm_wt.merge(df_walk_jnt_wt, how='outer', on='hh_id')

In [16]:
# Inspect the outer-merged person-level and household-level walk totals.
final_df_walk

Unnamed: 0,hh_id,person_id,inm_trips,inm_walk_time,inm_walk_dist,jt_trips,jt_walk_time,jt_walk_dist
0,2,2.0,1.0,13.688242,0.684412,,,
1,3,3.0,1.0,13.705755,0.685288,,,
2,6,9.0,1.0,34.817734,1.740887,,,
3,8,12.0,1.0,10.688672,0.534434,,,
4,16,29.0,1.0,33.115298,1.655765,,,
...,...,...,...,...,...,...,...,...
2591263,4213041,,,,,4.0,18.150174,0.907509
2591264,4213856,,,,,3.0,18.150174,0.907509
2591265,4214675,,,,,2.0,18.150174,0.907509
2591266,4214688,,,,,4.0,4.908631,0.245432


In [17]:
# Replace every NaN left by the outer merge with 0. This applies to all columns,
# so a missing person_id also becomes 0.
final_df_walk = final_df_walk.fillna(value=0)

In [18]:
# Combine the 'inm_' and 'jt_' components into single time, distance and trips columns.
final_df_walk = final_df_walk.assign(
    walk_time=lambda frame: frame['inm_walk_time'] + frame['jt_walk_time'],
    walk_dist=lambda frame: frame['inm_walk_dist'] + frame['jt_walk_dist'],
    trips=lambda frame: frame['inm_trips'] + frame['jt_trips'],
)

In [19]:
# Inspect the table after NaNs were zero-filled and the combined columns were added.
final_df_walk

Unnamed: 0,hh_id,person_id,inm_trips,inm_walk_time,inm_walk_dist,jt_trips,jt_walk_time,jt_walk_dist,walk_time,walk_dist,trips
0,2,2.0,1.0,13.688242,0.684412,0.0,0.000000,0.000000,13.688242,0.684412,1.0
1,3,3.0,1.0,13.705755,0.685288,0.0,0.000000,0.000000,13.705755,0.685288,1.0
2,6,9.0,1.0,34.817734,1.740887,0.0,0.000000,0.000000,34.817734,1.740887,1.0
3,8,12.0,1.0,10.688672,0.534434,0.0,0.000000,0.000000,10.688672,0.534434,1.0
4,16,29.0,1.0,33.115298,1.655765,0.0,0.000000,0.000000,33.115298,1.655765,1.0
...,...,...,...,...,...,...,...,...,...,...,...
2591263,4213041,0.0,0.0,0.000000,0.000000,4.0,18.150174,0.907509,18.150174,0.907509,4.0
2591264,4213856,0.0,0.0,0.000000,0.000000,3.0,18.150174,0.907509,18.150174,0.907509,3.0
2591265,4214675,0.0,0.0,0.000000,0.000000,2.0,18.150174,0.907509,18.150174,0.907509,2.0
2591266,4214688,0.0,0.0,0.000000,0.000000,4.0,4.908631,0.245432,4.908631,0.245432,4.0


In [20]:
# Rows that have both a 'jt_' and an 'inm_' walk time get their 'trips' value set to 1.
has_jt_walk = final_df_walk['jt_walk_time'] > 0
has_inm_walk = final_df_walk['inm_walk_time'] > 0
final_df_walk.loc[has_jt_walk & has_inm_walk, 'trips'] = 1

In [21]:
# Rows whose combined walk time is above 30, and the sum of their 'trips' values.
walks_over_30 = final_df_walk['walk_time'].gt(30)
person_walk_gt30 = final_df_walk[walks_over_30]
person_walk_gt30['trips'].sum()

1142757.0

In [22]:
# Walk distance weighted by the row's 'trips' value.
final_df_walk['person_walk_dist'] = final_df_walk['walk_dist'].mul(final_df_walk['trips'])

In [23]:
# Two empty output tables (walk time metric, walk distance metric) with the final column layout.
df_wk_wt, df_wk_wd = (pd.DataFrame(columns=perf_measure_columns) for _ in range(2))

In [24]:
# Metric B2.4.1: sum of 'trips' over the rows with more than 30 minutes of walking.
walk_gt30_record = {
    'Concept_ID': concept_id,
    'Metric_ID': 'B2.4',
    'Metric_name': 'Active Mode',
    'Submetric': 'B2.4.1',
    'Description': 'Count of people walking for more than 30 minutes per day',
    'Population': 'Whole Population',
    'Period': '',
    'Geography': 'Regional',
    'Zone_ID': '',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Value': person_walk_gt30['trips'].sum(),
    'Units': 'number of persons',
    'Total_Increment': '',
}

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; add the row by
# concatenating a one-row frame instead (works on old and new pandas alike).
df_wk_wt = pd.concat([df_wk_wt, pd.DataFrame([walk_gt30_record])], ignore_index=True)

In [25]:
# Metric B2.3.1: sum of the trip-weighted walk distance over all rows.
walk_dist_record = {
    'Concept_ID': concept_id,
    'Metric_ID': 'B2.3',
    'Metric_name': 'Active Mode',
    'Submetric': 'B2.3.1',
    'Description': 'Total walk distance by person per day',
    'Population': 'Whole Population',
    'Period': '',
    'Geography': 'Regional',
    'Zone_ID': '',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Value': final_df_walk['person_walk_dist'].sum(),
    'Units': 'miles',
    'Total_Increment': '',
}

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; add the row by
# concatenating a one-row frame instead (works on old and new pandas alike).
df_wk_wd = pd.concat([df_wk_wd, pd.DataFrame([walk_dist_record])], ignore_index=True)

In [26]:
# Restrict both tables to the final output columns, in the configured order.
df_wk_wt = df_wk_wt.loc[:, perf_measure_columns]
df_wk_wd = df_wk_wd.loc[:, perf_measure_columns]

In [27]:
# Write each metric table to its CSV file in the summary directory (no index column).
for csv_name, metric_table in (
    ("person_walking_gt30min_B2.4.csv", df_wk_wt),
    ("total_walk_distance_B2.3.csv", df_wk_wd),
):
    metric_table.to_csv(_join(summary_dir, csv_name), index=False)

# summarise for priority population
# Descriptive metadata read from the 'description_a1.2' entry of the config.
summary_cols = params['description_a1.2']

# NOTE(review): this replaces the period list loaded from the config with a single
# hard-coded entry; any cell run (or re-run) afterwards sees ['AM'] — confirm this is intended.
time_periods = ['AM']


# Empty accumulators, one list per summary level.
pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []


#regional value
# Total walk access/egress distance on rail trips, by period and trip purpose.
# The grouping runs on df_trn_rail (the concatenated per-period frames that carry
# 'walk_dist'); df_temp is the Python list of those frames and has no .groupby.
# NOTE(review): assumes the trip roster provides a 'link21_trip_purp' column — confirm.
region_value = (
    df_trn_rail
    .groupby(['Period', 'link21_trip_purp'])['walk_dist']
    .sum()
    .reset_index()
    .rename(columns={'link21_trip_purp': 'Purpose', 'walk_dist': 'Value'})
)

# 'Period' is kept from the grouping keys; it is no longer overwritten with a single
# scalar, which would have given every row the same period label.
region_value['Concept_ID'] = concept_id
region_value['Metric_ID'] = 'B2.1'
region_value['Metric_name'] = 'Active mode access to rail'
region_value['Submetric'] = 'B2.1.1'
region_value['Description'] = 'Active mode access to rail: Total walk distance'
region_value['Population'] = 'Whole Population'
region_value['Geography'] = 'Regional'
region_value['Orig_zone'] = ''
region_value['Dest_zone'] = ''
region_value['Zone_ID'] = 'Megaregion'
region_value['Units'] = 'miles'

# Order the columns as in the final layout. Any output column not set above is
# created blank instead of raising a KeyError.
# NOTE(review): the metric rows written earlier in this notebook use the key
# 'Origin_zone', while this section sets 'Orig_zone' — confirm which one the layout expects.
region_value = region_value.reindex(columns=perf_measure_columns, fill_value='')

# NOTE(review): the four summaries below do not run as written and need confirmation:
#   * `df_temp` is built as a Python list of DataFrames in an earlier cell, so
#     `.groupby(...)` and `df_temp[...]` boolean indexing are not available on it;
#   * `columns` is not assigned anywhere in the visible part of this notebook;
#   * `period` is not set in this section — confirm it holds the intended single label;
#   * `tb_df_temp` is built but never appended to `tb_df` in the visible code.

#county 
# Mean of `columns` for each origin-county / destination-county pair.
county_df_temp = df_temp.groupby(['orig_county', 'dest_county'])[columns].mean().reset_index()
county_df_temp = rename_columns(county_df_temp, ['orig_county', 'dest_county', columns])

county_df_temp['Period'] = period
county_df_temp['Population'] = 'Whole Population'
# NOTE(review): spelled 'Zone_Id' here but 'Zone_ID' in the other summaries — confirm.
county_df_temp['Zone_Id'] = ''
county_df_temp['Geography'] = 'County'
# summary_cols[columns] is indexed positionally: 0 -> Description, 1 -> Metric, 2 -> Units, 3 -> Metric_name.
county_df_temp['Description'] = summary_cols[columns][0]
county_df_temp['Metric'] = summary_cols[columns][1]
county_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
county_df_temp['Units'] = summary_cols[columns][2]
county_df_temp['Metric_name'] = summary_cols[columns][3]
county_df.append(county_df_temp)

#super district
# Mean of `columns` for each origin / destination super-district pair.
sd_df_temp = df_temp.groupby(['orig_super_dist', 'dest_super_dist'])[columns].mean().reset_index()
sd_df_temp = rename_columns(sd_df_temp, ['orig_super_dist', 'dest_super_dist', columns])

sd_df_temp['Period'] = period
sd_df_temp['Population'] = 'Whole Population'
sd_df_temp['Zone_ID'] = ''
sd_df_temp['Geography'] = 'Superdistrict'
sd_df_temp['Description'] = summary_cols[columns][0]
sd_df_temp['Metric'] = summary_cols[columns][1]
sd_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
sd_df_temp['Units'] = summary_cols[columns][2]
sd_df_temp['Metric_name'] = summary_cols[columns][3]
sd_df.append(sd_df_temp)

#RDM Zones
# Mean of `columns` for each origin / destination RDM-zone pair.
rdm_df_temp = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones'])[columns].mean().reset_index()
rdm_df_temp =  rename_columns(rdm_df_temp, ['orig_rdm_zones', 'dest_rdm_zones', columns])

rdm_df_temp['Period'] = period
rdm_df_temp['Population'] = 'Whole Population'
rdm_df_temp['Zone_ID'] = ''
rdm_df_temp['Geography'] = 'RDM'
rdm_df_temp['Description'] = summary_cols[columns][0]     
rdm_df_temp['Metric'] = summary_cols[columns][1]
rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
rdm_df_temp['Units'] = summary_cols[columns][2]
rdm_df_temp['Metric_name'] = summary_cols[columns][3]
rdm_df.append(rdm_df_temp)


# transbay region
# Mean of `columns` over the rows flagged transbay_od == 1, stored as a one-row table.
tb_value = df_temp[df_temp['transbay_od']==1]
tb_value = tb_value[columns].mean()

tb_df_temp = pd.DataFrame({'Population': 'Whole Population',
                           'Period': period,
                           'Value': tb_value,
                           'Orig_zone' : '',
                           'Dest_zone' : '',
                           'Zone_ID' : 'Megaregion',
                           'Geography' : 'Transbay',
                           'Metric':summary_cols[columns][1],
                           'Submetric':summary_cols[columns][1]+'.6',
                           'Description' : summary_cols[columns][0],
                           'Units' : summary_cols[columns][2],
                           'Metric_name' : summary_cols[columns][3]}, index=[0])

