In [43]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
import pantab

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [3]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

In [4]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
person_file = _join(ctramp_dir, 'main\\personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
hh = hh.rename(columns = {'taz': 'home_zone'})

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

perf_measure_columns = params['final_columns']

period = params['periods']

In [5]:
df_trips = create_trip_roster(ctramp_dir, hh, pp_perc ,transbay_od, geo_cwks, link21_purp_mapping, iteration)

NAs in PP Share: 0


In [94]:
trip_purp_dict = {"work": 1,
                "shopping":2,    
                "escort":3,       
                "social": 4,      
                "school" : 5,     
                "othdiscr": 6,    
                "othmaint":7,     
                "business": 8 } 

pp_dict = {"equity_trips" : 1, 
           "nonequity_trips" : 0}

dtype = { 'orig_super_dist' : 'int16', 
          'dest_super_dist' : 'int16',
          'orig_county' : 'int16',
          'dest_county' : 'int16',
          'transbay_od' : 'int16', 
          'trip_purpose' : 'int16',
          'trip_mode' : 'int16',
          'depart_hour' : 'int16',
          'priority_population' : 'int16',
          'trips' : 'float32',
          'scenario' : str}

In [95]:
df_trips.columns

Index(['hh_id', 'person_id', 'person_num', 'tour_id', 'stop_id', 'inbound',
       'tour_purpose', 'orig_purpose', 'dest_purpose', 'orig_taz',
       'orig_walk_segment', 'dest_taz', 'dest_walk_segment', 'parking_taz',
       'depart_hour', 'trip_mode', 'tour_mode', 'tour_category', 'avAvailable',
       'sampleRate', 'taxiWait', 'singleTNCWait', 'sharedTNCWait', 'trip_type',
       'trips', 'num_participants', 'transbay_od', 'orig_rdm_zones',
       'orig_super_dist', 'orig_county', 'dest_rdm_zones', 'dest_super_dist',
       'dest_county', 'home_zone', 'pp_share', 'new_dest_purp',
       'new_orig_purp', 'link21_tour_purp', 'link21_orig_purp',
       'link21_dest_purp', 'link21_trip_purp', 'trip_purpose'],
      dtype='object')

In [96]:
df_trips['trip_purpose'] = df_trips['link21_trip_purp'].map(trip_purp_dict)

In [97]:
df = df_trips.groupby(['orig_super_dist', 'dest_super_dist',
                       'orig_county', 'dest_county', 'transbay_od', 
                       'trip_purpose', 'trip_mode', 'depart_hour', 
                       'pp_share'])['trips'].sum().reset_index()

df['equity_trips'] = df['trips']*df['pp_share']/100
df['nonequity_trips'] = df['trips'] - df['equity_trips']

df = pd.melt(df[df.trips != 0].drop(columns = ['pp_share','trips'], axis = 1), 
               id_vars = ['orig_super_dist','dest_super_dist',
                          'orig_county','dest_county','transbay_od',
                          'trip_purpose','trip_mode','depart_hour'],
               value_vars = ['equity_trips', 'nonequity_trips'],
               var_name = 'eq_trips',
               value_name = 'trips'
               )

df['priority_population'] = df['eq_trips'].map(pp_dict)
del df['eq_trips']

df['scenario'] = "BY2050"
df['trips'] = round(df['trips'])
df = df.loc[df['trips'] > 0].reset_index(drop=True)

df = df.astype(dtype)

In [98]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3158177 entries, 0 to 3158176
Data columns (total 11 columns):
 #   Column               Dtype  
---  ------               -----  
 0   orig_super_dist      int16  
 1   dest_super_dist      int16  
 2   orig_county          int16  
 3   dest_county          int16  
 4   transbay_od          int16  
 5   trip_purpose         int16  
 6   trip_mode            int16  
 7   depart_hour          int16  
 8   trips                float32
 9   priority_population  int16  
 10  scenario             object 
dtypes: float32(1), int16(9), object(1)
memory usage: 90.4+ MB


In [99]:
df2 = df.copy(deep =True)

In [100]:
df2

Unnamed: 0,orig_super_dist,dest_super_dist,orig_county,dest_county,transbay_od,trip_purpose,trip_mode,depart_hour,trips,priority_population,scenario
0,1,1,1,1,0,1,1,7,7.0,1,BY2050
1,1,1,1,1,0,1,1,14,13.0,1,BY2050
2,1,1,1,1,0,1,1,16,13.0,1,BY2050
3,1,1,1,1,0,1,1,17,7.0,1,BY2050
4,1,1,1,1,0,1,1,20,7.0,1,BY2050
...,...,...,...,...,...,...,...,...,...,...,...
3158172,32,32,9,9,0,8,4,11,27.0,0,BY2050
3158173,32,32,9,9,0,8,4,12,47.0,0,BY2050
3158174,32,32,9,9,0,8,4,13,47.0,0,BY2050
3158175,32,32,9,9,0,8,4,13,4.0,0,BY2050


In [101]:
df2['scenario'] = "R36"
df2['trips'] = round(df2['trips']*0.90)

In [102]:
df2 = df2.astype(dtype)

In [103]:
final_df = pd.concat([df, df2], ignore_index=True).reset_index(drop=True)

In [104]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6316354 entries, 0 to 6316353
Data columns (total 11 columns):
 #   Column               Dtype  
---  ------               -----  
 0   orig_super_dist      int16  
 1   dest_super_dist      int16  
 2   orig_county          int16  
 3   dest_county          int16  
 4   transbay_od          int16  
 5   trip_purpose         int16  
 6   trip_mode            int16  
 7   depart_hour          int16  
 8   trips                float32
 9   priority_population  int16  
 10  scenario             object 
dtypes: float32(1), int16(9), object(1)
memory usage: 180.7+ MB


In [107]:
fpath = r"C:\Users\vyadav\OneDrive - Cambridge Systematics\CS-Projects\LINK21"
pantab.frame_to_hyper(final_df, _join(fpath, "data/trips.hyper"), table="trips")

In [142]:
final_df['priority_population'].value_counts()

0    4780270
1    1536084
Name: priority_population, dtype: int64

In [144]:
final_df['depart_hour'].value_counts()

8     534024
18    527280
17    509094
7     455250
15    439540
16    423302
13    389308
14    369208
12    341836
10    329220
11    323884
9     299762
21    286804
19    284044
20    270956
6     227656
5     126382
22    105552
23     73252
Name: depart_hour, dtype: int64

In [109]:
import geopandas as gpd

In [131]:
gis_path = gpd.read_file(r"C:\Users\vyadav\OneDrive - Cambridge Systematics\CS-Projects\LINK21\gis\Link21_TAZ_FINAL\Link21_TAZ_FINAL.shp")

In [132]:
cwk = pd.read_csv(r"C:\Users\vyadav\OneDrive - Cambridge Systematics\CS-Projects\LINK21\gis\geographies.csv")

In [133]:
gis_path

Unnamed: 0,ID,SHAPE_LENG,SHAPE_AREA,LINK21_TAZ,AREA_ACRE,AREA_SQMI,geometry
0,1,0.095604,0.000241,1,581.609086,0.908761,"POLYGON ((-122.50538 37.73557, -122.50526 37.7..."
1,2,0.069933,0.000077,2,185.990539,0.290609,"POLYGON ((-122.37331 37.73853, -122.37320 37.7..."
2,3,0.215405,0.000078,3,188.828192,0.295043,"POLYGON ((-122.41992 37.81157, -122.41972 37.8..."
3,4,0.024365,0.000029,4,69.880045,0.109187,"POLYGON ((-122.41682 37.80489, -122.41605 37.8..."
4,5,0.043329,0.000052,5,126.490965,0.197642,"POLYGON ((-122.42284 37.81026, -122.42137 37.8..."
...,...,...,...,...,...,...,...
3327,3328,0.041648,0.000056,1525,136.063695,0.212599,"POLYGON ((-121.89295 37.43221, -121.89285 37.4..."
3328,3329,0.020490,0.000026,1526,63.786395,0.099666,"POLYGON ((-121.91318 37.32078, -121.91373 37.3..."
3329,3330,0.056447,0.000102,1527,247.335858,0.386461,"POLYGON ((-121.90028 37.43203, -121.89980 37.4..."
3330,3331,0.030479,0.000035,1528,85.595420,0.133742,"POLYGON ((-121.88053 37.41372, -121.88120 37.4..."


In [134]:
cwk

Unnamed: 0,taz,rdm_zones,super_district,county
0,1,75,26,4
1,2,32,32,8
2,3,48,16,4
3,4,25,11,6
4,5,139,23,7
...,...,...,...,...
3327,3328,18,17,4
3328,3329,229,13,1
3329,3330,287,9,1
3330,3331,140,6,8


In [135]:
gis_file = pd.merge(gis_path, cwk, left_on='ID', right_on='taz', how='left')
gis_file = gis_file[['super_district', 'county', 'geometry']]

In [136]:
gis_file

Unnamed: 0,super_district,county,geometry
0,26,4,"POLYGON ((-122.50538 37.73557, -122.50526 37.7..."
1,32,8,"POLYGON ((-122.37331 37.73853, -122.37320 37.7..."
2,16,4,"POLYGON ((-122.41992 37.81157, -122.41972 37.8..."
3,11,6,"POLYGON ((-122.41682 37.80489, -122.41605 37.8..."
4,23,7,"POLYGON ((-122.42284 37.81026, -122.42137 37.8..."
...,...,...,...
3327,17,4,"POLYGON ((-121.89295 37.43221, -121.89285 37.4..."
3328,13,1,"POLYGON ((-121.91318 37.32078, -121.91373 37.3..."
3329,9,1,"POLYGON ((-121.90028 37.43203, -121.89980 37.4..."
3330,6,8,"POLYGON ((-121.88053 37.41372, -121.88120 37.4..."


In [137]:
df2 = gis_file.dissolve(by=['super_district', 'county'])

In [139]:
df2 = df2.reset_index()

In [141]:
df2.to_file(r"C:\Users\vyadav\OneDrive - Cambridge Systematics\CS-Projects\LINK21\gis\Link21_TAZ_FINAL\geographies_cwk.shp")

In [3]:
#print(df.head())
print(df01.head())

   orig_taz  dest_taz  orig_rdm_zones  dest_rdm_zones  orig_super_dist  \
0         1         1              75              75               26   
1         1         1              75              75               26   
2         1         1              75              75               26   
3         1         1              75              75               26   
4         1         1              75              75               26   

   dest_super_dist  orig_county  dest_county  transbay_od  trip_purpose  \
0               26            4            4          0.0             9   
1               26            4            4          0.0             3   
2               26            4            4          0.0             3   
3               26            4            4          0.0             3   
4               26            4            4          0.0             3   

   trip_mode  depart_hour scenario      eq_trips  num_trips  
0          4           11    case1  equity