In [1]:
import geopandas as gpd
import pandas as pd
import os

In [2]:
# Lift the column cap so wide frames are displayed without truncation
pd.set_option("display.max_columns", None)

## Join Bike volume data to links

In [None]:
# Paths are built with os.path.join so the notebook also runs outside Windows
# (on Windows this still resolves to .\Data\...).

# Read the link attribute table.
# NOTE(review): the csv is read through geopandas rather than pandas;
# presumably its columns come back as text, which would explain the later
# int64 cast of link_id -- confirm before switching to pd.read_csv.
links = gpd.read_file(os.path.join(".", "Data", "links.csv"))

# Read the link geometries from the shapefile
links_shp = gpd.read_file(os.path.join(".", "Data", "links.shp"))

# Show the (rows, columns) size of the attribute table
links.shape

In [None]:
# Read in the bike volume table (path built with os.path.join so it is not
# tied to the Windows separator)
bike_volume = pd.read_csv(os.path.join(".", "Data", "bike_vol.csv"))

# Missing volumes are flagged with -1 so they stay distinguishable from real counts
bike_volume['bike_vol'] = bike_volume['bike_vol'].fillna(-1)

In [None]:
# Build the two text keys used to join volumes to links:
#   FTkey = "<from_node>_<to_node>", TFkey = "<to_node>_<from_node>"
bv_from = bike_volume['from_node'].astype(str)
bv_to = bike_volume['to_node'].astype(str)
bike_volume['FTkey'] = bv_from + "_" + bv_to
bike_volume['TFkey'] = bv_to + "_" + bv_from

# Preview the result
bike_volume.head(15)

In [None]:
# Build the same pair of text keys on the links table so it can be matched
# against the bike volumes:
#   FTkey = "<from_node>_<to_node>", TFkey = "<to_node>_<from_node>"
link_from = links['from_node'].astype(str)
link_to = links['to_node'].astype(str)
links['FTkey'] = link_from + "_" + link_to
links['TFkey'] = link_to + "_" + link_from

# Preview the result
links.head(15)

In [None]:
# examine duplicates
# duplicates = pd.concat(g for _, g in bike_volume.groupby("key") if len(g) > 1)

In [None]:
# duplicates

In [None]:
# Remove ambiguous volume records: keep=False discards EVERY row whose key
# occurs more than once, not just the repeats.
bike_vol_no_dup_ft = bike_volume.drop_duplicates(subset=['FTkey'], keep=False)
bike_vol_no_dup_tf = bike_volume.drop_duplicates(subset=['TFkey'], keep=False)

# Report how many records survive for each key
for deduped in (bike_vol_no_dup_ft, bike_vol_no_dup_tf):
    print(deduped.shape)
# duplicates = pd.concat(g for _, g in bike_vol_no_dup.groupby(['FTkey']) if len(g) > 1)
# duplicates

In [None]:
# Work on a slim copy of the links table (id plus the two join keys)
links2 = links[['link_id', 'FTkey', 'TFkey']].copy()

# Left-join the de-duplicated volumes onto the links, once per key, so every
# link is kept even when it has no volume record.
# NOTE(review): both keys are built from the same from_node/to_node pair in
# each table, so the FTkey join and the TFkey join pair up the same rows;
# neither one matches a link against a volume stored in the opposite
# direction. Confirm that this is what is intended.
link_bike_vol_ft = links2.merge(bike_vol_no_dup_ft, on='FTkey', how='left')
link_bike_vol_tf = links2.merge(bike_vol_no_dup_tf, on='TFkey', how='left')

# Row counts should be unchanged by the left joins
for joined in (links2, link_bike_vol_ft, link_bike_vol_tf):
    print(joined.shape)

In [None]:
# Preview the first 15 rows of the links joined to volumes on FTkey
link_bike_vol_ft.head(15)

In [None]:
# Preview the first 15 rows of the links joined to volumes on TFkey
link_bike_vol_tf.head(15)

In [None]:
# Put the FTkey-joined and TFkey-joined tables side by side, one row per link,
# so their bike volumes can be compared. Columns present in both tables get
# the default _x (FT table) / _y (TF table) suffixes.
links_bv_both_dir = link_bike_vol_ft.merge(link_bike_vol_tf, on='link_id', how='inner')

# Inspect the size and the suffixed column names of the combined table
print(links_bv_both_dir.shape)
print(list(links_bv_both_dir.columns))

In [None]:
# Keep only the fields needed for the comparison and give them plain names.
# bike_vol_x is the volume from the FTkey join, bike_vol_y the one from the
# TFkey join.
# Note: this cell reassigns links_bv_both_dir, so it cannot be re-run without
# first re-running the join that creates the suffixed columns.
kept_fields = ['link_id', 'TFkey_x', 'FTkey_x','from_node_x', 'to_node_x', 'bike_vol_x', 'bike_vol_y']
plain_names = ['link_id', 'TFkey', 'FTkey','from_node', 'to_node', 'bvol_ft', 'bvol_tf']

links_bv_both_dir = links_bv_both_dir[kept_fields].copy()
links_bv_both_dir.columns = plain_names

In [None]:
# Preview the first 15 rows of the combined table with both volume columns
links_bv_both_dir.head(15)

In [None]:
# Check that the volume obtained through the FTkey join is identical to the one
# obtained through the TFkey join (Series.equals treats NaNs in the same
# position as equal).
# NOTE(review): FTkey and TFkey are built from the same from_node/to_node pair
# in both tables, so the two joins select the same volume rows and this check
# is expected to pass by construction -- it does not compare opposite travel
# directions. Confirm whether a reverse-direction comparison was intended.
ft_volumes = links_bv_both_dir['bvol_ft']
tf_volumes = links_bv_both_dir['bvol_tf']
ft_volumes.equals(tf_volumes)

In [None]:
# Examine the column names of the links shapefile
# (the export cell below selects its output fields from these)
links_shp.columns

In [None]:
# Build the final per-link table: link id plus a single bike volume column
# (taken from the FTkey join). The csv export is currently disabled; only the
# shapefile is written.
links3 = links_bv_both_dir[['link_id', 'bvol_ft']].copy()
links3.columns = ['link_id', 'bike_vol']
# cast the id to int64 so it can be matched against TARGET_FID below
links3['link_id'] = links3['link_id'].astype('int64')
# # export to csv
# links3.to_csv(r".\Results\links_bv.csv")

# join bike vol to links shapefile
# NOTE(review): how='outer' also keeps volume rows with no matching TARGET_FID,
# which would have no geometry -- confirm that an outer join is wanted here.
links4 = links_shp.merge(links3, left_on='TARGET_FID', right_on='link_id', how='outer')

# keep the original link attributes plus the new bike volume and the geometry
export_fields = [
    'Name', 'Oneway', 'Speed', 'AutoNetwor',
    'BikeNetwor', 'PedNetwork', 'SourceData', 'DriveTime', 'BikeTime',
    'Pedestrian', 'Length_Mil', 'ConnectorN', 'RoadClass', 'AADT',
    'AADT_YR', 'BIKE_L', 'BIKE_R', 'Shape_Leng', 'Start_Key',
    'End_Key', 'Bike_Lane', 'Bike_Path', 'Bike_Blvd', 'SIGID', 'Signal',
    'bike_vol', 'geometry',
]
links5 = links4[export_fields]

# export to shape; the path is built with os.path.join so it is not tied to the
# Windows separator, and the output folder is created if it is missing
results_dir = os.path.join(".", "Results")
os.makedirs(results_dir, exist_ok=True)
links5.to_file(os.path.join(results_dir, "links_bv.shp"))


## Summarize zone trips by Attracting/Producing Zone

In [3]:
# read in the microzone polygons (path built with os.path.join so the notebook
# also runs outside Windows)
zones = gpd.read_file(os.path.join(".", "Data", "microzones.shp"))

### Read in trip tables, summarize, and format

In [5]:
def summarize_zones(trips_df, name):
    """Total the 'bk' column of a trip table per attracting and per producing zone.

    Parameters
    ----------
    trips_df : pandas.DataFrame
        Trip table containing the columns 'azone', 'pzone' and 'bk'.
    name : str
        Prefix for the output columns.

    Returns
    -------
    pandas.DataFrame
        One row per zone id found in either 'azone' or 'pzone', with columns
        '<name>_abk' (sum of 'bk' grouped by 'azone'), 'zone_id' and
        '<name>_pbk' (sum of 'bk' grouped by 'pzone'). A zone that appears on
        only one side has NaN in the other total.
    """

    def _zone_totals(zone_col, suffix):
        # sum 'bk' per zone, then expose the zone (the group index) as a column
        totals = pd.DataFrame(trips_df.groupby(zone_col)['bk'].sum())
        totals['zone_id'] = totals.index
        totals.columns = [name + suffix, 'zone_id']
        return totals

    attracted = _zone_totals('azone', '_abk')
    produced = _zone_totals('pzone', '_pbk')

    # outer join so zones present on only one side are kept
    return attracted.merge(produced, on='zone_id', how='outer')
    

In [None]:
# sch_univ = pd.read_csv(r".\Data\sch_univ_trip.csv")
# sch_univ_sum = summarize_zones(sch_univ, 'univ')
# sch_univ_sum.isnull().values.any()

In [7]:
def _summarize_trip_file(filename, name):
    """Read one trip table from the Data folder and summarize it by zone.

    The full trip table only lives inside this function, so it is released as
    soon as the summary is returned. The path is built with os.path.join so it
    is not tied to the Windows separator.
    """
    trips = pd.read_csv(os.path.join(".", "Data", filename))
    return summarize_zones(trips, name)


# Discretionary trips (social trips, some recreation)
disc_sum = _summarize_trip_file("disc_trip.csv", 'disc')

# Maintenance trips (e.g. groceries)
maint_sum = _summarize_trip_file("maint_trip.csv", 'mnt')

# Maintenance trips non-home-based (e.g. groceries)
maint_nhb_sum = _summarize_trip_file("maint_trip_nhb.csv", 'mntnhb')

# Recreational family trips
rec_fam_sum = _summarize_trip_file("rec_fam_trip.csv", 'recfam')

# Recreation long trips
rec_long_sum = _summarize_trip_file("rec_long_trip.csv", 'reclng')

# Recreation other trips (recreation that doesn't fall into family or long)
rec_oth_sum = _summarize_trip_file("rec_oth_trip.csv", 'recoth')

# school (grade) trips
sch_grade_sum = _summarize_trip_file("sch_grade_trip.csv", 'grade')

# school (university) trips
sch_univ_sum = _summarize_trip_file("sch_univ_trip.csv", 'univ')

# Work trips
work_sum = _summarize_trip_file("work_trip.csv", 'wrk')

# Work non-home-based trips
work_nhb_sum = _summarize_trip_file("work_trip_nhb.csv", 'wrknhb')

In [8]:
# Spot-check one summary table (recreational family trips): attraction total,
# zone id and production total per zone
rec_fam_sum

Unnamed: 0,recfam_abk,zone_id,recfam_pbk
0,0.012586,0,
1,0.012027,1,
2,0.005556,2,
3,0.003790,3,
4,0.002135,4,
...,...,...,...
18784,0.365143,19212,1.435160
18785,0.451545,19213,1.634315
18786,1.043511,19214,2.474203
18787,2.113814,19215,0.009276


### Merge trip summaries back to microzone shapefile

In [9]:
# Start from a trimmed copy of the zones layer, with zone_id as int64
zones2 = zones[['zone_id', 'CO_TAZID', 'TAZID', 'CO_FIPS', 'CO_NAME', 'geometry']].copy()
zones2['zone_id'] = zones2['zone_id'].astype('int64')

# Attach every trip summary in turn; the outer join keeps zones without trips
# as well as summary zone ids that are missing from the zones layer
trip_summaries = [
    disc_sum,
    maint_sum,
    maint_nhb_sum,
    rec_fam_sum,
    rec_long_sum,
    rec_oth_sum,
    sch_grade_sum,
    sch_univ_sum,
    work_sum,
    work_nhb_sum,
]
for trip_summary in trip_summaries:
    zones2 = zones2.merge(trip_summary, on='zone_id', how='outer')

# preview table
zones2.head(30)

Unnamed: 0,zone_id,CO_TAZID,TAZID,CO_FIPS,CO_NAME,geometry,disc_abk,disc_pbk,mnt_abk,mnt_pbk,mntnhb_abk,mntnhb_pbk,recfam_abk,recfam_pbk,reclng_abk,reclng_pbk,recoth_abk,recoth_pbk,grade_abk,grade_pbk,univ_abk,univ_pbk,wrk_abk,wrk_pbk,wrknhb_abk,wrknhb_pbk
0,0,0,0,0,,"POLYGON ((432271.913 4511646.351, 432587.351 4...",0.114104,,0.253344,,0.105898,,0.012586,,0.003809877,,0.071038,,0.182504,,0.547506,,0.063112,,0.048155,
1,1,0,0,0,,"POLYGON ((432587.351 4511685.482, 433708.825 4...",0.107797,,0.249164,,0.104151,,0.012027,,0.004086766,,0.070331,,0.175874,,0.55064,,0.059938,,0.045733,
2,2,0,0,0,,"POLYGON ((434783.825 4513368.123, 436523.175 4...",0.048038,,0.119005,,0.049744,,0.005556,,0.002217044,,0.043592,,0.083417,,0.239138,,0.026579,,0.02028,
3,3,0,0,0,,"POLYGON ((436523.175 4513990.279, 437808.876 4...",0.028127,,0.091951,,0.038436,,0.00379,,0.004927244,,0.031367,,0.056731,,0.021017,,0.016031,,0.012231,
4,4,0,0,0,,"POLYGON ((438170.076 4513697.879, 437971.005 4...",0.018139,,0.059783,,0.024989,,0.002135,,0.004170322,,0.029438,,0.031621,,0.010476,,0.012168,,0.009284,
5,5,0,0,0,,"POLYGON ((439107.670 4514080.595, 439893.846 4...",,,,,,,,,,,,,,,,,,,,
6,6,0,0,0,,"POLYGON ((439893.846 4514550.662, 440268.999 4...",,,,,,,,,,,,,,,,,,,,
7,7,0,0,0,,"POLYGON ((441107.148 4514640.267, 442255.076 4...",,,,,,,,,,,,,,,,,,,,
8,8,0,0,0,,"POLYGON ((442255.076 4515303.123, 442409.608 4...",,,,,,,,,,,,,,,,,,,,
9,9,0,0,0,,"POLYGON ((443356.952 4517164.217, 442409.608 4...",,,,,,,,,,,,,,,,,,,,


In [10]:
# Fill NAs with -1, then export to shape.
# NOTE(review): fillna(-1) is applied to every column, not only the trip
# totals -- e.g. CO_NAME is blank in the preview above -- confirm that is wanted.
# The path is built with os.path.join so it is not tied to the Windows
# separator, and the output folder is created if it is missing.
results_dir = os.path.join(".", "Results")
os.makedirs(results_dir, exist_ok=True)
zones2.fillna(-1).to_file(os.path.join(results_dir, "Microzone_Trip_Summaries.shp"))

## Merge zone attraction and production scores with the microzone geometry

In [13]:
# Create a clean copy of zones dataset.
# Note: this rebuilds zones2 from the raw zones layer, replacing the
# trip-summary table that used the same name earlier in the notebook.
zones2 = zones[['zone_id', 'CO_TAZID', 'TAZID', 'CO_FIPS', 'CO_NAME', 'geometry']].copy()
zones2['zone_id'] = zones2['zone_id'].astype('int64')

# Read the attraction and production score tables (paths built with
# os.path.join so they are not tied to the Windows separator)
data_dir = os.path.join(".", "Data")
ascore = pd.read_csv(os.path.join(data_dir, "zone_attraction_size.csv"))
pscore = pd.read_csv(os.path.join(data_dir, "zone_production_size.csv"))

# Attach each score table to the zone geometry; the outer join keeps zones
# without a score as well as scored zone ids missing from the zones layer
zones3a = zones2.merge(ascore, left_on='zone_id', right_on='zone_id', how='outer')
zones3p = zones2.merge(pscore, left_on='zone_id', right_on='zone_id', how='outer')

In [16]:
# Fill NAs with -1, then export to shape.
# The paths are built with os.path.join so they are not tied to the Windows
# separator, and the output folder is created if it is missing.
results_dir = os.path.join(".", "Results")
os.makedirs(results_dir, exist_ok=True)
zones3a.fillna(-1).to_file(os.path.join(results_dir, "Microzone_A_Scores.shp"))
zones3p.fillna(-1).to_file(os.path.join(results_dir, "Microzone_P_Scores.shp"))

## Get Centroid Nodes

In [17]:
# Read the network nodes (path built with os.path.join so the notebook also
# runs outside Windows)
nodes = gpd.read_file(os.path.join(".", "Data", "nodes.shp"))

# Use the row index as the node id.
# NOTE(review): this assumes the shapefile's row order lines up with the
# NODE_ID values stored on the zones, which are joined against it below -- confirm.
nodes['node_id'] = nodes.index

# Show the (rows, columns) size of the node table
nodes.shape

(108655, 28)

In [18]:
# Keep only the id, coordinates and geometry of each node
node_fields = ['node_id', 'xcoord', 'ycoord', 'zcoord', 'geometry']
nodes2 = nodes[node_fields].copy()

# The inner join keeps only the nodes whose id is referenced by a zone's
# NODE_ID, and brings the zone id along
zone_nodes = zones[['NODE_ID', 'zone_id']]
centroids = nodes2.merge(zone_nodes, left_on='node_id', right_on='NODE_ID', how='inner')

# List the resulting columns
print(centroids.columns)

Index(['node_id', 'xcoord', 'ycoord', 'zcoord', 'geometry', 'NODE_ID',
       'zone_id'],
      dtype='object')


In [19]:
# Drop the redundant NODE_ID join column and put geometry last
centroids = centroids[['node_id', 'xcoord', 'ycoord', 'zcoord', 'zone_id', 'geometry']].copy()

# Export to shape; the path is built with os.path.join so it is not tied to the
# Windows separator, and the output folder is created if it is missing
results_dir = os.path.join(".", "Results")
os.makedirs(results_dir, exist_ok=True)
centroids.to_file(os.path.join(results_dir, "Microzone_Centroids.shp"))