# Prepare Census Block OD Data
---

In [1]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import requests
import pickle

from bikewaysim.paths import config
from bikewaysim.framework import bikewaysim_functions

# Retrieve LODES7 data

In [2]:
state_two_letter = "ga"
YYYY = 2010
lodes_url_base = f"https://lehd.ces.census.gov/data/lodes/LODES7/{state_two_letter}/od/{state_two_letter}_od_main_JT00_{YYYY}.csv.gz"
lodes = pd.read_csv(lodes_url_base)

#processs df
rename_dict = {
    'w_geocode':'work_blockid',
    'h_geocode':'home_blockid',
    'S000': 'total_jobs'  
}
lodes.rename(columns=rename_dict, inplace=True)
lodes = lodes[['work_blockid','home_blockid','total_jobs']]


# Census Blocks

In [3]:
#study area for masking blocks
studyarea = gpd.read_file(config['studyarea_fp'])#.to_crs(config['projected_crs_epsg'])
studyarea = studyarea.unary_union.envelope

  studyarea = studyarea.unary_union.envelope


In [4]:
config.keys()

dict_keys(['project_fp', 'raw_data_fp', 'projected_crs_epsg', 'geofabrik_year', 'geofabrik_state', 'usgs_fp', 'here_fp', 'gdot_road_inventory_fp', 'gdot_traffic_fp', 'gdot_signals_fp', 'coa_bike_fp', 'arc_bike_fp', 'basemap_api_keys', 'mod_name', 'bicycle_facilities_fp', 'bikewaysim_fp', 'calibration_fp', 'cycleatl_fp', 'matching_fp', 'network_fp', 'osmdwnld_fp', 'scratch_fp', 'figures_fp', 'geofabrik_fp', 'studyarea_fp'])

In [5]:
#bring in census blocks (get from https://www.census.gov/cgi-bin/geo/shapefiles/index.php)
#TODO script downloading the blocks
#move into bikewaysim_fp and rename to census_blocks
blocks = gpd.read_file(config['bikewaysim_fp'] / "census_blocks.zip",mask=studyarea).to_crs(config['projected_crs_epsg'])
blocks.rename(columns={'GEOID20':'blockid'},inplace=True)
blocks = blocks[['blockid','geometry']]

# Data Wrangling

In [6]:
#make sure type are the same
lodes['work_blockid'] = lodes['work_blockid'].astype(int)
lodes['home_blockid'] = lodes['home_blockid'].astype(int)
blocks['blockid'] = blocks['blockid'].astype(int)

In [7]:
#filter lodes data
print('lodes:',lodes.shape[0],'blocks:',blocks.shape[0])
lodes = lodes[lodes['work_blockid'].isin(blocks['blockid']) & lodes['home_blockid'].isin(blocks['blockid'])]
blocks = blocks[blocks['blockid'].isin(lodes['work_blockid']) | blocks['blockid'].isin(lodes['home_blockid'])]
print('lodes:',lodes.shape[0],'blocks:',blocks.shape[0])

lodes: 3196531 blocks: 1112
lodes: 518 blocks: 195


Only 3 mile trips

In [8]:
block_geo = dict(zip(blocks['blockid'],blocks['geometry'].centroid))
home = gpd.GeoSeries(lodes['home_blockid'].map(block_geo),crs=blocks.crs)
work = gpd.GeoSeries(lodes['work_blockid'].map(block_geo),crs=blocks.crs)
lodes = lodes[home.distance(work) <= 3 * 5280]

In [9]:
#rename to make it easier to work with teh code for tazs
#assume home is the origin
lodes.columns = ['dest_taz','orig_taz','trips']
blocks.columns = ['tazid','geometry']

# Snap OD pairs to network

In [10]:
# import network nodes
nodes = gpd.read_file(config['network_fp']/"final_network.gpkg",layer="nodes")

with (config['calibration_fp']/"calibration_network.pkl").open('rb') as fh:
    links,turns = pickle.load(fh)
del turns

nodes0 = pd.concat([links['A'],links['B']]).unique().tolist()
nodes = nodes[nodes['N'].isin(nodes0)]

In [11]:
blocks = bikewaysim_functions.snap_ods_to_network(blocks,nodes)

#knock out if over 1000 feet
blocks = blocks[blocks['dist']<=500]

In [12]:
snapped_ods = dict(zip(blocks['tazid'],blocks['N']))
lodes['orig_N'] = lodes['orig_taz'].map(snapped_ods)
lodes['dest_N'] = lodes['dest_taz'].map(snapped_ods)

In [13]:
lodes.dropna(inplace=True)

In [14]:
trip_origins_per_taz = lodes.groupby('orig_taz')['trips'].sum().reset_index()
trip_origins_per_taz.columns = ['tazid','trip_origins']

In [15]:
# no trips
no_trips = blocks[blocks['tazid'].isin(trip_origins_per_taz['tazid'])==False]
no_trips

Unnamed: 0,tazid,geometry,N,dist
10,131210010021009,"POLYGON ((2223791.182 1373110.711, 2224068.678...",552540324,156.343363
12,131210010012008,"POLYGON ((2228961.098 1375869.272, 2228967.846...",5416416435,210.980413
18,131210001002004,"POLYGON ((2240410.133 1376187.583, 2240423.271...",6003230883,305.843583
19,130890205001001,"POLYGON ((2241117.075 1367213.659, 2241119.275...",12857831385,458.016681
20,131210001003010,"POLYGON ((2240647.152 1380622.017, 2240691.733...",12806245284,341.866157
25,131210016001007,"POLYGON ((2239960.399 1370276.232, 2239995.186...",5416384428,264.330886
36,131210030002000,"POLYGON ((2237773.838 1367022.173, 2238020.317...",8481750157,83.376551
40,131210029001008,"POLYGON ((2233472.411 1367355.113, 2234131.107...",3430512216,115.817146
47,130890202002001,"POLYGON ((2241183.568 1372846.968, 2241185.619...",7320657846,413.164198
49,131210004001006,"POLYGON ((2229411.288 1379360.378, 2229411.376...",69290388,224.297003


In [16]:
blocks = blocks.merge(trip_origins_per_taz,on='tazid')

In [17]:
blocks.to_file(config['bikewaysim_fp']/'ods.gpkg',layer='trip_origins')

In [22]:
# trip origins per TAZ
#export the taz areas
blocks.to_crs(config['projected_crs_epsg']).to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='taz_polygons')
#export the taz centroids
taz_centroids = blocks.copy().to_crs(config['projected_crs_epsg'])
taz_centroids.geometry = taz_centroids.geometry.centroid
taz_centroids.to_crs(config['projected_crs_epsg']).to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='taz_centroids')

In [None]:
# NOTE I think this is here for blocks with no trips
# studyarea = gpd.read_file(config['studyarea_fp'])
# blocks0 = gpd.read_file("/Users/tannerpassmore/Documents/BikewaySim/Savannah/OD Creation/tl_2010_13_tabblock10.shp",mask=studyarea).to_crs(config['projected_crs_epsg'])
# blocks0.rename(columns={'GEOID10':'tazid'},inplace=True)
# blocks0['tazid'] = blocks0['tazid'].astype(int)
# blocks0 = blocks0[['tazid','geometry']]
# blocks0 = blocks0[blocks0['tazid'].isin(blocks['tazid'])==False]
# blocks0.plot()

In [None]:
# useful for visiualization
# blocks0.to_file(config['bikewaysim_fp']/'ods.gpkg',layer='filtered_out_or_zero')

# Export

In [25]:
print(lodes['trips'].sum(),'trips across',lodes.shape[0],'unique OD pairs')#,tripData['hh_id'].nunique(),'persons')
print('and',blocks.shape[0],'census blocks')

522 trips across 463 unique OD pairs
and 132 census blocks


In [26]:
# #export the taz areas
# blocks.to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='block_polygons')

# #export the taz centroids
# blocks_centroids = blocks.copy()
# blocks_centroids.geometry = blocks_centroids.geometry.centroid
# blocks_centroids.to_crs(config['projected_crs_epsg']).to_file(config['bikewaysim_fp']/'map_layers.gpkg',layer='block_centroids')

#export the ods
lodes.to_csv(config['bikewaysim_fp']/'od_matrix.csv',index=False)

Exploring past here

In [27]:
lodes

Unnamed: 0,dest_taz,orig_taz,trips,orig_N,dest_N
926040,130890202002005,130890202002005,1,7.320658e+09,7.320658e+09
926077,130890202002005,130890224011011,1,6.877187e+09,7.320658e+09
926107,130890202002005,131210029001021,1,7.468310e+09,7.320658e+09
926108,130890202002005,131210030002002,1,6.927455e+07,7.320658e+09
926136,130890202002007,130890202002011,1,5.422171e+09,6.819200e+07
...,...,...,...,...,...
1535237,131210035002001,131210021001002,1,6.008664e+09,3.172082e+09
1535238,131210035002001,131210021001010,2,1.287599e+10,3.172082e+09
1535504,131210035002003,131210030002002,1,6.927455e+07,6.934994e+07
1535613,131210035002004,131210021001000,1,6.931249e+07,1.130069e+10


In [None]:
# works = lodes[['work_blockid','total_jobs']].copy()
# works = works.groupby('work_blockid')['total_jobs'].sum().reset_index()
# works = pd.merge(works,blocks[['blockid','geometry']],left_on="work_blockid",right_on='blockid')
# works = gpd.GeoDataFrame(works,geometry='geometry')
# works.explore('total_jobs')
# merged = pd.merge(lodes,blocks[['blockid','geometry']],left_on="work_blockid",right_on='blockid',how='left')
# merged.drop(columns=['blockid'],inplace=True)
# merged.rename(columns={'geometry':'work_geo'},inplace=True)
# merged = pd.merge(merged,blocks[['blockid','geometry']],left_on="home_blockid",right_on='blockid',how='left')
# merged.drop(columns=['blockid'],inplace=True)
# merged.rename(columns={'geometry':'home_geo'},inplace=True)
# merged = merged[merged['home_geo'].notna() & merged['work_geo'].notna()]
# random_id = merged['work_blockid'].sample(1).item()
# subset = merged.loc[merged['work_blockid']==random_id].copy()
# subset = gpd.GeoDataFrame(subset,geometry='home_geo')
# m = subset.explore()
# blocks.loc[blocks['blockid']==random_id,'geometry'].explore()
# work = blocks.loc[blocks['blockid']==random_id,'geometry'].to_crs('epsg:4326').item().centroid
# x, y = list(work.coords)[0]
# import folium
# folium.Circle((y,x),radius=100,tooltip='WORK',kwargs={'color':'green'}).add_to(m)
# m
# merged['work_geo'] = gpd.GeoSeries(merged['work_geo']).centroid
# merged['home_geo'] = gpd.GeoSeries(merged['home_geo']).centroid
# # import geopandas as gpd
# # import pandas as pd
# # from pathlib import Path
# # import json

# # config = json.load((Path.cwd().parent / 'config.json').open('rb'))
# # network_fp = Path(config['project_directory']) / "Network"
# # osmdwnld_fp = Path(config['project_directory']) / "OSM_Download"
# # elevation_fp = Path(config['project_directory']) / "Elevation"
# # cyclinginfra_fp = Path(config['project_directory']) / "Cycling_Infra_Dating"
# # calibration_fp = Path(config['project_directory']) / "Calibration"



# #'foot' in blocks.crs.axis_info[0].unit_name

# #df.crs.axis_info[0].unit_name