In [1]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import json
import requests


# Put the parent of the current working directory at the front of the import
# path so that `file_structure_setup` can be imported from there
# (assumes the notebook is run from its own folder — TODO confirm).
import sys
sys.path.insert(0,str(Path.cwd().parent))
import file_structure_setup
# `config` is indexed by key in later cells
# ('studyarea_fp', 'projected_crs_epsg', 'bikewaysim_fp').
config = file_structure_setup.filepaths()

# Retrieve LODES7 data

In [2]:
# Download the LODES7 origin-destination ("od_main", JT00) table for one state
# and year, then keep only the work block, home block and job count columns.
state_two_letter = "ga"
YYYY = 2010
# Despite the "_base" suffix this is the complete URL of the gzipped CSV.
lodes_url_base = f"https://lehd.ces.census.gov/data/lodes/LODES7/{state_two_letter}/od/{state_two_letter}_od_main_JT00_{YYYY}.csv.gz"

# Process df: raw LODES column name -> column name used in the rest of the notebook
rename_dict = {
    'w_geocode':'work_blockid',
    'h_geocode':'home_blockid',
    'S000': 'total_jobs'
}

# Only the columns listed in rename_dict are kept, so parse just those instead
# of loading every column of the file and discarding most of them afterwards.
lodes = pd.read_csv(lodes_url_base, usecols=list(rename_dict))

# Reassign rather than rename in place so re-running the cell is harmless,
# then fix the column order explicitly.
lodes = lodes.rename(columns=rename_dict)
lodes = lodes[['work_blockid','home_blockid','total_jobs']]
lodes.head()


Unnamed: 0,work_blockid,home_blockid,total_jobs
0,130019501001011,131619601003016,1
1,130019501001011,132799705003044,1
2,130019501001015,133097802001051,1
3,130019501001028,130019501002104,1
4,130019501001028,130019502003096,1


# Census Blocks

In [4]:
# Study area: reduced to a single bounding rectangle that is used as the
# spatial mask when the census blocks are read.
# NOTE(review): the reprojection to config['projected_crs_epsg'] is commented
# out in the original, so the rectangle stays in the study-area file's own CRS;
# presumably that CRS matches the block shapefile's — confirm.
studyarea_gdf = gpd.read_file(config['studyarea_fp'])
studyarea = studyarea_gdf.unary_union.envelope

In [5]:
# Load the census blocks that intersect the study-area mask, reproject them to
# the project CRS and keep only the block id and geometry columns.
# Shapefile source: https://www.census.gov/cgi-bin/geo/shapefiles/index.php
#TODO script downloading the blocks
# NOTE(review): this is an absolute, machine-specific path; it should probably
# come from `config` — confirm where the shapefile is expected to live.
blocks_shp = "/Users/tannerpassmore/Documents/BikewaySim/Savannah/OD Creation/tl_2010_13_tabblock10.shp"
blocks = (
    gpd.read_file(blocks_shp, mask=studyarea)
    .to_crs(config['projected_crs_epsg'])
    .rename(columns={'GEOID10':'blockid'})
    [['blockid','geometry']]
)

# Data Wrangling

In [6]:
# Make sure the id columns share one dtype so the LODES block ids can be
# matched against the census block ids.
# Use an explicit 64-bit integer: plain `int` follows NumPy's platform default,
# which is 32-bit on some platforms (e.g. Windows with NumPy < 2) and cannot
# hold 15-digit block ids.
BLOCKID_DTYPE = 'int64'
lodes['work_blockid'] = lodes['work_blockid'].astype(BLOCKID_DTYPE)
lodes['home_blockid'] = lodes['home_blockid'].astype(BLOCKID_DTYPE)
blocks['blockid'] = blocks['blockid'].astype(BLOCKID_DTYPE)

In [7]:
# Filter the LODES rows: keep a row only when both its work block and its home
# block appear among the loaded census blocks.
work_in_blocks = lodes['work_blockid'].isin(blocks['blockid'])
home_in_blocks = lodes['home_blockid'].isin(blocks['blockid'])
lodes = lodes[work_in_blocks & home_in_blocks]

# Export

In [8]:
# Preview the first rows of the block table before it is exported
blocks.head()

Unnamed: 0,blockid,geometry
0,130299203063082,"POLYGON ((968769.370 631469.073, 968761.138 63..."
1,130299800001187,"POLYGON ((907321.761 743495.668, 907325.636 74..."
2,130299201022119,"POLYGON ((892581.561 767199.806, 892585.605 76..."
3,130299800001183,"POLYGON ((902700.375 751678.882, 902718.539 75..."
4,130299203052012,"POLYGON ((935276.699 695680.643, 935375.519 69..."


In [9]:
# Preview the first rows of the filtered LODES table before it is exported
lodes.head()

Unnamed: 0,work_blockid,home_blockid,total_jobs
239231,130510001001004,130510011002033,1
239232,130510001001004,130510027004012,1
239233,130510001001004,130510028001005,1
239234,130510001001004,130510036012013,1
239235,130510001001004,130510036024025,1


In [10]:
# Both spatial layers are written to the same GeoPackage.
map_layers_gpkg = config['bikewaysim_fp']/'map_layers.gpkg'

# TAZ areas: the block polygons
blocks.to_file(map_layers_gpkg,layer='taz_polygons')

# TAZ centroids: same rows, with each polygon replaced by its centroid
blocks_centroids = blocks.copy()
blocks_centroids.geometry = blocks_centroids.geometry.centroid
# NOTE(review): `blocks` was already reprojected to config['projected_crs_epsg']
# when it was loaded, so this to_crs is presumably a no-op — confirm.
centroids_projected = blocks_centroids.to_crs(config['projected_crs_epsg'])
centroids_projected.to_file(map_layers_gpkg,layer='block_centroids')

# ODs: the filtered LODES table, written without the index column
od_matrix_csv = config['bikewaysim_fp']/'od_matrix.csv'
lodes.to_csv(od_matrix_csv,index=False)

Exploratory scratch work past here (the remaining cell is entirely commented out).

In [11]:
# works = lodes[['work_blockid','total_jobs']].copy()
# works = works.groupby('work_blockid')['total_jobs'].sum().reset_index()
# works = pd.merge(works,blocks[['blockid','geometry']],left_on="work_blockid",right_on='blockid')
# works = gpd.GeoDataFrame(works,geometry='geometry')
# works.explore('total_jobs')
# merged = pd.merge(lodes,blocks[['blockid','geometry']],left_on="work_blockid",right_on='blockid',how='left')
# merged.drop(columns=['blockid'],inplace=True)
# merged.rename(columns={'geometry':'work_geo'},inplace=True)
# merged = pd.merge(merged,blocks[['blockid','geometry']],left_on="home_blockid",right_on='blockid',how='left')
# merged.drop(columns=['blockid'],inplace=True)
# merged.rename(columns={'geometry':'home_geo'},inplace=True)
# merged = merged[merged['home_geo'].notna() & merged['work_geo'].notna()]
# random_id = merged['work_blockid'].sample(1).item()
# subset = merged.loc[merged['work_blockid']==random_id].copy()
# subset = gpd.GeoDataFrame(subset,geometry='home_geo')
# m = subset.explore()
# blocks.loc[blocks['blockid']==random_id,'geometry'].explore()
# work = blocks.loc[blocks['blockid']==random_id,'geometry'].to_crs('epsg:4326').item().centroid
# x, y = list(work.coords)[0]
# import folium
# folium.Circle((y,x),radius=100,tooltip='WORK',kwargs={'color':'green'}).add_to(m)
# m
# merged['work_geo'] = gpd.GeoSeries(merged['work_geo']).centroid
# merged['home_geo'] = gpd.GeoSeries(merged['home_geo']).centroid
# # import geopandas as gpd
# # import pandas as pd
# # from pathlib import Path
# # import json

# # config = json.load((Path.cwd().parent / 'config.json').open('rb'))
# # network_fp = Path(config['project_directory']) / "Network"
# # osmdwnld_fp = Path(config['project_directory']) / "OSM_Download"
# # elevation_fp = Path(config['project_directory']) / "Elevation"
# # cyclinginfra_fp = Path(config['project_directory']) / "Cycling_Infra_Dating"
# # calibration_fp = Path(config['project_directory']) / "Calibration"



# #'foot' in blocks.crs.axis_info[0].unit_name

# #df.crs.axis_info[0].unit_name