This notebook merges LODES home/work locations with census blocks to get home/work lat-lng block coordinates.

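Data sources:
  - 2018 LEHD LODES: https://lehd.ces.census.gov/data/
  - 2020 Census blocks: https://www2.census.gov/geo/tiger/TIGER2020/

Note: the LODES block IDs must be of the same vintage as the block shapefile. LODES7 files are tabulated on 2010 census blocks and LODES8 files on 2020 census blocks, so use the LODES8 release of the 2018 data with the 2020 blocks.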

In [None]:
import geopandas as gpd
import osmnx as ox
import pandas as pd
from shapely.geometry import Point

## Get all census blocks within some study site

In [None]:
# Study-site parameters: the place geocoded as the site's center, and the
# radius of the buffer drawn around it (applied in the projected CRS below).
STUDY_SITE_QUERY = 'Los Angeles City Hall'
STUDY_SITE_RADIUS = 5000  # presumably meters, assuming osmnx projects to a UTM CRS by default

# geocode the site center; the result is indexed as (lat, lng) while
# shapely's Point takes (x, y), hence the swapped order
latlng_coords = ox.geocode(STUDY_SITE_QUERY)
latlng_point = Point(latlng_coords[1], latlng_coords[0])

# project the point so the buffer distance is in the projected CRS's units
# rather than in degrees, then buffer it into a circular study site
latlng_point_proj, crs = ox.projection.project_geometry(latlng_point)
polygon_proj = latlng_point_proj.buffer(STUDY_SITE_RADIUS)

# project the buffered polygon back to lat-lng so it can be compared with
# the census block geometries; `crs` is rebound to the lat-lng CRS here
polygon, crs = ox.projection.project_geometry(polygon_proj, crs=crs, to_latlong=True)

In [None]:
# read the CA census block shapefile, then key the rows by block GEOID
gdf_blocks = gpd.read_file('../../data/tl_2020_06_tabblock20/')
gdf_blocks = gdf_blocks.set_index('GEOID20')
gdf_blocks.shape

In [None]:
# flag every block whose geometry intersects the study site polygon,
# then keep only the flagged blocks
in_study_site = gdf_blocks.intersects(polygon)
study_site_blocks = gdf_blocks[in_study_site]
study_site_blocks.shape

## Get all LODES with both home and work in study site

In [None]:
# load the CA LODES origin-destination file; block geocodes are read as
# strings rather than parsed as numbers
df = pd.read_csv(
    '../../data/lodes/ca_od_main_JT00_2018.csv',
    dtype=dict.fromkeys(('w_geocode', 'h_geocode'), str),
)
df.shape

In [None]:
# keep only the LODES rows whose home block AND work block are both
# among the study site's blocks
site_block_ids = study_site_blocks.index
mask_h = df['h_geocode'].isin(site_block_ids)
mask_w = df['w_geocode'].isin(site_block_ids)
study_site_lodes = df[mask_h & mask_w]
study_site_lodes.shape

## Merge the data, save to disk

In [None]:
# trips: home block, work block, and the S000 count column
trip_cols = ['h_geocode', 'w_geocode', 'S000']
block_trips = study_site_lodes[trip_cols]

# blocks: lat/lng columns, converted to floats
latlng_cols = ['INTPTLAT20', 'INTPTLON20']
block_latlng = study_site_blocks[latlng_cols].astype(float)

In [None]:
# Merge in home lat-lng. Each trip row should match exactly one block, so
# validate that the block index (right side) holds no duplicate IDs; a
# duplicate would otherwise silently multiply trip rows and their job counts.
od = pd.merge(left=block_trips,
              right=block_latlng,
              left_on='h_geocode',
              right_index=True,
              how='inner',
              validate='many_to_one')

# Merge in work lat-lng. The lat-lng columns added by the home merge collide
# with the incoming ones here, so pandas applies the suffixes: the existing
# (home) columns get '_home' and the newly merged (work) columns get '_work'.
od = pd.merge(left=od,
              right=block_latlng,
              left_on='w_geocode',
              right_index=True,
              suffixes=['_home', '_work'],
              how='inner',
              validate='many_to_one')

# both trip endpoints were already filtered to study-site blocks, so the
# inner merges should neither drop nor add rows
assert len(od) == len(block_trips), 'merge changed the number of OD rows'
od.shape

In [None]:
# map the source column names to readable ones, then write the table to disk
cols = {
    'h_geocode': 'home_block',
    'w_geocode': 'work_block',
    'S000': 'job_count',
    'INTPTLAT20_home': 'home_lat',
    'INTPTLON20_home': 'home_lng',
    'INTPTLAT20_work': 'work_lat',
    'INTPTLON20_work': 'work_lng',
}

od = od.rename(cols, axis='columns')
od.to_csv(path_or_buf='../../data/od.csv', index=False)

In [None]:
# preview the first rows of the final results rather than dumping the whole table
od.head()