Download LEHD data at block level and join with block shapefile.

### Install / load packages

In [1]:
pip install pygris

Note: you may need to restart the kernel to use updated packages.


In [3]:
from pygris.data import get_lodes
from pygris import blocks
from pygris import places
import pandas as pd
import geopandas as gpd

### Load LEHD data using `get_lodes` function from `pygris`

In [4]:
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

In [5]:
lehd = []

for which_state in states:
    
    try:
    
        state_lehd = get_lodes(
            state = which_state, 
            year = 2019, 
            lodes_type = "wac", 
            agg_level = "block"
        )

        state_lehd["state"] = which_state

        lehd.append(state_lehd)
    
    except:
        
        pass

In [9]:
df = pd.concat(lehd)[["state", "w_geocode", "C000"]]

In [10]:
df

Unnamed: 0,state,w_geocode,C000
0,AL,010010201001000,2
1,AL,010010201001001,2
2,AL,010010201001002,1
3,AL,010010201001012,28
4,AL,010010201001019,15
...,...,...,...
8844,WY,560459513003039,3
8845,WY,560459513003040,1
8846,WY,560459513003043,5
8847,WY,560459513003044,67


In [11]:
df.state.unique()

array(['AL', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID',
       'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN',
       'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH',
       'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA',
       'WA', 'WV', 'WI', 'WY'], dtype=object)

In [12]:
df.state.nunique()

48

### Load block group shapefiles (and land area)

In [None]:
bl = []

for which_state in states:
    
    try:
    
        state_bl = blocks(state = which_state, year = 2019)

        state_bl["state"] = which_state

        bl.append(state_bl)
    
    except:
        
        pass

Using FIPS code '01' for input 'AL'


In [None]:
bl.head()

In [14]:
bl_sf = pd.concat(bl)[["state", "GEOID", "ALAND", "geometry"]]

ValueError: No objects to concatenate

In [None]:
bl_sf.head()

In [None]:
bl_sf.state.unique()

In [None]:
bl_sf.state.nunique()

In [None]:
bl_sanfran = bl_sf.loc[bl_sf.GEOID.str.contains('^06075'), :]

In [None]:
bl_sanfran.plot()

### Join LEHD data and shapefile

In [None]:
df.dtypes

In [None]:
bl_sf.dtypes

In [None]:
df.head()

In [None]:
bl_sf.head()

In [None]:
df.shape[0]

In [None]:
bl_sf.shape[0]

In [None]:
df[df['state'] == 'AR']

In [None]:
bl_sf[bl_sf['state'] == 'AR'].head()

In [None]:
final_df = bl_sf.merge(df, how = 'left', left_on = ['state', 'GEOID'], right_on = ['state', 'w_geocode']).drop(columns = ['w_geocode']).rename(columns={"C000": "total_jobs", "ALAND": "land_area", "GEOID": "block"})

In [None]:
final_df.shape[0]

In [None]:
final_df.head()

In [None]:
final_df.state.nunique()

In [None]:
df_states = df.state.unique().tolist()
final_df_states = final_df.state.unique().tolist()

list(set(df_states).difference(final_df_states))

In [None]:
type(final_df)

### Subset to cities of interest

In [None]:
city_list = [
  'Albuquerque NM',
  'Atlanta GA',
  'Austin TX',
  'Bakersfield CA',
  'Baltimore MD',
  'Boston MA',
  'Charlotte NC',
  'Chicago IL',
  'Cincinnati OH',
  'Cleveland OH',
  'Colorado Springs CO',
  'Columbus OH',
  'Dallas TX',
  'Denver CO',
  'Detroit MI',
  'El Paso TX',
  'Fort Worth TX',
  'Fresno CA',
  'Urban Honolulu HI',
  'Houston TX',
  'Indianapolis city (balance) IN',
  'Jacksonville FL',
  'Kansas City MO',
  'Las Vegas NV',
  'Los Angeles CA',
  'Louisville KY',
  'Memphis TN',
  'Miami FL',
  'Milwaukee WI',
  'Minneapolis MN',
  'Nashville-Davidson metropolitan government (balance) TN',
  'New Orleans LA',
  'New York NY',
  'Oakland CA',
  'Oklahoma City OK',
  'Omaha NE',
  'Orlando FL',
  'Philadelphia PA',
  'Phoenix AZ',
  'Pittsburgh PA',
  'Portland OR',
  'Raleigh NC',
  'Sacramento CA',
  'Salt Lake City UT',
  'San Antonio TX',
  'San Diego CA',
  'San Francisco CA',
  'San Jose CA',
  'Seattle WA',
  'St. Louis MO',
  'Tampa FL',
  'Tucson AZ',
  'Tulsa OK',
  'Washington DC']

In [None]:
all_places = places(cb = True)

In [None]:
all_places.head()

In [None]:
pd.set_option('display.max_colwidth', None)
all_places.loc[all_places.NAME.str.contains('Nashville|Indianapolis|Honolulu|St\.? Louis'), :][['NAME', 'STUSPS']]

In [None]:
all_places['city'] = all_places.apply(lambda x: '{} {}'.format(x['NAME'], x['STUSPS']), axis=1)

In [None]:
all_places.city.head()

In [None]:
all_places_simp = all_places[['city', 'geometry']]
the_places = all_places_simp[all_places_simp['city'].isin(city_list)]

In [None]:
pd.reset_option('^display.', silent=True)
the_places.head()

In [None]:
the_places.city.unique()

In [None]:
the_places.city.nunique()

In [None]:
in_places = the_places.city.unique().tolist()

list(set(city_list).difference(in_places))

In [None]:
final_df.crs == the_places.crs

In [None]:
df_joined = gpd.sjoin(final_df, the_places, how = "inner").drop(columns = ['index_right'])

In [None]:
df_joined.head()

In [None]:
sf_test = df_joined[df_joined['city']=='San Francisco CA']

In [None]:
sf_test.plot()

In [None]:
sf_test2 = final_df.loc[final_df.block.str.contains('^06075'), :]

In [None]:
sf_test2.plot()

In [None]:
df_joined['jobs_per_sq_meter'] = df_joined['total_jobs']/df_joined['land_area']

In [None]:
df_joined.head()

In [None]:
df_joined.land_area.mean()

### Export as geodataframe

In [None]:
df_joined.to_file("C:/Users/jpg23/data/downtownrecovery/lehd_new_downtowns/cities_lehd_jobs_block.geojson", driver="GeoJSON")  