# Census LEHD WAC Location Quotient Analysis

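Calculates tract-level _Location Quotients_ (LQ) from Census LEHD Workplace Area Characteristics (WAC) data: each tract's share of jobs in a sector group, divided by that sector group's share of jobs across all tracts in the analysis.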

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import sys, os

## Load Data

In [2]:
# Locations of the gzip-compressed input tables, resolved relative to `dirname`.
# NOTE: "__file__" is a quoted string literal here (not the module attribute),
# so realpath() anchors it at the current working directory and `dirname`
# ends up being that directory.
dirname = os.path.split(os.path.realpath("__file__"))[0]
wac2015_filepath = os.path.join(
    dirname, "../data/wac/ca_wac_S000_JT00_2015.csv.gz"
)
cxwalk_filepath = os.path.join(
    dirname, "../data/wac/ca_xwalk.csv.gz"
)

In [16]:
# Load the 2015 WAC table and the geography crosswalk table.
# Both inputs are gzip-compressed, comma-separated files with a header row;
# every other parser option is left at the pandas default.
wac2015 = pd.read_csv(wac2015_filepath, compression="gzip")
# The crosswalk is decoded as ISO-8859-1 and read with low_memory=False so the
# file is parsed in a single pass instead of in internal chunks.
cxwalk = pd.read_csv(
    cxwalk_filepath,
    compression="gzip",
    encoding="ISO-8859-1",
    low_memory=False,
)

## Filter and Rollup 

In [17]:
# Restrict the crosswalk to the nine counties of the SF Bay Area.
# The codes are compared as integers, which assumes read_csv parsed the `cty`
# column as numeric -- TODO confirm against the crosswalk file.
cty_fips_list = [6001, 6013, 6041, 6055, 6075, 6081, 6085, 6095, 6097]
in_bay_area = cxwalk['cty'].isin(cty_fips_list)
cxwalk = cxwalk.loc[in_bay_area]

In [18]:
# Only two crosswalk columns are needed from here on: the block id
# (`tabblk2010`, used as the join key) and the tract id (`trct`, used for the
# later rollup).
cxwalk = cxwalk.loc[:, ['tabblk2010', 'trct']]

In [19]:
# Attach a tract id to every 2015 WAC row by matching its `w_geocode` to the
# crosswalk's `tabblk2010`. The inner join drops WAC rows whose block is not in
# the (already county-filtered) crosswalk.
wac = pd.merge(
    wac2015,
    cxwalk,
    how="inner",
    left_on="w_geocode",
    right_on="tabblk2010",
)

In [20]:
# WAC column names (CNS01 .. CNS20) grouped into four super categories.
# Each list is generated from the two-digit numeric suffixes of its columns.
makers = ['CNS{:02d}'.format(n) for n in (1, 2, 3, 4, 5, 6, 8)]
services = ['CNS{:02d}'.format(n) for n in (7, 14, 17, 18)]
professions = ['CNS{:02d}'.format(n) for n in (9, 10, 11, 12, 13)]
support = ['CNS{:02d}'.format(n) for n in (15, 16, 19, 20)]

In [21]:
# Add one row-wise total per super category by summing its member columns,
# then expose the overall job count (`C000`) under the name `total`.
for sector_name, sector_cols in (
    ('makers', makers),
    ('services', services),
    ('professions', professions),
    ('support', support),
):
    wac[sector_name] = wac[sector_cols].sum(axis=1)
wac['total'] = wac['C000']

In [22]:
# Sanity check: in every row the four super categories must add up to the
# overall job count `C000`.
# The comparison is done row by row on purpose: summing the signed differences
# over the whole table would let a surplus in one row cancel a shortfall in
# another and hide both.
sector_sum = wac['makers'] + wac['services'] + wac['professions'] + wac['support']
mismatched = wac['C000'] != sector_sum
if mismatched.any():
    # Raised explicitly (rather than via `assert`) so the check is not stripped
    # when Python runs with -O; the exception type is still AssertionError.
    raise AssertionError(
        "sector totals do not add up to C000 in %d row(s)" % int(mismatched.sum())
    )

In [23]:
# Drop everything except the tract id, the four sector aggregates and the
# overall total; the remaining steps use no other columns.
wac = wac.loc[:, ['trct', 'makers', 'services', 'professions', 'support', 'total']]

In [24]:
# Roll the rows up to one row per census tract by summing every remaining
# column within each `trct` group. `as_index=False` keeps `trct` as a regular
# column so it can be written out later.
# The native GroupBy.sum() is used instead of .agg(np.sum): passing a NumPy
# reduction callable to agg is deprecated in recent pandas releases and emits
# a FutureWarning, while the result is the same.
wac = wac.groupby('trct', as_index=False).sum()

## LQ Processing

In [25]:
# Column-wise sums over all tracts: the total number of jobs per super
# category for the entire bay area.
sector_totals = wac[['makers', 'services', 'professions', 'support']].sum()
makers_total = sector_totals['makers']
services_total = sector_totals['services']
professions_total = sector_totals['professions']
support_total = sector_totals['support']
# Grand total across the four categories, coerced to float so the share
# calculations that follow are floating-point divisions.
all_total = sector_totals.sum() * 1.0

In [26]:
# Share of all jobs held by each super category across the whole region.
# These are the denominators of the location quotients computed later.
makers_pct, services_pct, professions_pct, support_pct = (
    sector_jobs / all_total
    for sector_jobs in (makers_total, services_total, professions_total, support_total)
)

In [27]:
# Tract-level location quotient for each super category:
#   (tract jobs in category / tract total jobs) / regional share of category
for lq_col, jobs_col, regional_share in (
    ('make_lq', 'makers', makers_pct),
    ('serv_lq', 'services', services_pct),
    ('prof_lq', 'professions', professions_pct),
    ('supp_lq', 'support', support_pct),
):
    wac[lq_col] = wac[jobs_col] / wac['total'] / regional_share

## Output to CSV

In [28]:
# Write the tract id and the four location quotients (and nothing else) to a
# UTF-8 CSV file in `dirname`, without the DataFrame index.
columns = ['trct', 'make_lq', 'serv_lq', 'prof_lq', 'supp_lq']
outfile = os.path.join(dirname, 'wac2015_lq.csv')
wac[columns].to_csv(outfile, index=False, encoding="utf-8")