# Census LEHD WAC Location Quotient Difference 2002 – 2015

Calculates the change in _Location Quotient_ (LQ) between 2002 and 2015 for LEHD Workplace Area Characteristics (WAC) data.  
**Note:** this notebook duplicates a lot of code between the 2002 and 2015 LQ analyses. That duplication could be abstracted, which is what I ended up doing in the Python script that creates the output data when running `make`.

In [17]:
import pandas as pd
import geopandas as gpd
import numpy as np
import sys, os

## Load Data

In [18]:
# Input locations for the csv and shapefile data, all relative to the notebook directory.
# "__file__" is deliberately a quoted string: realpath() resolves that relative name
# against the current working directory, so `dirname` ends up being the working directory.
dirname = os.path.dirname(os.path.realpath("__file__"))
wac2015_filepath, wac2002_filepath, cxwalk_filepath, tracts_shp_filepath = (
    os.path.join(dirname, relative_path)
    for relative_path in (
        "../data/wac/ca_wac_S000_JT00_2015.csv.gz",
        "../data/wac/ca_wac_S000_JT00_2002.csv.gz",
        "../data/wac/ca_xwalk.csv.gz",
        "../data/census_tracts/tracts_2010_4326.shp",
    )
)

In [19]:
# load 2002 & 2015 census wac data, plus crosswalk file
# Only the non-default parser options are spelled out; the separator (",") and the
# header row are left at the pandas defaults.
wac2015 = pd.read_csv(wac2015_filepath, compression="gzip")
wac2002 = pd.read_csv(wac2002_filepath, compression="gzip")
# The crosswalk has to be bound to `cxwalk`: that is the name the filter and merge
# cells below read, and binding it to any other name fails on a fresh kernel.
# low_memory=False lets pandas infer each column's dtype from the whole file at once.
cxwalk = pd.read_csv(cxwalk_filepath, compression="gzip", encoding="ISO-8859-1", low_memory=False)

## Filter and Rollup 

In [20]:
# restrict the crosswalk to rows whose county is one of the 9 SF Bay Area counties
cty_fips_list = [6001, 6013, 6041, 6055, 6075, 6081, 6085, 6095, 6097]
in_bay_area = cxwalk['cty'].isin(cty_fips_list)
cxwalk = cxwalk.loc[in_bay_area]

In [21]:
# the block id and tract id are the only crosswalk columns used from here on
cxwalk = cxwalk.loc[:, ['tabblk2010', 'trct']]

In [22]:
# attach the tract id to every wac row by matching its block id (w_geocode) to the
# crosswalk block id (tabblk2010); the inner join drops blocks missing from the crosswalk
wac2015 = pd.merge(wac2015, cxwalk, how="inner", left_on="w_geocode", right_on="tabblk2010")
wac2002 = pd.merge(wac2002, cxwalk, how="inner", left_on="w_geocode", right_on="tabblk2010")

In [23]:
# NAICS sector columns of the wac tables (CNS01 ... CNS20), grouped by super category;
# each list holds the column names that are summed into that category
_cns = 'CNS{:02d}'.format
makers = [_cns(n) for n in (1, 2, 3, 4, 5, 6, 8)]
services = [_cns(n) for n in (7, 14, 17, 18)]
professions = [_cns(n) for n in (9, 10, 11, 12, 13)]
support = [_cns(n) for n in (15, 16, 19, 20)]

In [24]:
# add one job-count column per super category (row-wise sum of its sector columns),
# plus 'total' as a copy of the all-jobs column C000, to both years
sector_groups = [
    ('makers', makers),
    ('services', services),
    ('professions', professions),
    ('support', support),
]
for wac in (wac2015, wac2002):
    for category, sector_columns in sector_groups:
        wac[category] = wac[sector_columns].sum(axis=1)
    wac['total'] = wac['C000']

In [25]:
# make sure things add up
# Compare row by row. Summing the signed differences into a single number would let
# an over-count in one row cancel an under-count in another and still pass.
category_columns = ['makers', 'services', 'professions', 'support']
for year, wac in ((2015, wac2015), (2002, wac2002)):
    mismatched = wac['C000'] != wac[category_columns].sum(axis=1)
    assert not mismatched.any(), (
        "{}: {} rows where C000 differs from the sum of the four categories".format(year, mismatched.sum())
    )

In [27]:
# drop everything except the tract id, the four category counts and the total
to_keep = ['trct', 'makers', 'services', 'professions', 'support', 'total']
wac2015 = wac2015.loc[:, to_keep]
wac2002 = wac2002.loc[:, to_keep]

In [31]:
# group and aggregate data by census tract
# Use the groupby's own sum(): passing the np.sum callable to .agg() is deprecated in
# recent pandas (it emits a FutureWarning and is slated to stop mapping to the built-in sum).
# as_index=False keeps 'trct' as a regular column in the result.
wac2015 = wac2015.groupby('trct', as_index=False).sum()
wac2002 = wac2002.groupby('trct', as_index=False).sum()

## LQ Processing

In [34]:
# region-wide job totals per category (summed over every tract), one set per year;
# these are the total jobs by category for the entire bay area
summed_columns = ('makers', 'services', 'professions', 'support', 'total')

(makers_total_2015,
 services_total_2015,
 professions_total_2015,
 support_total_2015,
 all_total_2015) = (wac2015[column].sum() for column in summed_columns)

(makers_total_2002,
 services_total_2002,
 professions_total_2002,
 support_total_2002,
 all_total_2002) = (wac2002[column].sum() for column in summed_columns)

In [37]:
# each category's share of all jobs in the region, per year; these shares are the
# denominators of the location quotients computed later
(makers_pct_2015,
 services_pct_2015,
 professions_pct_2015,
 support_pct_2015) = (
    category_total / all_total_2015
    for category_total in (makers_total_2015, services_total_2015, professions_total_2015, support_total_2015)
)

(makers_pct_2002,
 services_pct_2002,
 professions_pct_2002,
 support_pct_2002) = (
    category_total / all_total_2002
    for category_total in (makers_total_2002, services_total_2002, professions_total_2002, support_total_2002)
)

In [39]:
# start from an empty frame; the location quotient and change columns are added to it in the cells below
wac_all = pd.DataFrame()

In [40]:
# compute tract level location quotients
# LQ = (category's share of the tract's jobs) / (category's share of all jobs in the region)

# wac2015 and wac2002 were grouped separately, so row i of one is only the same tract
# as row i of the other when both years contain exactly the same set of tracts.
# Look the 2002 rows up by tract id in wac2015's order (a tract with no 2002 record
# becomes NaN), then give them wac2015's index so every column below lines up
# tract by tract instead of by row position.
wac2002_by_tract = wac2002.set_index('trct').reindex(wac2015['trct'])
wac2002_by_tract.index = wac2015.index

wac_all['make_lq2015'] = wac2015['makers'] / wac2015['total'] / makers_pct_2015
wac_all['serv_lq2015'] = wac2015['services'] / wac2015['total'] / services_pct_2015
wac_all['prof_lq2015'] = wac2015['professions'] / wac2015['total'] / professions_pct_2015
wac_all['supp_lq2015'] = wac2015['support'] / wac2015['total'] / support_pct_2015

wac_all['make_lq2002'] = wac2002_by_tract['makers'] / wac2002_by_tract['total'] / makers_pct_2002
wac_all['serv_lq2002'] = wac2002_by_tract['services'] / wac2002_by_tract['total'] / services_pct_2002
wac_all['prof_lq2002'] = wac2002_by_tract['professions'] / wac2002_by_tract['total'] / professions_pct_2002
wac_all['supp_lq2002'] = wac2002_by_tract['support'] / wac2002_by_tract['total'] / support_pct_2002

In [41]:
# lq difference per category: the 2015 quotient minus the 2002 quotient
for sector in ('make', 'serv', 'prof', 'supp'):
    wac_all[sector + '_change'] = wac_all[sector + '_lq2015'] - wac_all[sector + '_lq2002']

## Output to CSV

In [43]:
# columns to keep for output csv
# 'trct' leads the list so every row of the csv can be tied back to its census tract;
# without it the file holds quotients with no identifier to join on.
columns = ['trct', 'make_lq2015', 'make_lq2002', 'make_change', 'serv_lq2015', 'serv_lq2002', 'serv_change', 'prof_lq2015', 'prof_lq2002', 'prof_change', 'supp_lq2015', 'supp_lq2002', 'supp_change']
outfile = os.path.join(dirname, 'wac_lq_change_2002_2015.csv')
# wac_all carries no tract id column of its own. Its rows share wac2015's row index
# (its first columns are computed from wac2015), so the ids are taken from there.
# assign() returns a new frame, so wac_all itself is unchanged if this cell is re-run.
wac_all.assign(trct=wac2015['trct']).to_csv(outfile, columns=columns, index=False, encoding="utf-8")