In [1]:
import pandas as pd

In [2]:
def zcta_stats(df):
    """Collapse a frame into one row holding its `hus10` and `afact` totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the columns ``'hus10'`` and ``'afact'``.

    Returns
    -------
    pandas.DataFrame
        Single-row frame (index ``[0]``) with the columns ``'hus10'`` and
        ``'afact'``, each holding the sum of the matching column of `df`.
    """
    totals = {column: df[column].sum() for column in ('hus10', 'afact')}
    return pd.DataFrame(totals, index=[0])

In [3]:
# Load the allocation factor table from the Geocorr 2014 output file.
# The file's second line (row index 1) is skipped -- presumably a
# descriptive label row under the real header; TODO confirm against the file.
hu_afact_df = pd.read_csv(
    'other_data/Geocorr_2014_Output.csv',
    skiprows=[1],
)

In [4]:
# Check that the allocation factors of every ZCTA sum to 1 (within 0.01).
# The absolute deviation is compared so that totals well below 1 fail the
# check just like totals above it; a signed comparison would let any
# under-allocated ZCTA pass silently.
zcta_afact = hu_afact_df.groupby('zcta5')['afact'].sum()
assert ((zcta_afact - 1).abs() < 0.01).all(), 'Not all ZCTAs have an allocation factor of 1'

In [5]:
# Subset the allocation factors to only the TJHD counties and city,
# identified by the FIPS codes listed below.
county_FIPs = [51003, 51125, 51540, 51065, 51079, 51109]

# Vectorised membership test: keeps the rows whose 'county14' value is one
# of the codes above.
tjhd_hu_afact_df = hu_afact_df.loc[hu_afact_df['county14'].isin(county_FIPs)]

In [6]:
# For each ZCTA, total the 'hus10' and 'afact' values over the rows that
# remain after the county/city subset. The result is indexed by 'zcta5' and
# has exactly the columns 'hus10' and 'afact'.
# NOTE: this cell rebinds tjhd_hu_afact_df, so it expects the row-level
# subset produced by the previous cell as its input.
tjhd_hu_afact_df = (
    tjhd_hu_afact_df
    .groupby('zcta5')[['hus10', 'afact']]
    .sum()
)

In [7]:
# Load the list of TJHD ZCTAs, index it by 'ZCTA', and left-join the per-ZCTA
# totals onto it, so every listed ZCTA is kept even when it has no matching
# row in the totals.
tjhd_hu_afact_df = (
    pd.read_csv('other_data/TJHD_ZCTAs.txt')
    .set_index('ZCTA')
    .join(tjhd_hu_afact_df, how='left')
)

# Write the joined table to disk, ordered by allocation factor.
(
    tjhd_hu_afact_df
    .sort_values('afact')
    .to_csv('output/TJHD_allocation_factors.csv')
)