In [213]:
import pandas as pd

#load acs tables
acs_snap = pd.read_csv('/Users/tim/Desktop/bdt/tract_all_snap/ACSST5Y2020.S2201-Data.csv',skiprows = [1],dtype = "string")
acs_employment = pd.read_csv('/Users/tim/Desktop/bdt/tract_all_employment/ACSST5Y2020.S2301-Data.csv', skiprows = [1], dtype = "string")

#snap columns
#S2201_C01_001E = total households
#S2201_C01_035E = work status
#S2201_C01_021E = poverty status last 12mo
#S2201_C03_001E = receiving snap

#employment columns
#S2301_C01_001E = total pop 16yr+
#S2301_C02_001E = labor force participation rate 16yr+
#S2301_C03_001E = employment/population ratio
#S2301_C04_001E = unemployment rate

#functions to add geo columns for state, county, tract based on slice of geoid
def get_state(row):
    match row['GEO_ID'][9:11]:
        case '08':
            a = 'Colorado'
        case '24':
            a = 'Maryland'
        case '26':
            a = 'Michigan'
        case '36':
            a = 'New York'
        case '37':
            a = 'North Carolina'
        case '42':
            a = 'Pennsylvania'
        case '45':
            a = 'South Carolina'
        case _:
            a = 'N/A'
    return a

def get_county(row):
    return row['GEO_ID'][11:14]

def get_tract(row):
    return row['GEO_ID'][14:20]

def get_geo(row):
    return row['GEO_ID'][9:20]

#add the geo columns
acs_snap['state'] = acs_snap.apply(get_state, axis=1)
acs_snap['county'] = acs_snap.apply(get_county, axis=1)
acs_snap['tract'] = acs_snap.apply(get_tract, axis=1)
acs_snap['geo'] = acs_snap.apply(get_geo, axis=1)
acs_employment['state'] = acs_employment.apply(get_state, axis=1)
acs_employment['county'] = acs_employment.apply(get_county, axis=1)
acs_employment['tract'] = acs_employment.apply(get_tract, axis=1)
acs_employment['geo'] = acs_employment.apply(get_geo, axis=1)


#drop N/A rows from employment table, I messed up in getting the acs table from the census
acs_employment.drop(acs_employment.loc[acs_employment['state']=='N/A'].index, inplace=True)

#pretty col names
acs_snap = acs_snap.rename(columns={'S2201_C01_001E': 'snap_total_households','S2201_C01_035E': 'snap_work_status', 'S2201_C01_021E': 'snap_poverty_status', 'S2201_C03_001E': 'snap_receiving_snap'})
acs_employment = acs_employment.rename(columns={'S2301_C01_001E': 'emp_total_pop','S2301_C02_001E':'emp_labor_participation','S2301_C03_001E':'emp_employment_population_ratio','S2301_C04_001E':'emp_unemployment_rate'})

#convert to numeric
acs_snap[['snap_total_households','snap_work_status','snap_poverty_status','snap_receiving_snap']] = acs_snap[['snap_total_households','snap_work_status','snap_poverty_status','snap_receiving_snap']].apply(pd.to_numeric)
acs_employment[['emp_total_pop','emp_labor_participation','emp_employment_population_ratio','emp_unemployment_rate']] = acs_employment[['emp_total_pop','emp_labor_participation','emp_employment_population_ratio','emp_unemployment_rate']].apply(pd.to_numeric, args=('coerce',))

#add columns
acs_employment['emp_total_unemployed'] = acs_employment['emp_total_pop']*0.01*acs_employment['emp_unemployment_rate']
acs_snap['pct_snap'] = acs_snap['snap_receiving_snap']/acs_snap['snap_total_households']

#build aggregate frames
snap_state = acs_snap.groupby('state',group_keys=True)[['snap_total_households','snap_receiving_snap','snap_work_status','snap_poverty_status']].sum()
employment_state = acs_employment.groupby('state',group_keys=True)[['emp_total_pop','emp_total_unemployed']].sum()

#add more columns
snap_state['st_pct_snap'] = snap_state['snap_receiving_snap']/snap_state['snap_total_households']
snap_state['st_pct_work'] = snap_state['snap_work_status']/snap_state['snap_total_households']
snap_state['st_pct_poverty'] = snap_state['snap_poverty_status']/snap_state['snap_total_households']

employment_state['st_pct_unemployed'] = employment_state['emp_total_unemployed']/employment_state['emp_total_pop']

#push the index
snap_state = snap_state.reset_index()
employment_state = employment_state.reset_index()

#join the two source frames, to get unemployment rate and pct_snap in the same place
pre_snap = acs_snap[['state','county','tract','geo','snap_total_households','snap_work_status','snap_poverty_status','snap_receiving_snap','pct_snap']]
pre_employment = acs_employment[['state','county','tract','geo','emp_total_pop','emp_total_unemployed','emp_unemployment_rate']]
source_merge = pd.merge(
    pre_snap,
    pre_employment,
    how="inner",
    on="geo")

#fix percentage from the acs table
source_merge = source_merge.assign(emp_unemployment_rate = source_merge['emp_unemployment_rate'] * 0.01)

#fix state column
source_merge = source_merge.rename(columns={'state_x':'state'})

#join snap state table
source_merge_s = pd.merge(
    source_merge,
    snap_state,
    how="inner",
    on="state")

#join employment state table
source_merge_se = pd.merge(
    source_merge_s,
    employment_state,
    how="inner",
    on="state")

#final table
final = source_merge_se[['geo','state','county_x','tract_x','pct_snap','emp_unemployment_rate','st_pct_unemployed','st_pct_snap']]

print(final)


In [238]:
final.to_csv('final.csv')

In [201]:
source_merge_se.loc[source_merge_se['state']=='Colorado'][['state','county','tract','total_households_x']]

Unnamed: 0,state,county,tract,total_households_x
0,Colorado,001,007801,1222
1,Colorado,041,007801,1222
2,Colorado,001,007801,740
3,Colorado,041,007801,740
4,Colorado,001,007801,1526
...,...,...,...,...
10750,Colorado,123,002208,2583
10751,Colorado,123,002209,1015
10752,Colorado,123,002210,443
10753,Colorado,125,963100,1674


In [202]:
ing t

SyntaxError: invalid syntax (172632658.py, line 1)