# ACS Household Income in Phoenix Urban Villages, City of Phoenix, U.S.

- https://www.census.gov/data/developers/data-sets/acs-5year.html

For households by income and race/ethnicity:
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

**Note:** Census Block Groups (which are aggregated to Phoenix Urban Village areas) are not available before 2013.

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from acs_income_vars import *

### REMOVE THIS BLOCK -- UPDATED WITH GEOAGG FUNCTION

# Block-group lookup tables read from the geo data folder
# (file names suggest 2010 and 2020 vintages -- TODO confirm).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Left-pad every geoid with zeros so it becomes a 12-character string.
for df in (bgp_10, bgp_20):
    df['geoid'] = df['geoid'].map('{0:0>12}'.format)

# Get rid of the area and lat/lon columns that are not being used.
bgp_20 = bgp_20.drop(columns=['aland20', 'awater20', 'lat20', 'lon20', 'land_acre'])
bgp_10 = bgp_10.drop(columns=['aland10', 'awater10', 'lat10', 'lon10', 'land_acre'])

In [52]:
# Search parameters.
# NOTE: variable number assignments differ between 2010 and 2020.
# Years are kept as strings; y1 is the year handed to the get.* calls below.
# NOTE(review): y0 does not appear to be used by the cells that follow -- confirm.
y1 = '2021'
y0 = '2013'

# ACS table groups requested from the API.
cols_inc = 'group(B19001)'       # used for the households-by-income-band tables
cols_inc_w = 'group(B19001A)'    # presumably a race/ethnicity iteration of B19001 -- TODO confirm
cols_inc_agg = 'group(B19025)'   # used in the "Aggregate income" section
cols_inc_size = 'group(B19019)'  # used in the "median HHI by HH Size" section

# Dataset path passed as the first argument to get.get_bgp / get_phx / get_us.
source = 'acs/acs5'

## Households by Income Band
<30k, 30-50k, 50-75k, 75-100k, +100k

#### Urban Villages

In [5]:
def group_inc(df,year):
    """Collapse detailed income columns into bands and tag names with the year.

    For each of the four bands (i_u30, i_3050, i_5075, i_o100) an ``_E``
    column is added holding the row-wise sum of the band's estimate columns,
    and an ``_M`` column holding ``calc.get_moe`` applied to the band's
    margin columns (presumably a combined margin of error -- confirm in
    ``utilcalcs``). These columns are written onto the frame passed in.

    The columns listed in ``inc_vars`` are then dropped, ``inc_rename`` is
    applied, and every column after the first gets the last two characters
    of ``year`` inserted before its final character
    (e.g. ``i_u30_E`` -> ``i_u30_21E`` for year ``'2021'``).

    Args:
        df: frame whose first column is the identifier left untouched by the
            year tagging.
        year: year as a string; only its last two characters are used.

    Returns:
        A new DataFrame with the banded, renamed, year-tagged columns.
    """
    # (output stem, estimate columns, margin columns) for each band
    bands = (
        ('i_u30', i_u30E, i_u30M),
        ('i_3050', i_3050E, i_3050M),
        ('i_5075', i_5075E, i_5075M),
        ('i_o100', i_o100E, i_o100M),
    )
    for stem, est_cols, moe_cols in bands:
        df[f'{stem}_E'] = df.loc[:, est_cols].sum(axis=1)
        df[f'{stem}_M'] = df.apply(lambda row: calc.get_moe(row[moe_cols]), axis=1)

    trimmed = df.drop(columns=inc_vars).rename(inc_rename, axis=1)

    # Insert the two-digit year just before the trailing E/M marker,
    # skipping the first (identifier) column.
    yy = year[-2:]
    tagged = {label: f'{label[:-1]}{yy}{label[-1:]}' for label in trimmed.columns[1:]}
    return trimmed.rename(columns=tagged)

In [6]:
def make_inc_vil(year,cols):
    """Build the income-band table aggregated by ``geo_agg.make_uv``.

    Args:
        year: year as a string; passed to ``get.get_bgp`` and ``group_inc``
            as-is and to ``geo_agg.make_uv`` as an int.
        cols: column/group specification forwarded to ``get.get_bgp``.

    Returns:
        Whatever ``geo_agg.make_uv`` returns for the banded frame.
    """
    raw = get.get_bgp(source, year, cols)

    # Keep only the columns whose label does not end in "A".
    kept = raw.filter(regex='(?<!A)$', axis=1)

    # GEO_ID goes first: group_inc leaves the first column out of its year tagging.
    ordered = ['GEO_ID'] + [label for label in kept.columns if label != 'GEO_ID']
    kept = kept[ordered]

    cleaned = get.clean_data(kept, kept.columns)
    banded = group_inc(cleaned, year)
    return geo_agg.make_uv(banded, int(year))

In [7]:
# Income-band table from make_inc_vil for the end year (y1), using the B19001 group.
uv1 = make_inc_vil(y1,cols_inc)

In [8]:
uv1.head()

Unnamed: 0,name,i_u30_21E,i_u30_21M,i_u30_21C,i_tot_21E,i_tot_21M,i_tot_21C,i_5075_21E,i_5075_21M,i_5075_21C,i_3050_21E,i_3050_21M,i_3050_21C,i_o100_21E,i_o100_21M,i_o100_21C,i_75100_21E,i_75100_21M,i_75100_21C
0,Alhambra,13818.0,940.514753,4.137657,48535.0,1548.695903,1.939748,8951.0,869.187552,5.903044,10110.0,988.023785,5.940874,10473.0,773.206311,4.488057,5183.0,606.375296,7.112043
1,North Mountain,16248.0,1184.021115,4.429897,66863.0,1941.481651,1.76515,13303.0,1178.969465,5.387497,14237.0,1129.666765,4.823541,15347.0,1071.279142,4.243393,7728.0,723.30215,5.689666
2,Maryvale,15054.0,1147.874993,4.635289,67881.0,2069.70336,1.853506,14787.0,1088.87373,4.476427,16122.0,1307.730859,4.930984,12378.0,1077.165261,5.290125,9540.0,975.326099,6.21492
3,South Mountain,8912.0,877.74085,5.98722,39548.0,1473.240306,2.264557,7798.0,829.032569,6.462826,7497.0,775.470825,6.287993,10358.0,958.072022,5.622848,4983.0,533.22228,6.505063
4,Laveen,1933.0,337.621089,10.617733,18574.0,921.296369,3.015283,3758.0,530.823888,8.586729,2438.0,422.561238,10.536347,6879.0,723.90469,6.397204,3566.0,442.940177,7.550885


##### Make Phoenix & U.S. function

In [9]:
def make_inc(year,geo):
    """Build the income-band table for Phoenix or for the U.S.

    Fetches the ``cols_inc`` group through ``get.get_phx`` or ``get.get_us``,
    applies ``inc_rename``, drops every column whose label ends in "A",
    moves ``GEO_ID`` to the front, cleans the frame with ``get.clean_data``,
    bands it with ``group_inc`` and finally passes it through
    ``geo_agg.sumgeo_cv`` keyed on ``GEO_ID``.

    Args:
        year: year as a string, forwarded to the fetch helper and ``group_inc``.
        geo: ``'phoenix'`` or ``'us'``.

    Returns:
        Whatever ``geo_agg.sumgeo_cv`` returns for the banded frame.

    Raises:
        ValueError: if ``geo`` is neither ``'phoenix'`` nor ``'us'``.
    """
    if geo == 'phoenix':
        df = get.get_phx(source,year,cols_inc)
    elif geo == 'us':
        df = get.get_us(source,year,cols_inc)
        df = df.drop(['NAME','us'],axis=1)
    else:
        # Fail loudly here: silently passing left `df` unbound and surfaced
        # as an UnboundLocalError on the rename below.
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")

    df.rename(columns=inc_rename,inplace=True)
    # Keep only labels that do not end in "A" (presumably the API's
    # annotation columns -- confirm against the variables listing).
    df = df.filter(regex='(?<!A)$',axis=1)
    df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
    df = get.clean_data(df,df.columns)
    df = group_inc(df,year)
    df = geo_agg.sumgeo_cv(df,'GEO_ID')
    return df

In [14]:
# Citywide and national income-band tables for the end year.
ph1 = make_inc(y1, 'phoenix')
us1 = make_inc(y1, 'us')

# Give each single-geography table a display name.
ph1['name'] = 'Phoenix'
us1['name'] = 'US'

# The GEO_ID key is no longer needed once the name is set.
ph1 = ph1.drop(columns=['GEO_ID'])
us1 = us1.drop(columns=['GEO_ID'])

In [15]:
# Stack the urban-village, Phoenix and US tables row-wise.
# Each frame's original row index is kept (ignore_index is not set).
final =  pd.concat([uv1,ph1,us1])

## Aggregate income

In [41]:
# PUMA GEO_ID values (as strings) kept when the PUMA tables are filtered
# below -- presumably the PUMAs covering Phoenix; verify against the PUMA map.
phx_pumas = [
    '0400112', '0400113', '0400114', '0400115', '0400116',
    '0400117', '0400118', '0400119', '0400120', '0400121',
    '0400122', '0400123', '0400125', '0400128', '0400129',
]

In [42]:
# B19025 group by PUMA for the end year, from the 'acs/acs1' dataset
# (not the module-level `source`).
pum_income = get.get_puma('acs/acs1',y1,cols_inc_agg)

In [43]:
# B19001 group by PUMA, cut down to the key plus the _001 estimate and margin
# (presumably the total-households line -- confirm in the variable list).
pum_hh = get.get_puma('acs/acs1', y1, 'group(B19001)').loc[
    :, ['GEO_ID', 'B19001_001E', 'B19001_001M']
]

# Attach the B19025 columns to each PUMA row; a left join keeps every row of pum_hh.
pum_agg = pum_hh.merge(pum_income, how='left', on='GEO_ID')

In [44]:
# Keep the PUMAs listed in phx_pumas, then drop every column whose label ends in "A".
pum_agg = pum_agg.loc[pum_agg['GEO_ID'].isin(phx_pumas)].filter(regex='(?<!A)$', axis=1)

# Everything after the first (GEO_ID) column is cast to float.
for col in pum_agg.columns[1:]:
    pum_agg[col] = pum_agg[col].astype(float)

In [46]:
pum_agg.head()

Unnamed: 0,GEO_ID,B19001_001E,B19001_001M,B19025_001E,B19025_001M
0,400112,58257.0,3140.0,9879198000.0,940252738.0
5,400120,42867.0,3030.0,5277466000.0,500777704.0
6,400121,43773.0,2705.0,3470771000.0,301702185.0
10,400128,42676.0,3152.0,4152642000.0,378523556.0
14,400119,35433.0,2540.0,2688408000.0,313691809.0


In [48]:
# Average income per PUMA: the B19025_001 estimate divided by the B19001_001 estimate
# (presumably aggregate household income over total households -- confirm).
# Only the estimates are combined; no margin is derived for avg_inc here.
pum_agg['avg_inc'] = pum_agg['B19025_001E'] / pum_agg['B19001_001E']

In [49]:
pum_agg

Unnamed: 0,GEO_ID,B19001_001E,B19001_001M,B19025_001E,B19025_001M,avg_inc
0,400112,58257.0,3140.0,9879198000.0,940252738.0,169579.57842
5,400120,42867.0,3030.0,5277466000.0,500777704.0,123112.564444
6,400121,43773.0,2705.0,3470771000.0,301702185.0,79290.229137
10,400128,42676.0,3152.0,4152642000.0,378523556.0,97306.258787
14,400119,35433.0,2540.0,2688408000.0,313691809.0,75872.985635
15,400123,35154.0,2748.0,2418772000.0,227598792.0,68805.034989
17,400115,44062.0,3621.0,3389116000.0,337928691.0,76916.97154
18,400118,49399.0,3211.0,3470788000.0,345759446.0,70260.294743
19,400114,47592.0,3071.0,3944076000.0,328904699.0,82872.667675
20,400113,43131.0,2969.0,6010658000.0,670326020.0,139358.183209


# Write the PUMA average-income table to Excel without the row index.
# Assumes the relative output/ directory already exists.
pum_agg.to_excel('output/avginc_puma_21.xlsx',index=False)

### PUMA - median HHI by HH Size, total HHs by Size

In [53]:
# B19019 group by PUMA from 'acs/acs1', passed through get.clean_table.
pum_size = get.clean_table(get.get_puma('acs/acs1', y1, cols_inc_size))

# Keep only the PUMAs listed in phx_pumas.
pum_size = pum_size.loc[pum_size['GEO_ID'].isin(phx_pumas)]

In [55]:
# Short names for the B19019 columns: _001 -> hh_tot and _002.._008 -> hh_1..hh_7,
# each with its E and M variant (presumably estimate and margin of error).
pumas_rename = {
    f'B19019_{pos:03d}{kind}': f'hh_{label}_{kind}'
    for pos, label in enumerate(('tot', '1', '2', '3', '4', '5', '6', '7'), start=1)
    for kind in ('E', 'M')
}

In [56]:
# Switch to the short hh_* column names, then run the frame through
# geo_agg.sumgeo_cv keyed on GEO_ID (the displayed result carries extra *_C columns).
pum_size = pum_size.rename(columns=pumas_rename)
pum_size = geo_agg.sumgeo_cv(pum_size, 'GEO_ID')
pum_size

Unnamed: 0,GEO_ID,hh_4_E,hh_4_M,hh_4_C,hh_2_E,hh_2_M,hh_2_C,hh_7_E,hh_7_M,hh_7_C,...,hh_5_C,hh_1_E,hh_1_M,hh_1_C,hh_tot_E,hh_tot_M,hh_tot_C,hh_3_E,hh_3_M,hh_3_C
0,400112,205282.0,17355.0,5.139346,112880.0,4542.0,2.446044,32385.0,230779.0,433.19804,...,14.774731,52394.0,11882.0,13.786121,112821.0,6075.0,3.273335,155559.0,23864.0,9.325716
1,400120,137470.0,33338.0,14.742316,95697.0,16394.0,10.414075,153386.0,186165.0,73.781318,...,58.782293,65934.0,18012.0,16.606825,95271.0,8543.0,5.451095,112612.0,10746.0,5.800912
2,400121,77402.0,32802.0,25.762158,59342.0,15061.0,15.428572,119039.0,32710.0,16.704188,...,17.397587,31919.0,7170.0,13.655386,63729.0,5636.0,5.376108,63552.0,10875.0,10.402414
3,400128,130795.0,19607.0,9.112848,85046.0,6411.0,4.582537,89644.0,103706.0,70.326136,...,35.471,40801.0,10614.0,15.814023,84084.0,6990.0,5.053566,93678.0,14770.0,9.584666
4,400119,81093.0,12584.0,9.433426,67477.0,13216.0,11.906342,53257.0,62283.0,71.093013,...,12.750712,32355.0,15019.0,28.218486,59140.0,8938.0,9.187411,76658.0,24878.0,19.72841
5,400123,53139.0,10663.0,12.198323,49683.0,11127.0,13.614584,82783.0,52309.0,38.412215,...,3.55189,36299.0,6228.0,10.43009,54274.0,5723.0,6.410118,64399.0,7575.0,7.15052
6,400115,95953.0,27084.0,17.158857,68504.0,8849.0,7.85258,67095.0,8038.0,7.282692,...,35.450623,31055.0,4100.0,8.025765,55110.0,6211.0,6.851177,70568.0,32264.0,27.79358
7,400118,49050.0,11355.0,14.072855,68716.0,14807.0,13.099156,39453.0,104244.0,160.622038,...,18.119207,35615.0,3025.0,5.16329,52899.0,5796.0,6.660625,76621.0,20326.0,16.126429
8,400114,81309.0,18044.0,13.490508,82749.0,10166.0,7.468295,54774.0,140844.0,156.314041,...,52.45832,40185.0,11209.0,16.95653,65348.0,6841.0,6.363871,92149.0,16825.0,11.099375
9,400113,108531.0,25356.0,14.202377,104568.0,14388.0,8.364418,0.0,0.0,0.0,...,6.998864,49272.0,8370.0,10.326648,92533.0,10546.0,6.928277,153591.0,14393.0,5.696652


In [58]:
# Same B19019 table for Phoenix as a whole.
# NOTE(review): this reads `source` ('acs/acs5') while the PUMA rows above come
# from 'acs/acs1' -- confirm that mixing the two datasets is intended.
phx_size = get.clean_table(get.get_phx(source, y1, cols_inc_size))
phx_size = phx_size.rename(columns=pumas_rename)
phx_size = geo_agg.sumgeo_cv(phx_size, 'GEO_ID')

phx_size.head()

Unnamed: 0,GEO_ID,hh_4_E,hh_4_M,hh_4_C,hh_2_E,hh_2_M,hh_2_C,hh_7_E,hh_7_M,hh_7_C,...,hh_5_C,hh_1_E,hh_1_M,hh_1_C,hh_tot_E,hh_tot_M,hh_tot_C,hh_3_E,hh_3_M,hh_3_C
0,455000,84652.0,2345.0,1.683991,75675.0,1416.0,1.137483,85044.0,6468.0,4.623389,...,2.970513,38612.0,796.0,1.253213,64927.0,854.0,0.799589,83201.0,2037.0,1.488321


In [60]:
# Stack the PUMA rows and the Phoenix row into one table.
# Each frame's original row index is kept (ignore_index is not set).
final_medhhi_size = pd.concat([pum_size,phx_size])

In [62]:
final_medhhi_size

Unnamed: 0,GEO_ID,hh_4_E,hh_4_M,hh_4_C,hh_2_E,hh_2_M,hh_2_C,hh_7_E,hh_7_M,hh_7_C,...,hh_5_C,hh_1_E,hh_1_M,hh_1_C,hh_tot_E,hh_tot_M,hh_tot_C,hh_3_E,hh_3_M,hh_3_C
0,400112,205282.0,17355.0,5.139346,112880.0,4542.0,2.446044,32385.0,230779.0,433.19804,...,14.774731,52394.0,11882.0,13.786121,112821.0,6075.0,3.273335,155559.0,23864.0,9.325716
1,400120,137470.0,33338.0,14.742316,95697.0,16394.0,10.414075,153386.0,186165.0,73.781318,...,58.782293,65934.0,18012.0,16.606825,95271.0,8543.0,5.451095,112612.0,10746.0,5.800912
2,400121,77402.0,32802.0,25.762158,59342.0,15061.0,15.428572,119039.0,32710.0,16.704188,...,17.397587,31919.0,7170.0,13.655386,63729.0,5636.0,5.376108,63552.0,10875.0,10.402414
3,400128,130795.0,19607.0,9.112848,85046.0,6411.0,4.582537,89644.0,103706.0,70.326136,...,35.471,40801.0,10614.0,15.814023,84084.0,6990.0,5.053566,93678.0,14770.0,9.584666
4,400119,81093.0,12584.0,9.433426,67477.0,13216.0,11.906342,53257.0,62283.0,71.093013,...,12.750712,32355.0,15019.0,28.218486,59140.0,8938.0,9.187411,76658.0,24878.0,19.72841
5,400123,53139.0,10663.0,12.198323,49683.0,11127.0,13.614584,82783.0,52309.0,38.412215,...,3.55189,36299.0,6228.0,10.43009,54274.0,5723.0,6.410118,64399.0,7575.0,7.15052
6,400115,95953.0,27084.0,17.158857,68504.0,8849.0,7.85258,67095.0,8038.0,7.282692,...,35.450623,31055.0,4100.0,8.025765,55110.0,6211.0,6.851177,70568.0,32264.0,27.79358
7,400118,49050.0,11355.0,14.072855,68716.0,14807.0,13.099156,39453.0,104244.0,160.622038,...,18.119207,35615.0,3025.0,5.16329,52899.0,5796.0,6.660625,76621.0,20326.0,16.126429
8,400114,81309.0,18044.0,13.490508,82749.0,10166.0,7.468295,54774.0,140844.0,156.314041,...,52.45832,40185.0,11209.0,16.95653,65348.0,6841.0,6.363871,92149.0,16825.0,11.099375
9,400113,108531.0,25356.0,14.202377,104568.0,14388.0,8.364418,0.0,0.0,0.0,...,6.998864,49272.0,8370.0,10.326648,92533.0,10546.0,6.928277,153591.0,14393.0,5.696652


# Write the three income-band tables to one workbook, one sheet per geography,
# without the row index. The path is a plain literal: the previous f-string had
# no placeholders. Assumes the relative output/ directory already exists.
with pd.ExcelWriter('output/HH_inc.xlsx') as writer:
    uv1.to_excel(writer, sheet_name="urban_village", index=False)
    ph1.to_excel(writer, sheet_name="phoenix", index=False)
    us1.to_excel(writer, sheet_name="us", index=False)