# ACS Household Income in Phoenix Urban Villages, City of Phoenix, U.S.

- https://www.census.gov/data/developers/data-sets/acs-5year.html

For households by income and race/ethnicity:
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

**Note:** Census Block Groups (which are aggregated to Phoenix Urban Village areas) are not available before 2013.

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from acs_income_vars import *

In [3]:
# Load the two block-group crosswalk tables (file names suggest 2010- and
# 2020-vintage block groups mapped to urban villages -- confirm against the data).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Left-pad every geoid with zeros to a width of 12 characters so it can be
# compared as text (presumably against the API's GEO_ID after cleaning).
for df in (bgp_10, bgp_20):
    df['geoid'] = df['geoid'].map('{0:0>12}'.format)

# Get rid of the area & geographic columns that are not being used.
bgp_10 = bgp_10.drop(columns=['aland10', 'awater10', 'lat10', 'lon10', 'land_acre'])
bgp_20 = bgp_20.drop(columns=['aland20', 'awater20', 'lat20', 'lon20', 'land_acre'])

In [4]:
# Search parameters.
# NOTE: variable number assignments differ between the 2010 and 2020 vintages.

# Dataset path passed to the get_acs helpers.
source = 'acs/acs5'

# Survey years as strings: y0 is the start year, y1 the end year.
y0 = '2013'
y1 = '2021'

# Census table groups to request (presumably household income, the same table
# for one race/ethnicity group, and aggregate income -- confirm against the
# variables pages linked at the top of the notebook).
cols_inc = 'group(B19001)'
cols_inc_w = 'group(B19001A)'
cols_inc_agg = 'group(B19025)'

## Households by Income Band
<30k, 30-50k, 50-75k, 75-100k, +100k

#### Urban Villages

In [5]:
def group_inc(df, year):
    """Collapse detailed income columns into broader bands and tag them with the year.

    For each of the four bands built here (under 30k, 30-50k, 50-75k, over
    100k) an estimate column is created by summing the band's estimate columns
    and a margin-of-error column is created by passing the band's margin
    columns, row by row, to ``calc.get_moe``. The detailed columns listed in
    ``inc_vars`` are then dropped, the remaining columns are renamed with
    ``inc_rename``, and the last two characters of ``year`` are inserted before
    the final character of every column name except the first.

    The column-name lists (``i_u30E``, ``i_u30M``, ...), ``inc_vars`` and
    ``inc_rename`` are not defined in this notebook; presumably they come from
    the star import of ``acs_income_vars`` -- confirm there.
    NOTE(review): no 75-100k band is summed here although the notebook output
    shows one; presumably ``inc_rename`` maps a single source column to it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the detailed estimate and margin columns. The first
        column is left untouched by the year tagging (callers in this notebook
        put the id column first). The frame is not modified.
    year : str
        Survey year; only its last two characters are used.

    Returns
    -------
    pandas.DataFrame
        A new frame with the banded columns, renamed and year-tagged.
    """
    # Work on a copy so the caller's frame does not silently gain the band
    # columns (and so a slice passed in cannot trigger chained-assignment
    # warnings).
    out = df.copy()

    bands = (
        ('i_u30', i_u30E, i_u30M),
        ('i_3050', i_3050E, i_3050M),
        ('i_5075', i_5075E, i_5075M),
        ('i_o100', i_o100E, i_o100M),
    )
    for prefix, est_cols, moe_cols in bands:
        out[f'{prefix}_E'] = out.loc[:, est_cols].sum(axis=1)
        out[f'{prefix}_M'] = out.apply(
            lambda row, cols=moe_cols: calc.get_moe(row[cols]), axis=1
        )

    out = out.drop(columns=inc_vars).rename(inc_rename, axis=1)

    # Add the end-year into each column name in a single rename pass,
    # e.g. 'i_u30_E' with year '2021' becomes 'i_u30_21E'.
    yy = year[-2:]
    tagged = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in out.columns[1:]}
    return out.rename(columns=tagged)

In [6]:
def make_inc_vil(geo_df, year, cols):
    """Build the income-band table aggregated by the ``name`` column of ``geo_df``.

    Fetches block-group data with ``get.get_bgp`` for the module-level
    ``source``, bands it with ``group_inc``, left-joins it onto ``geo_df``
    (``geoid`` matched to ``GEO_ID``) and hands the result to
    ``geo_agg.sumgeo_cv`` keyed on ``'name'``.

    Parameters
    ----------
    geo_df : pandas.DataFrame
        Crosswalk table with at least ``geoid`` and ``name`` columns.
    year : str
        Survey year to request.
    cols : str
        Column/group specification passed to ``get.get_bgp``.

    Returns
    -------
    Whatever ``geo_agg.sumgeo_cv`` returns (the notebook output shows one row
    per ``name`` with estimate, margin and ``C`` columns).
    """
    fetched = get.get_bgp(source, year, cols)

    # Keep only the columns whose name does not end in 'A'.
    kept = fetched.filter(regex='(?<!A)$', axis=1)

    # Put the id column first; the remaining columns keep their order.
    others = [col for col in kept.columns if col != 'GEO_ID']
    kept = kept[['GEO_ID'] + others]

    cleaned = get.clean_data(kept, kept.columns)
    banded = group_inc(cleaned, year)

    # Left join: every row of geo_df is kept even without matching data.
    joined = pd.merge(geo_df, banded, how='left', left_on='geoid', right_on='GEO_ID')
    joined = joined.drop(['geoid', 'GEO_ID'], axis=1)
    return geo_agg.sumgeo_cv(joined, 'name')

In [7]:
# Urban-village income bands for the end year, using the bgp_20 crosswalk.
uv1 = make_inc_vil(geo_df=bgp_20, year=y1, cols=cols_inc)

In [8]:
uv1.head()

Unnamed: 0,name,i_75100_21E,i_75100_21M,i_75100_21C,i_o100_21E,i_o100_21M,i_o100_21C,i_3050_21E,i_3050_21M,i_3050_21C,i_5075_21E,i_5075_21M,i_5075_21C,i_u30_21E,i_u30_21M,i_u30_21C,i_tot_21E,i_tot_21M,i_tot_21C
0,Alhambra,5183.0,606.375296,7.112043,10473.0,773.206311,4.488057,10110.0,988.023785,5.940874,8951.0,869.187552,5.903044,13818.0,940.514753,4.137657,48535.0,1548.695903,1.939748
1,North Mountain,7728.0,723.30215,5.689666,15347.0,1071.279142,4.243393,14237.0,1129.666765,4.823541,13303.0,1178.969465,5.387497,16248.0,1184.021115,4.429897,66863.0,1941.481651,1.76515
2,Maryvale,9540.0,975.326099,6.21492,12378.0,1077.165261,5.290125,16122.0,1307.730859,4.930984,14787.0,1088.87373,4.476427,15054.0,1147.874993,4.635289,67881.0,2069.70336,1.853506
3,South Mountain,4983.0,533.22228,6.505063,10358.0,958.072022,5.622848,7497.0,775.470825,6.287993,7798.0,829.032569,6.462826,8912.0,877.74085,5.98722,39548.0,1473.240306,2.264557
4,Laveen,3566.0,442.940177,7.550885,6879.0,723.90469,6.397204,2438.0,422.561238,10.536347,3758.0,530.823888,8.586729,1933.0,337.621089,10.617733,18574.0,921.296369,3.015283


##### Make Phoenix & U.S. function

In [9]:
def make_inc(year, geo):
    """Build the income-band table for the City of Phoenix or the U.S.

    Fetches the ``cols_inc`` group from the module-level ``source`` with
    ``get.get_phx`` or ``get.get_us``, renames columns with ``inc_rename``,
    bands them with ``group_inc`` and passes the result to
    ``geo_agg.sumgeo_cv`` keyed on ``'GEO_ID'``.

    Parameters
    ----------
    year : str
        Survey year to request.
    geo : str
        Either ``'phoenix'`` or ``'us'``.

    Returns
    -------
    Whatever ``geo_agg.sumgeo_cv`` returns (the notebook output shows a single
    row per ``GEO_ID``).

    Raises
    ------
    ValueError
        If ``geo`` is not one of the supported values. Previously this case
        fell through and failed later with an unrelated-looking
        ``UnboundLocalError``.
    """
    if geo == 'phoenix':
        df = get.get_phx(source, year, cols_inc)
    elif geo == 'us':
        df = get.get_us(source, year, cols_inc)
        df = df.drop(['NAME', 'us'], axis=1)
    else:
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")

    df = df.rename(columns=inc_rename)
    # Keep only the columns whose name does not end in 'A'; both estimate and
    # margin columns survive this filter.
    df = df.filter(regex='(?<!A)$', axis=1)
    df = df[['GEO_ID'] + [col for col in df.columns if col != 'GEO_ID']]  # move id to first col
    df = get.clean_data(df, df.columns)
    df = group_inc(df, year)
    df = geo_agg.sumgeo_cv(df, 'GEO_ID')
    return df

### City of Phoenix & U.S.

In [10]:
# City-wide and national income bands for the end year.
ph1 = make_inc(year=y1, geo='phoenix')
us1 = make_inc(year=y1, geo='us')

In [11]:
ph1.head()

Unnamed: 0,GEO_ID,i_75100_21E,i_75100_21M,i_75100_21C,i_o100_21E,i_o100_21M,i_o100_21C,i_3050_21E,i_3050_21M,i_3050_21C,i_5075_21E,i_5075_21M,i_5075_21C,i_u30_21E,i_u30_21M,i_u30_21C,i_tot_21E,i_tot_21M,i_tot_21C
0,455000,78450.0,2480.0,1.921732,173831.0,3470.972918,1.213831,105129.0,3066.725941,1.773318,105274.0,2780.022482,1.605319,117192.0,3115.801662,1.61624,579876.0,2997.0,0.314185


In [12]:
us1.head()

Unnamed: 0,GEO_ID,i_75100_21E,i_75100_21M,i_75100_21C,i_o100_21E,i_o100_21M,i_o100_21C,i_3050_21E,i_3050_21M,i_3050_21C,i_5075_21E,i_5075_21M,i_5075_21C,i_u30_21E,i_u30_21M,i_u30_21C,i_tot_21E,i_tot_21M,i_tot_21C
0,0100000US,15895589.0,44008.0,0.168302,41662649.0,97392.519205,0.142106,19259497.0,38265.320213,0.12078,20845331.0,39549.492854,0.115336,26347926.0,45154.223933,0.10418,124010992.0,196755.0,0.096449


In [13]:
# Write the three result tables to one workbook, one sheet per geography,
# without the DataFrame index.
with pd.ExcelWriter('output/HH_inc.xlsx') as writer:
    for sheet, frame in (("urban_village", uv1), ("phoenix", ph1), ("us", us1)):
        frame.to_excel(writer, sheet_name=sheet, index=False)

## Aggregate income to calculate mean income

In [14]:
## ADD MORE HERE