# ACS Household Income in Phoenix Urban Villages, City of Phoenix, U.S.

- https://www.census.gov/data/developers/data-sets/acs-5year.html

For households by income and race/ethnicity:
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

**Note:** Census block groups (which are aggregated to Phoenix Urban Village areas) are not available before 2013.

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import getters as get
import utilcalcs as calc
import geo_agg
from acs_income_vars import *
from geos import *

In [3]:
# Search parameters
y1 = '2021'    # year passed to every table-building call in this notebook
y0_1 = '2013'  # presumably the 1-year baseline year (not used in the visible cells)
y0_5 = '2013'  # presumably the 5-year baseline year (not used in the visible cells)

# Column selections handed to the getters as `cols`
cols_inc = 'group(B19001)'
cols_inc_w = 'group(B19001A)'
cols_inc_agg = 'group(B19025),B19001_001E,B19001_001M'
cols_inc_size = 'group(B19019)'

# Dataset paths handed to the getters as `source`
source_5 = 'acs/acs5'
source_1 = 'acs/acs1'

In [4]:
# Comparison cities and larger geographies: GEO_ID code -> display name
rename_geos = {
    '0455000': 'Phoenix',
    '04013': 'Maricopa',
    '0100000US': 'US',
    '0400000US04': 'AZ',
    '0473000': 'Tempe',
    '0465000': 'Scottsdale',
    '0427820': 'Glendale',
    '4865000': 'San Antonio',
    '4835000': 'Houston',
    '1235000': 'Jacksonville',
}

In [5]:
# Rename map for the B19019 columns: B19019_001 -> 'tot', B19019_002..008 -> '1'..'7',
# keeping the trailing E / M suffix (e.g. 'B19019_003M' -> 'hh_2_M').
hh_size_rename = {
    f'B19019_{num:03d}{suffix}': f'hh_{label}_{suffix}'
    for num, label in enumerate(('tot', '1', '2', '3', '4', '5', '6', '7'), start=1)
    for suffix in ('E', 'M')
}

## Households by Income Band
Income bands: under 30k, 30-50k, 50-75k, 75-100k, and 100k or more.

### Urban Villages

In [6]:
def group_inc(df, year):
    """Add banded income columns to `df` and tag the column names with the year.

    For each of the four bands (i_u30, i_3050, i_5075, i_o100) an estimate
    column `<band>_E` is built by summing the band's estimate columns row-wise,
    and a margin column `<band>_M` is built by passing the band's margin
    columns, row by row, to `calc.get_moe`. The column lists (`i_u30E`,
    `i_u30M`, ...), `inc_vars` and `inc_rename` are not defined in this
    notebook; presumably they come from the star import of `acs_income_vars`.

    Note: the band columns are written onto the frame that was passed in, so
    the caller's `df` gains those columns as a side effect.

    Args:
        df: table containing the detailed income columns.
        year: year as a string; its last two characters go into the names.

    Returns:
        A new DataFrame with the `inc_vars` columns dropped, `inc_rename`
        applied, and the two-digit year inserted before the final character
        of every column name except the first.
    """
    # (output prefix, estimate columns to sum, margin columns to combine)
    band_specs = (
        ('i_u30', i_u30E, i_u30M),
        ('i_3050', i_3050E, i_3050M),
        ('i_5075', i_5075E, i_5075M),
        ('i_o100', i_o100E, i_o100M),
    )
    for prefix, est_cols, moe_cols in band_specs:
        df[f'{prefix}_E'] = df.loc[:, est_cols].sum(axis=1)
        df[f'{prefix}_M'] = df.apply(
            lambda row, cols=moe_cols: calc.get_moe(row[cols]), axis=1
        )

    banded = df.drop(columns=inc_vars).rename(inc_rename, axis=1)

    # add end-year into column name, e.g. 'i_u30_E' -> 'i_u30_21E' for year '2021';
    # the first column is left as it is
    yy = year[-2:]
    year_tagged = {
        name: f'{name[:-1]}{yy}{name[-1:]}' for name in banded.columns[1:]
    }
    return banded.rename(columns=year_tagged)

In [7]:
# Build the income-band table for one family of geographies.
def make_table(source,year,cols,geotype='big'):
    """Fetch, band and aggregate an income table for the requested geographies.

    Args:
        source: dataset path handed to the getters (e.g. 'acs/acs5').
        year: year as a string (e.g. '2021'); converted with int() for
            `geo_agg.make_uv` and sliced for the column-name suffix.
        cols: column selection handed to the getters.
        geotype: which geographies to build:
            'big'       - Phoenix, Maricopa, US, AZ and the comparison cities,
                          aggregated on 'GEO_ID';
            'uv'        - the `get.get_bgp` result, passed through
                          `geo_agg.make_uv` and aggregated on 'name';
            'az_places' - the `get.get_az_plc` result, aggregated on 'GEO_ID'.

    Returns:
        The DataFrame produced by `geo_agg.sumgeo_cv`, with the two-digit
        year inserted before the final character of every column name except
        the first.

    Raises:
        ValueError: if `geotype` is not one of the three supported values.
    """
    if geotype == 'big':
        getters = (
            get.get_phx,
            get.get_maricopa,
            get.get_us,
            get.get_az,
            get.get_comp_cities,
        )
        df = pd.concat([getter(source, year, cols) for getter in getters])
        df = df.drop(['NAME', 'us', 'state'], axis=1)
        aggtype = 'GEO_ID'
    elif geotype == 'uv':
        df = get.get_bgp(source, year, cols)
        aggtype = 'name'
    elif geotype == 'az_places':
        df = get.get_az_plc(source, year, cols)
        aggtype = 'GEO_ID'
    else:
        # Previously this branch was `pass`, which left `df` unbound and made
        # the function fail further down with an unrelated UnboundLocalError.
        raise ValueError(
            f"unknown geotype {geotype!r}; expected 'big', 'uv' or 'az_places'"
        )

    df = get.clean_table(df)
    df = group_inc(df, year)
    if geotype == 'uv':
        df = geo_agg.make_uv(df, int(year))
    df = geo_agg.sumgeo_cv(df, aggtype)

    # NOTE(review): group_inc already inserts the two-digit year into the
    # column names, so this second pass inserts it again
    # (e.g. 'i_u30_21E' -> 'i_u30_2121E') unless geo_agg.sumgeo_cv rebuilds the
    # names. Behaviour is kept as it was; confirm which pass is intended.
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

### 1 year - Income by Band for Large Geos

In [8]:
# 1-year income bands for the default ('big') geographies
geo_y1_1y = make_table(source=source_1, year=y1, cols=cols_inc)

In [9]:
# Swap GEO_ID codes for display names; codes missing from rename_geos become NaN
geo_y1_1y['GEO_ID'] = geo_y1_1y['GEO_ID'].map(rename_geos)

### 5 year - Income by Band for Smaller Geos

In [10]:
# 5-year income bands for the urban villages
uv_y1_5y = make_table(source_5, y1, cols_inc, geotype='uv')
# the 'uv' table is keyed by 'name'; give it the same key column as the other tables
uv_y1_5y = uv_y1_5y.rename(columns={'name': 'GEO_ID'})

# 5-year income bands for the AZ places and for the default ('big') geographies
pl_y1_5y = make_table(source_5, y1, cols_inc, geotype='az_places')
geo_y1_5y = make_table(source_5, y1, cols_inc)

In [11]:
# Stack the three 5-year tables: urban villages, big geographies, AZ places
big5 = pd.concat((uv_y1_5y, geo_y1_5y, pl_y1_5y))

## Aggregate income for PUMAs, City, County, AZ, U.S.

In [14]:
def calc_avg_income(source, year):
    """Build a table of total income, total households and their ratio.

    Fetches the `cols_inc_agg` columns for the PUMAs listed in `phx_pumas`,
    for Maricopa, the US, AZ and the `get.get_az_plc` result, stacks them,
    cleans and renames the columns, aggregates on 'GEO_ID' with
    `geo_agg.sumgeo_cv`, and adds `avg_inc_E = tot_inc_E / tot_hh_E`.
    No margin column is computed for the average.

    Args:
        source: dataset path handed to the getters.
        year: year handed to the getters.

    Returns:
        The aggregated DataFrame with the extra 'avg_inc_E' column.
    """
    # PUMAs come back for a wider area; keep only the ones listed in phx_pumas
    pumas = get.get_puma(source, year, cols_inc_agg)
    frames = [pumas[pumas.GEO_ID.isin(phx_pumas)]]
    frames += [
        fetch(source, year, cols_inc_agg)
        for fetch in (get.get_maricopa, get.get_us, get.get_az, get.get_az_plc)
    ]

    stacked = pd.concat(frames).drop(columns=['NAME', 'us', 'state'])
    table = get.clean_table(stacked)
    table = table.rename(columns={
        'B19001_001E': 'tot_hh_E',
        'B19001_001M': 'tot_hh_M',
        'B19025_001E': 'tot_inc_E',
        'B19025_001M': 'tot_inc_M',
    })
    table = geo_agg.sumgeo_cv(table, 'GEO_ID')
    table['avg_inc_E'] = table['tot_inc_E'] / table['tot_hh_E']
    return table

In [15]:
# Average-income table from the 1-year dataset
avg_inc = calc_avg_income(source=source_1, year=y1)

### Median HHI by HH Size, total HHs by Size

In [18]:
def make_hh_size(source, year):
    """Build the household-size table from the `cols_inc_size` columns.

    Fetches the columns for the PUMAs listed in `phx_pumas`, for Maricopa,
    the US, AZ and the `get.get_az_plc` result, stacks them, cleans the
    table, renames the columns with `hh_size_rename`, and aggregates on
    'GEO_ID' with `geo_agg.sumgeo_cv`.

    Args:
        source: dataset path handed to the getters.
        year: year handed to the getters.

    Returns:
        The DataFrame returned by `geo_agg.sumgeo_cv`.
    """
    # PUMAs come back for a wider area; keep only the ones listed in phx_pumas
    pumas = get.get_puma(source, year, cols_inc_size)
    frames = [pumas[pumas.GEO_ID.isin(phx_pumas)]]
    frames += [
        fetch(source, year, cols_inc_size)
        for fetch in (get.get_maricopa, get.get_us, get.get_az, get.get_az_plc)
    ]

    stacked = pd.concat(frames).drop(columns=['NAME', 'us', 'state'])
    table = get.clean_table(stacked).rename(columns=hh_size_rename)
    return geo_agg.sumgeo_cv(table, 'GEO_ID')

In [19]:
# Household-size table from the 1-year dataset
big_hhsize = make_hh_size(source=source_1, year=y1)

In [20]:
# Write each table to its own sheet of one workbook.
# The target folder is created first because the writer does not create
# missing directories and would fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
with pd.ExcelWriter('output/HH_inc.xlsx') as writer:
    geo_y1_1y.to_excel(writer, sheet_name="Inc_Band_1YR", index=False)
    big5.to_excel(writer, sheet_name="Inc_Band_5YR", index=False)
    avg_inc.to_excel(writer, sheet_name="Inc_Avg", index=False)
    big_hhsize.to_excel(writer, sheet_name="Inc_HHSize_1YR", index=False)