# How Many Washingtonians are Low- or Middle- Income?

In [1]:
# import necessary packages
import pandas as pd
import numpy as np

## State-level data

Data Source: 2018 American Community Survey via IPUMS NHGIS, University of Minnesota, www.nhgis.org

In [2]:
states = pd.read_csv('../data/2018_state.csv')

In [3]:
wa = states.loc[states['STATE'] == 'Washington']

In [4]:
wa

Unnamed: 0,GISJOIN,YEAR,REGIONA,DIVISIONA,STATE,STATEA,COUNTYA,COUSUBA,PLACEA,TRACTA,...,AJY9M017,AJZAM001,AJ0EM001,AJ1CM001,AJ1CM002,AJ1CM003,AJ1CM004,AJ1CM005,AJ1CM006,AJ1CM007
48,G530,2014-2018,,,Washington,53,,,,,...,2752,308,168,1574,6117,5945,6919,3629,1425,6190


### ratio of income to poverty level - population for whom poverty status is determined

In [5]:
income_to_pov = wa.loc[:, 'AJY4E001': 'AJY4E008']
income_to_pov

Unnamed: 0,AJY4E001,AJY4E002,AJY4E003,AJY4E004,AJY4E005,AJY4E006,AJY4E007,AJY4E008
48,7161708,377106,444515,255057,273495,395859,177498,5238178


In [6]:
income_to_pov.columns = ['Total', 
        'Under .50',
        '.50 to .99',
        '1.00 to 1.24',
        '1.25 to 1.49',
        '1.50 to 1.84',
        '1.85 to 1.99',
        '2.00 and over']
income_to_pov.columns = income_to_pov.columns.str.replace(' ', '_').str.lower()

In [7]:
income_to_pov = income_to_pov.astype(int)

In [8]:
income_to_pov

Unnamed: 0,total,under_.50,.50_to_.99,1.00_to_1.24,1.25_to_1.49,1.50_to_1.84,1.85_to_1.99,2.00_and_over
48,7161708,377106,444515,255057,273495,395859,177498,5238178


In [30]:
less_than_2x_poverty = sum(income_to_pov.loc[48, 'under_.50':'1.85_to_1.99'])

In [31]:
less_than_2x_poverty

1923530

In [32]:
less_than_2x_poverty / income_to_pov['total']

48    0.268585
Name: total, dtype: float64

### household income

In [11]:
house_income = wa.loc[:,'AJY9E001':'AJY9E017'].astype(int)

In [12]:
house_income.columns = ['Total',
        'Less than $10,000',
        '$10,000 to $14,999',
        '$15,000 to $19,999',
        '$20,000 to $24,999',
        '$25,000 to $29,999',
        '$30,000 to $34,999',
        '$35,000 to $39,999',
        '$40,000 to $44,999',
        '$45,000 to $49,999',
        '$50,000 to $59,999',
        '$60,000 to $74,999',
        '$75,000 to $99,999',
        '$100,000 to $124,999',
        '$125,000 to $149,999',
        '$150,000 to $199,999',
        '$200,000 or more']
house_income.columns = house_income.columns.str.replace(' ', '_').str.replace('$', '').str.lower()

In [13]:
house_income

Unnamed: 0,total,"less_than_10,000","10,000_to_14,999","15,000_to_19,999","20,000_to_24,999","25,000_to_29,999","30,000_to_34,999","35,000_to_39,999","40,000_to_44,999","45,000_to_49,999","50,000_to_59,999","60,000_to_74,999","75,000_to_99,999","100,000_to_124,999","125,000_to_149,999","150,000_to_199,999","200,000_or_more"
48,2800423,144695,95789,99129,104429,107027,112120,110303,115143,105170,210273,286122,381845,281885,196331,215941,234221


In [15]:
people_per_house = income_to_pov.loc[48, 'total']/house_income.loc[48, 'total']
print(f"Average people per household: {income_to_pov.loc[48, 'total']/house_income.loc[48, 'total']}")

Average people per household: 2.5573665121304887


In [16]:
median_house_income = wa.loc[:, 'AJZAE001'].astype(int)
median_house_income = median_house_income[48]
median_house_income

70116

In [17]:
print(f"""HUD income levels:
low (80% median): {median_house_income*.8}
very low (50% median): {median_house_income*.5}
extremely low(30% median): {median_house_income*.3}""")

HUD income levels:
low (80% median): 56092.8
very low (50% median): 35058.0
extremely low(30% median): 21034.8


In [18]:
low_income = (sum(house_income.loc[48, 'less_than_10,000': '45,000_to_49,999']), 
              sum(house_income.loc[48, 'less_than_10,000': '50,000_to_59,999']))

In [19]:
print(f"""Households @ HUD income levels:
low: {low_income[0]} - {low_income[1]}
very low: {sum(house_income.loc[48, 'less_than_10,000': '30,000_to_34,999'])}
extremely low: {sum(house_income.loc[48, 'less_than_10,000': '15,000_to_19,999'])}""")

Households @ HUD income levels:
low: 993805 - 1204078
very low: 663189
extremely low: 339613


In [20]:
print(f"proportion of households that are low income: {low_income[0]/house_income.loc[48, 'total']} - {low_income[1]/house_income.loc[48, 'total']}")

proportion of households that are low income: 0.35487674540596187 - 0.4299629020330143


In [21]:
print(f"people in low income households based on avg household size: {low_income[0]*2.6} - {low_income[1]*2.6}")

people in low income households based on avg household size: 2583893.0 - 3130602.8000000003


## Low-moderate Income Households 

80% of Washington State Median household income

In [157]:
income_80 = .8 * median_house_income
income_75 = .75 * median_house_income
income_70 = .70 * median_house_income
income_65 = .65 * median_house_income
income_60 = .60 * median_house_income
income_55 = .55 * median_house_income
income_50 = .50 * median_house_income

In [158]:
wa_households = wa.loc[:, 'STATE':'STATEA'].copy()

In [159]:
wa_households['80_threshold'] = income_80
wa_households['75_threshold'] = income_75
wa_households['70_threshold'] = income_70
wa_households['65_threshold'] = income_65
wa_households['60_threshold'] = income_60
wa_households['55_threshold'] = income_55
wa_households['50_threshold'] = income_50
wa_households['avg_household_count'] = people_per_house

In [160]:
wa_households = est_households(wa_households, '80', income_cols, house_income)
wa_households = est_households(wa_households, '75', income_cols, house_income)
wa_households = est_households(wa_households, '70', income_cols, house_income)
wa_households = est_households(wa_households, '65', income_cols, house_income)
wa_households = est_households(wa_households, '60', income_cols, house_income)
wa_households = est_households(wa_households, '55', income_cols, house_income)
wa_households = est_households(wa_households, '50', income_cols, house_income)


# wa_people = wa_households * wa_households['avg_household_count'], 0)

In [161]:
wa_households.T

Unnamed: 0,48
STATE,Washington
STATEA,53
80_threshold,56092.8
75_threshold,52587
70_threshold,49081.2
65_threshold,45575.4
60_threshold,42069.6
55_threshold,38563.8
50_threshold,35058
avg_household_count,2.55737


In [176]:
wa_people = wa_households.loc[:, 'total_hh_80':'total_hh_50'] * wa_households.loc[48, 'avg_household_count']
wa_people.columns = [col.replace('_hh_', '') for col in wa_people.columns]

In [177]:
wa_people

Unnamed: 0,total80,total75,total70,total65,total60,total55,total50
48,2869194.0,2680652.0,2492144.0,2303523.0,2100011.0,1897117.0,1699290.0


In [180]:
print(f"% of washingtonias in households that earn less than 80% of state median income: {wa_people.loc[48, 'total80']/income_to_pov.loc[48, 'total']}")

% of washingtonias in households that earn less than 80% of state median income: 0.400629814222574


## County Level Data

In [42]:
counties = pd.read_csv('../data/2018_county.csv', encoding='latin-1')

In [43]:
wa_counties = counties[counties['STATE'] == 'Washington']

In [44]:
# check for correct county count
wa_counties.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39 entries, 2954 to 2992
Columns: 108 entries, GISJOIN to AJ1CM007
dtypes: object(108)
memory usage: 33.2+ KB


In [45]:
wa_counties.head()

Unnamed: 0,GISJOIN,YEAR,REGIONA,DIVISIONA,STATE,STATEA,COUNTY,COUNTYA,COUSUBA,PLACEA,...,AJY9M017,AJZAM001,AJ0EM001,AJ1CM001,AJ1CM002,AJ1CM003,AJ1CM004,AJ1CM005,AJ1CM006,AJ1CM007
2954,G5300010,2014-2018,,,Washington,53,Adams County,1,,,...,60,5961,1088,90,298,298,317,181,19,279
2955,G5300030,2014-2018,,,Washington,53,Asotin County,3,,,...,85,2558,1546,97,366,366,410,211,22,370
2956,G5300050,2014-2018,,,Washington,53,Benton County,5,,,...,358,2031,666,290,1074,1060,1091,521,111,1068
2957,G5300070,2014-2018,,,Washington,53,Chelan County,7,,,...,217,2615,1085,202,807,810,814,357,37,826
2958,G5300090,2014-2018,,,Washington,53,Clallam County,9,,,...,192,1998,952,191,758,746,799,274,82,791


In [46]:
c_populations = wa_counties.loc[:,['COUNTY','AJWME001']]
c_populations.columns = ['county', 'population']
c_populations['population'] = c_populations['population'].astype(int)
c_populations

Unnamed: 0,county,population
2954,Adams County,19452
2955,Asotin County,22337
2956,Benton County,194168
2957,Chelan County,75757
2958,Clallam County,74487
2959,Clark County,465384
2960,Columbia County,4001
2961,Cowlitz County,105112
2962,Douglas County,41371
2963,Ferry County,7576


### Household Income

In [47]:
c_house_income = wa_counties.loc[:,'AJY9E001':'AJY9E017'].astype(int)

In [48]:
c_house_income.columns = ['Total',
        'Less than $10,000',
        '$10,000 to $14,999',
        '$15,000 to $19,999',
        '$20,000 to $24,999',
        '$25,000 to $29,999',
        '$30,000 to $34,999',
        '$35,000 to $39,999',
        '$40,000 to $44,999',
        '$45,000 to $49,999',
        '$50,000 to $59,999',
        '$60,000 to $74,999',
        '$75,000 to $99,999',
        '$100,000 to $124,999',
        '$125,000 to $149,999',
        '$150,000 to $199,999',
        '$200,000 or more']
c_house_income.columns = c_house_income.columns.str.replace(' ', '_').str.replace('$', '').str.lower()

In [49]:
c_populations['avg_household_count'] = c_populations['population']/c_house_income['total']

In [50]:
c_house_income

Unnamed: 0,total,"less_than_10,000","10,000_to_14,999","15,000_to_19,999","20,000_to_24,999","25,000_to_29,999","30,000_to_34,999","35,000_to_39,999","40,000_to_44,999","45,000_to_49,999","50,000_to_59,999","60,000_to_74,999","75,000_to_99,999","100,000_to_124,999","125,000_to_149,999","150,000_to_199,999","200,000_or_more"
2954,5881,386,335,358,431,343,198,328,409,182,430,701,878,383,185,216,118
2955,9171,528,480,637,390,529,612,485,414,451,876,822,1276,722,300,423,226
2956,70983,3291,2945,2723,3138,3069,2884,2725,2733,2783,5926,8062,9785,7041,4638,4919,4321
2957,28038,1820,947,1138,1575,1418,1528,1123,1870,1209,2174,3198,3356,2433,1523,1516,1210
2958,32732,2034,1952,1935,1691,1943,1906,1639,1782,1510,2637,3393,4161,2289,1472,1428,960
2959,171522,6662,4688,5274,6640,6228,6932,7225,7406,6739,13693,18513,25055,19111,13526,12522,11308
2960,1758,120,123,115,87,102,48,128,63,73,98,206,233,129,62,76,95
2961,41397,2954,1892,2209,2463,2443,2100,2118,1951,1759,3156,4632,5324,3387,1762,2050,1197
2962,15064,555,502,615,528,1060,630,695,814,694,1392,1819,2352,1110,1068,746,484
2963,3097,361,171,208,276,129,174,141,268,93,203,314,306,136,141,125,51


In [51]:
c_populations[['county','avg_household_count']]

Unnamed: 0,county,avg_household_count
2954,Adams County,3.307601
2955,Asotin County,2.435612
2956,Benton County,2.735416
2957,Chelan County,2.70194
2958,Clallam County,2.275663
2959,Clark County,2.713261
2960,Columbia County,2.275882
2961,Cowlitz County,2.539121
2962,Douglas County,2.746349
2963,Ferry County,2.446238


In [52]:
c_median_house_income.dtypes

county               object
med_house_income      int32
low_threshold       float64
v_low_threshold     float64
x_low_threshold     float64
dtype: object

In [53]:
c_median_house_income = wa_counties.loc[:, ['COUNTY','AJZAE001']]
c_median_house_income.columns = ['county', 'med_house_income']
c_median_house_income['med_house_income'] = c_median_house_income['med_house_income'].astype(int)
c_median_house_income['low_threshold'] = .8 * c_median_house_income['med_house_income']
c_median_house_income['v_low_threshold'] = .5 * c_median_house_income['med_house_income']
c_median_house_income['x_low_threshold'] = .3 * c_median_house_income['med_house_income']
c_median_house_income

county              object
med_house_income     int32
dtype: object


Unnamed: 0,county,med_house_income,low_threshold,v_low_threshold,x_low_threshold
2954,Adams County,49142,39313.6,24571.0,14742.6
2955,Asotin County,50423,40338.4,25211.5,15126.9
2956,Benton County,65650,52520.0,32825.0,19695.0
2957,Chelan County,56135,44908.0,28067.5,16840.5
2958,Clallam County,49913,39930.4,24956.5,14973.9
2959,Clark County,71636,57308.8,35818.0,21490.8
2960,Columbia County,51111,40888.8,25555.5,15333.3
2961,Cowlitz County,51752,41401.6,25876.0,15525.6
2962,Douglas County,60452,48361.6,30226.0,18135.6
2963,Ferry County,41924,33539.2,20962.0,12577.2


In [54]:
c_house_income_thresholds = c_house_income.copy()

In [65]:
# extracting start and end values from column names
income_cols = []
for i, col in enumerate(c_house_income_thresholds.columns):
    splits = col.replace(',', '').split('_')
    income_range = [np.nan, np.nan]
    if splits[0].isnumeric():
        income_range[0] = int(splits[0])
    if len(splits)>1 and splits[2].isnumeric():
        income_range[1] = int(splits[2])
    income_cols.append(income_range)

In [66]:
income_cols

[[nan, nan],
 [nan, 10000],
 [10000, 14999],
 [15000, 19999],
 [20000, 24999],
 [25000, 29999],
 [30000, 34999],
 [35000, 39999],
 [40000, 44999],
 [45000, 49999],
 [50000, 59999],
 [60000, 74999],
 [75000, 99999],
 [100000, 124999],
 [125000, 149999],
 [150000, 199999],
 [200000, nan]]

In [67]:
def get_col(n, cols):
    """returns the index and range of the column where n falls within the column range
    params:
      n: value
      cols: list of lists [[col0_min, col0_max], [col1_min, col1_max], [col2_min, col2_max], ...]
    returns:
      (index, range_min, range_max)"""
    for i, col in enumerate(cols):
        if (i == 0) or (n > col[1] and not np.isnan(col[1])):
            continue
        return i

In [68]:
def cal_partial_col(threshold, col_min, col_max, n_hh):
    """returns the number of households proportionate to the location of the threshold within the range of incomes
    """
    return (threshold - col_min) / (col_max - col_min) * n_hh

In [156]:
def est_households(est_hh, threshold, income_cols, c_house_income):
    """Estimates the number of households with incomes below the threshold
    
    params:
      est_hh: data frame with column [threshold name]_threshold """
    est_hh['column_i'] = est_hh[threshold +'_threshold'].apply(lambda x: get_col(x, cols))
    est_hh['col_min'] = [cols[i][0] for i in est_hh['column_i']]
    est_hh['col_max'] = [cols[i][1] for i in est_hh['column_i']]
    est_hh['sum_below'] = [sum(c_house_income.iloc[index, 1:column]) 
                               for index, column in enumerate(est_hh['column_i'])]
    est_hh['partial_ratio'] = ((est_hh[threshold +'_threshold'] - est_hh['col_min']) /
                             (est_hh['col_max'] - est_hh['col_min']))
    est_hh['n_hh_in_band'] = [c_house_income.iloc[index, column] for index, column in enumerate(est_hh['column_i'])]
    est_hh['total_hh_' + threshold] = est_hh['sum_below'] + est_hh['n_hh_in_band']*est_hh['partial_ratio']
    return est_hh.drop(columns=['column_i', 'col_min', 'col_max', 'sum_below', 'partial_ratio', 'n_hh_in_band'])

In [145]:
est_hh = c_median_house_income.copy()
est_hh = est_households(est_hh, 'low', income_cols, c_house_income)
est_hh = est_households(est_hh, 'v_low', income_cols, c_house_income)
est_hh = est_households(est_hh, 'x_low', income_cols, c_house_income)

In [146]:
est_hh['population_low'] = round(est_hh['total_hh_low'] * c_populations['avg_household_count'], 0)
est_hh['population_v_low'] = round(est_hh['total_hh_v_low'] * c_populations['avg_household_count'], 0)
est_hh['population_x_low'] = round(est_hh['total_hh_x_low'] * c_populations['avg_household_count'], 0)

In [147]:
est_hh

Unnamed: 0,county,med_house_income,low_threshold,v_low_threshold,x_low_threshold,column_i,col_min,col_max,sum_below,partial_ratio,n_hh_in_band,total_hh_low,total_hh_v_low,total_hh_x_low,population_low,population_v_low,population_x_low
2954,Adams County,49142,39313.6,24571.0,14742.6,2,10000,14999,386,0.94871,335,2334.028766,1473.09902,703.817764,7720.0,4872.0,2328.0
2955,Asotin County,50423,40338.4,25211.5,15126.9,3,15000,19999,1008,0.025385,637,3689.025125,2057.381176,1024.170294,8985.0,5011.0,2494.0
2956,Benton County,65650,52520.0,32825.0,19695.0,3,15000,19999,6236,0.939188,2723,27784.50135,16795.785957,8793.408482,76002.0,45943.0,24054.0
2957,Chelan County,56135,44908.0,28067.5,16840.5,3,15000,19999,2767,0.368174,1138,11384.959192,6350.117023,3185.981596,30761.0,17158.0,8608.0
2958,Clallam County,49913,39930.4,24956.5,14973.9,2,10000,14999,2034,0.994979,1952,13077.508422,7597.623625,3976.199,29760.0,17290.0,9048.0
2959,Clark County,71636,57308.8,35818.0,21490.8,4,20000,24999,16624,0.29822,6640,67802.940734,37606.246449,18604.178436,183967.0,102036.0,50478.0
2960,Columbia County,51111,40888.8,25555.5,15333.3,3,15000,19999,243,0.066673,115,734.20112,456.334467,250.667433,1671.0,1039.0,570.0
2961,Cowlitz County,51752,41401.6,25876.0,15525.6,3,15000,19999,4846,0.105141,2209,16726.013723,9946.09922,5078.256531,42469.0,25254.0,12894.0
2962,Douglas County,60452,48361.6,30226.0,18135.6,3,15000,19999,1057,0.627245,615,5865.683417,3288.481696,1442.755951,16109.0,9031.0,3962.0
2963,Ferry County,41924,33539.2,20962.0,12577.2,2,10000,14999,361,0.515543,171,1268.188798,793.113023,449.157872,3102.0,1940.0,1099.0


In [148]:
print(f"""Total Washingtonians:
low income (<80% of county median): {sum(est_hh['population_low'])}
very low income (<50% of county median): {sum(est_hh['population_v_low'])}
extra low income (<30% of county median): {sum(est_hh['population_x_low'])}""")

Total Washingtonians:
low income (<80% of county median): 2908835.0
very low income (<50% of county median): 1700235.0
extra low income (<30% of county median): 913052.0
