In [1]:
import math
import os              # a core library, used here to read environment variables
import zipfile         # a core library for working with zip files

import numpy as np
import pandas as pd
import requests        # third-party library for making HTTP requests
# Display settings: show up to 100 columns and render floats with two decimals.
pd.options.display.max_columns = 100
pd.set_option('display.float_format', '{:.2f}'.format)

# Outline

###  Part A. Sorting Households by Income Category
 1. Bring in 2017 PUMS data
 2. Assign county_id to each puma
 3. Bring in, clean, and reformat county AMI data
 4. Create dummy variables for each income category and assign to households by number of people and hh income
 5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and the number of people in the household

### Part B. Counting People by Age Group and Income Category
1. Create count variables for each income category and age group (under 18, adult, senior?)
2. Calculate count variable by multiplying each income category dummy by the number of people within that age category
 
### Part C. Aggregating at PUMA Level
1. Aggregate at PUMA level using groupby

## Part A1. Bring in 2017 PUMS data

In [2]:
#load zipfile from PUMS website
url = "https://www2.census.gov/programs-surveys/acs/data/pums/2017/1-Year/csv_hca.zip"
# Fetch before opening the output file and fail loudly on an HTTP error, so a
# failed request can never truncate csv_hca.zip or fill it with an error page.
# The timeout keeps the cell from hanging forever if the server stops responding.
r = requests.get(url, timeout=120)
r.raise_for_status()
with open('csv_hca.zip', 'wb') as f:
    f.write(r.content)

In [3]:
#open zipfile
# Open the downloaded archive read-only; z is the handle the CSV member is read from.
z = zipfile.ZipFile('csv_hca.zip', mode='r')

In [5]:
#import table to dataframe
variable_types = {"NP":"int64","NOC":"float","BDSP":"float","RMSP":"str","MV":"float","WGTP":"float","TAXP":"float"}
columns = ["NP","R65","NRC","PUMA","RT","BDSP","BLD","RNTP","MRGP","SMP","CONP","TEN","VACS","VALP","TAXP","GRPIP","GRNTP","HINCP","MV","WGTP"]
# variable_types was declared but never handed to read_csv, so it had no effect.
# Apply it, restricted to the columns that are actually loaded (NOC and RMSP are
# not in `columns`).
column_dtypes = {name: kind for name, kind in variable_types.items() if name in columns}
# Read the household file straight out of the archive; the context manager
# closes the member handle once the frame is built.
with z.open('psam_h06.csv') as member:
    pums_df = pd.read_csv(member,
                          low_memory=False,
                          usecols=columns,
                          dtype=column_dtypes)
pums_df

Unnamed: 0,RT,PUMA,WGTP,NP,BDSP,BLD,CONP,MRGP,RNTP,SMP,TEN,VACS,VALP,GRNTP,GRPIP,HINCP,MV,NRC,R65,TAXP
0,H,6703,59,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,
1,H,110,74,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00
2,H,7901,111,0,2.00,6.00,,,,,,5.00,,,,,,,,
3,H,1906,63,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00
4,H,6710,75,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00
5,H,10703,25,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00
6,H,3727,94,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,
7,H,2901,31,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00
8,H,5500,103,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00
9,H,4702,107,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,


In [3]:
#rename variables
# Raw PUMS column code -> readable column name.
# NOTE(review): "N65" matches none of the loaded columns (the usecols list
# contains "R65"), so rename() silently skips it and that column keeps its raw
# name. Confirm which variable was intended before relying on "ppl_over_65".
readable_names = dict(
    RT="record_type",
    NP="ppl_in_hh",
    WGTP="weight",
    BDSP="number_bedrooms",
    BLD="units_in_structure",
    CONP="condo_fee",
    MRGP="first_mortgage",
    RNTP="monthly_rent",
    SMP="second_mortgage",
    TEN="tenure",
    VACS="vacancy_status",
    VALP="property_value",
    GRNTP="gross_rent",
    GRPIP="gross_rent_pct_of_income",
    HINCP="hh_income",
    MV="moved_in",
    NRC="number_related_children",
    N65="ppl_over_65",
    TAXP="prop_tax",
)
pums_df.rename(columns=readable_names, inplace=True)
pums_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,


## Part A2. Assign county id to each puma

In [4]:
# load in crosswalk file
# The default is the original hard-coded local path. Set the
# PUMA_COUNTY_CROSSWALK_CSV environment variable to point at the file on any
# other machine instead of editing this cell.
crosswalk_path = os.environ.get(
    "PUMA_COUNTY_CROSSWALK_CSV",
    "C:/Users/jonst/Box/Cost of Doing Nothing/Climate Impacts/Data/PUMS/PUMA_County_Crosswalk_v2.csv",
)
crosswalk_df=pd.read_csv(crosswalk_path, delimiter=",")
crosswalk_df

Unnamed: 0,PUMA,county1,county2,county3,county4,county5,county6,county7
0,101,Alameda CA,,,,,,
1,102,Alameda CA,,,,,,
2,103,Alameda CA,,,,,,
3,104,Alameda CA,,,,,,
4,105,Alameda CA,,,,,,
5,106,Alameda CA,,,,,,
6,107,Alameda CA,,,,,,
7,108,Alameda CA,,,,,,
8,109,Alameda CA,,,,,,
9,110,Alameda CA,,,,,,


In [5]:
# add county name column to puma file
# Left join keeps every PUMS household row; the crosswalk's county1..county7
# columns are attached by matching PUMA code.
puma_county_df = pd.merge(pums_df, crosswalk_df, on="PUMA", how="left")
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,,Sacramento CA,,,,,,
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00,Alameda CA,,,,,,
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,,San Luis Obispo CA,,,,,,
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00,Fresno CA,,,,,,
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00,Sacramento CA,,,,,,
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00,Tulare CA,,,,,,
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,,Los Angeles CA,,,,,,
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00,Kern CA,,,,,,
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00,Napa CA,,,,,,
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,,Merced CA,,,,,,


In [6]:
# remove " CA" from end of each county name
# The pattern is anchored with $ so only a trailing " CA" is removed, and
# regex=True is explicit because the default of str.replace differs between
# pandas versions.
state_suffix = r' CA$'
# county1 is cleaned into a new 'County' column (the key used for the AMI merge
# later); the original county1 column is left as loaded.
puma_county_df['County'] = puma_county_df['county1'].str.replace(state_suffix, '', regex=True)
# county2..county7 are cleaned in place.
for extra_county in ['county2', 'county3', 'county4', 'county5', 'county6', 'county7']:
    puma_county_df[extra_county] = puma_county_df[extra_county].str.replace(state_suffix, '', regex=True)
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,,Sacramento CA,,,,,,,Sacramento
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00,Alameda CA,,,,,,,Alameda
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,,San Luis Obispo CA,,,,,,,San Luis Obispo
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00,Fresno CA,,,,,,,Fresno
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00,Sacramento CA,,,,,,,Sacramento
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00,Tulare CA,,,,,,,Tulare
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00,Kern CA,,,,,,,Kern
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00,Napa CA,,,,,,,Napa
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,,Merced CA,,,,,,,Merced


## Part A3. Bring in, clean, and reformat county AMI data

In [7]:
#bring in 2017 county AMIs file
# The file is read from the notebook's working directory.
ami_csv_path = "2017_AMIs.csv"
df = pd.read_csv(ami_csv_path, sep=",")
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda County\n4-Per,Extremely Low,21950,25050,28200,31300,33850,36350,38850,41350
1,Alameda County\n4-Per,Very Low Income,36550,41750,46950,52150,56350,60500,64700,68850
2,Alameda County\n4-Per,Low Income,56300,64350,72400,80400,86850,93300,99700,106150
3,Alameda County\n4-Per,Median Income,68200,77900,87650,97400,105200,113000,120800,128550
4,Alameda County\n4-Per,Moderate Income,81850,93500,105200,116900,126250,135600,144950,154300
5,Alpine County\n4-Pers,Extremely Low,18150,20750,23350,25900,28780,32960,37140,41320
6,Alpine County\n4-Pers,Very Low Income,30250,34600,38900,43200,46700,50150,53600,57050
7,Alpine County\n4-Pers,Low Income,46100,52650,59250,65800,71100,76350,81600,86900
8,Alpine County\n4-Pers,Median Income,66450,75900,85400,94900,102500,110100,117700,125250
9,Alpine County\n4-Pers,Moderate Income,79750,91100,102500,113900,123000,132100,141250,150350


In [8]:
#clean county names variable
# Each County value is the county name followed by " County\n4-Person", cut off
# at varying points (the sample output shows "...County\n4-Per" and
# "...County\n4-Pers"). Build every truncation from the full suffix down to
# " Coun", longest first, and strip whichever one is present in a single pass.
full_suffix = " County\n4-Person"
shortest_suffix = " Coun"
suffix_variants = [" County "] + [
    full_suffix[:end]
    for end in range(len(full_suffix), len(shortest_suffix) - 1, -1)
]
# regex=True is explicit: the original patterns only matched the line break when
# interpreted as regular expressions, and pandas 2.0 changed the str.replace
# default to literal matching. None of the variants contains a regex
# metacharacter, so they can be joined into an alternation as-is.
suffix_regex = "|".join(suffix_variants)
df['County'] = df['County'].str.replace(suffix_regex, '', regex=True)
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda,Extremely Low,21950,25050,28200,31300,33850,36350,38850,41350
1,Alameda,Very Low Income,36550,41750,46950,52150,56350,60500,64700,68850
2,Alameda,Low Income,56300,64350,72400,80400,86850,93300,99700,106150
3,Alameda,Median Income,68200,77900,87650,97400,105200,113000,120800,128550
4,Alameda,Moderate Income,81850,93500,105200,116900,126250,135600,144950,154300
5,Alpine,Extremely Low,18150,20750,23350,25900,28780,32960,37140,41320
6,Alpine,Very Low Income,30250,34600,38900,43200,46700,50150,53600,57050
7,Alpine,Low Income,46100,52650,59250,65800,71100,76350,81600,86900
8,Alpine,Median Income,66450,75900,85400,94900,102500,110100,117700,125250
9,Alpine,Moderate Income,79750,91100,102500,113900,123000,132100,141250,150350


In [9]:
#create df for each income level and rename variables
# Extremely Low rows of df; the household-size columns "1".."8" become ELI_1..ELI_8.
eli_columns = {size: "ELI_" + size for size in map(str, range(1, 9))}
eli_df = (
    df.loc[df["Income_Category"] == "Extremely Low"]
      .drop(columns="Income_Category")
      .rename(columns=eli_columns)
)
eli_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8
0,Alameda,21950,25050,28200,31300,33850,36350,38850,41350
5,Alpine,18150,20750,23350,25900,28780,32960,37140,41320
10,Amador,15200,17400,20420,24600,28780,32960,37140,41320
15,Butte,13200,16240,20420,24600,28780,32960,37140,41320
20,Calaveras,14750,16850,20420,24600,28780,32960,37140,41320
25,Colusa,12600,16240,20420,24600,28780,32960,37140,39550
30,Contra Costa,21950,25050,28200,31300,33850,36350,38850,41350
35,Del Norte,12600,16240,20420,24600,28780,32960,37140,39550
40,El Dorado,16000,18300,20600,24600,28780,32960,37140,41320
45,Fresno,12600,16240,20420,24600,28780,32960,37140,39550


In [10]:
# Very Low Income rows of df; the household-size columns "1".."8" become VLI_1..VLI_8.
vli_columns = {size: "VLI_" + size for size in map(str, range(1, 9))}
vli_df = (
    df.loc[df["Income_Category"] == "Very Low Income"]
      .drop(columns="Income_Category")
      .rename(columns=vli_columns)
)
vli_df

Unnamed: 0,County,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8
1,Alameda,36550,41750,46950,52150,56350,60500,64700,68850
6,Alpine,30250,34600,38900,43200,46700,50150,53600,57050
11,Amador,25350,28950,32550,36150,39050,41950,44850,47750
16,Butte,21950,25050,28200,31300,33850,36350,38850,41350
21,Calaveras,24600,28100,31600,35100,37950,40750,43550,46350
26,Colusa,21000,24000,27000,29950,32350,34750,37150,39550
31,Contra Costa,36550,41750,46950,52150,56350,60500,64700,68850
36,Del Norte,21000,24000,27000,29950,32350,34750,37150,39550
41,El Dorado,26650,30450,34250,38050,41100,44150,47200,50250
46,Fresno,21000,24000,27000,29950,32350,34750,37150,39550


In [11]:
# Low Income rows of df; the household-size columns "1".."8" become LI_1..LI_8.
li_columns = {size: "LI_" + size for size in map(str, range(1, 9))}
li_df = (
    df.loc[df["Income_Category"] == "Low Income"]
      .drop(columns="Income_Category")
      .rename(columns=li_columns)
)
li_df

Unnamed: 0,County,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8
2,Alameda,56300,64350,72400,80400,86850,93300,99700,106150
7,Alpine,46100,52650,59250,65800,71100,76350,81600,86900
12,Amador,40500,46300,52100,57850,62500,67150,71750,76400
17,Butte,35100,40100,45100,50100,54150,58150,62150,66150
22,Calaveras,39350,44950,50550,56150,60650,65150,69650,74150
27,Colusa,33550,38350,43150,47900,51750,55600,59400,63250
32,Contra Costa,56300,64350,72400,80400,86850,93300,99700,106150
37,Del Norte,33550,38350,43150,47900,51750,55600,59400,63250
42,El Dorado,42650,48750,54850,60900,65800,70650,75550,80400
47,Fresno,33550,38350,43150,47900,51750,55600,59400,63250


In [13]:
# Moderate Income rows of df; the household-size columns "1".."8" become MoI_1..MoI_8.
moi_columns = {size: "MoI_" + size for size in map(str, range(1, 9))}
moi_df = (
    df.loc[df["Income_Category"] == "Moderate Income"]
      .drop(columns="Income_Category")
      .rename(columns=moi_columns)
)
moi_df

Unnamed: 0,County,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
4,Alameda,81850,93500,105200,116900,126250,135600,144950,154300
9,Alpine,79750,91100,102500,113900,123000,132100,141250,150350
14,Amador,60700,69400,78100,86750,93700,100650,107550,114500
19,Butte,52550,60100,67600,75100,81100,87100,93100,99150
24,Calaveras,58950,67400,75850,84250,91000,97750,104450,111200
29,Colusa,50350,57500,64700,71900,77650,83400,89150,94900
34,Contra Costa,81850,93500,105200,116900,126250,135600,144950,154300
39,Del Norte,50350,57500,64700,71900,77650,83400,89150,94900
44,El Dorado,63900,73050,82150,91300,98600,105900,113200,120500
49,Fresno,50350,57500,64700,71900,77650,83400,89150,94900


In [14]:
#combine dfs
# The recorded output of this cell contains MI_1..MI_8 (Median Income) columns,
# but no visible cell builds a median-income frame and the original merge chain
# never joined one. Rebuild it here from df, the same way the other income-level
# frames are built, so a fresh run reproduces those columns.
mi_df = (
    df[df.Income_Category == "Median Income"]
    .drop('Income_Category', axis=1)
    .rename(columns={str(size): "MI_" + str(size) for size in range(1, 9)})
)
# Start from the Extremely Low frame and left-join each other income level on
# County, in the column order shown in the recorded output.
all_amis_df = eli_df
for level_df in (vli_df, li_df, mi_df, moi_df):
    all_amis_df = all_amis_df.merge(level_df, how='left', on="County")
all_amis_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,Alameda,21950,25050,28200,31300,33850,36350,38850,41350,36550,41750,46950,52150,56350,60500,64700,68850,56300,64350,72400,80400,86850,93300,99700,106150,68200,77900,87650,97400,105200,113000,120800,128550,81850,93500,105200,116900,126250,135600,144950,154300
1,Alpine,18150,20750,23350,25900,28780,32960,37140,41320,30250,34600,38900,43200,46700,50150,53600,57050,46100,52650,59250,65800,71100,76350,81600,86900,66450,75900,85400,94900,102500,110100,117700,125250,79750,91100,102500,113900,123000,132100,141250,150350
2,Amador,15200,17400,20420,24600,28780,32960,37140,41320,25350,28950,32550,36150,39050,41950,44850,47750,40500,46300,52100,57850,62500,67150,71750,76400,50600,57850,65050,72300,78100,83850,89650,95450,60700,69400,78100,86750,93700,100650,107550,114500
3,Butte,13200,16240,20420,24600,28780,32960,37140,41320,21950,25050,28200,31300,33850,36350,38850,41350,35100,40100,45100,50100,54150,58150,62150,66150,43800,50100,56350,62600,67600,72600,77600,82650,52550,60100,67600,75100,81100,87100,93100,99150
4,Calaveras,14750,16850,20420,24600,28780,32960,37140,41320,24600,28100,31600,35100,37950,40750,43550,46350,39350,44950,50550,56150,60650,65150,69650,74150,49150,56150,63200,70200,75800,81450,87050,92650,58950,67400,75850,84250,91000,97750,104450,111200
5,Colusa,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900
6,Contra Costa,21950,25050,28200,31300,33850,36350,38850,41350,36550,41750,46950,52150,56350,60500,64700,68850,56300,64350,72400,80400,86850,93300,99700,106150,68200,77900,87650,97400,105200,113000,120800,128550,81850,93500,105200,116900,126250,135600,144950,154300
7,Del Norte,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900
8,El Dorado,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
9,Fresno,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900


## Part A4. Create dummy variables for each income category and assign to households by number of people and hh income

In [15]:
# merge datasets
# Attach the county income limits to every household row by matching County.
# Households whose County has no match keep their row with empty limit columns.
hh_df = pd.merge(puma_county_df, all_amis_df, on="County", how="left")
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00,Alameda CA,,,,,,,Alameda,21950,25050,28200,31300,33850,36350,38850,41350,36550,41750,46950,52150,56350,60500,64700,68850,56300,64350,72400,80400,86850,93300,99700,106150,68200,77900,87650,97400,105200,113000,120800,128550,81850,93500,105200,116900,126250,135600,144950,154300
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,,San Luis Obispo CA,,,,,,,San Luis Obispo,17150,19600,22050,24600,28780,32960,37140,41320,28600,32700,36800,40850,44150,47400,50700,53950,45750,52300,58850,65350,70600,75850,81050,86300,58250,66550,74900,83200,89850,96500,103150,109800,69900,79900,89850,99850,107850,115850,123800,131800
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00,Fresno CA,,,,,,,Fresno,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00,Tulare CA,,,,,,,Tulare,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,18950,21650,24350,27050,29250,32960,37140,41320,31550,36050,40550,45050,48700,52300,55900,59500,50500,57700,64900,72100,77900,83650,89450,95200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00,Kern CA,,,,,,,Kern,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00,Napa CA,,,,,,,Napa,19600,22400,25200,27950,30200,32960,37140,41320,32600,37250,41900,46550,50300,54000,57750,61450,52150,59600,67050,74500,80500,86450,92400,98350,63700,72800,81900,91000,98300,105550,112850,120100,76450,87350,98300,109200,117950,126650,135400,144150
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,,Merced CA,,,,,,,Merced,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900


In [16]:
#create count variables for households in each income group

def _band_flag(frame, floor_prefix=None, ceiling_prefix=None):
    """Return a 0/1 array marking households whose income falls in one band.

    For each household size 1..8, a row is flagged when ppl_in_hh equals that
    size and hh_income is above the size-specific floor column (if any) and at
    or below the size-specific ceiling column (if any). Limit columns are named
    "<prefix>_<size>", e.g. "ELI_3".

    Rows with ppl_in_hh outside 1..8, or with a missing income or limit, are
    never flagged and therefore get 0.
    """
    income = frame["hh_income"]
    flagged = np.zeros(len(frame), dtype=bool)
    for size in range(1, 9):
        size_match = (frame["ppl_in_hh"] == size).values
        if ceiling_prefix is not None:
            size_match = size_match & (income <= frame[ceiling_prefix + "_" + str(size)]).values
        if floor_prefix is not None:
            size_match = size_match & (income > frame[floor_prefix + "_" + str(size)]).values
        flagged = flagged | size_match
    return np.where(flagged, 1, 0)

# Bands are contiguous: each one starts just above the previous band's limit.
# NOTE(review): households with more than 8 people match none of the size
# checks, so they are 0 in all five columns and drop out of later counts.
hh_df["ELI_count"] = _band_flag(hh_df, ceiling_prefix="ELI")
hh_df["VLI_count"] = _band_flag(hh_df, floor_prefix="ELI", ceiling_prefix="VLI")
hh_df["LI_count"] = _band_flag(hh_df, floor_prefix="VLI", ceiling_prefix="LI")
hh_df["MoI_count"] = _band_flag(hh_df, floor_prefix="LI", ceiling_prefix="MoI")
hh_df["HI_count"] = _band_flag(hh_df, floor_prefix="MoI")
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,1,0,0
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00,Alameda CA,,,,,,,Alameda,21950,25050,28200,31300,33850,36350,38850,41350,36550,41750,46950,52150,56350,60500,64700,68850,56300,64350,72400,80400,86850,93300,99700,106150,68200,77900,87650,97400,105200,113000,120800,128550,81850,93500,105200,116900,126250,135600,144950,154300,0,0,0,0,1
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,,San Luis Obispo CA,,,,,,,San Luis Obispo,17150,19600,22050,24600,28780,32960,37140,41320,28600,32700,36800,40850,44150,47400,50700,53950,45750,52300,58850,65350,70600,75850,81050,86300,58250,66550,74900,83200,89850,96500,103150,109800,69900,79900,89850,99850,107850,115850,123800,131800,0,0,0,0,0
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00,Fresno CA,,,,,,,Fresno,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,1,0,0,0
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00,Tulare CA,,,,,,,Tulare,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,0,0,0,1
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,18950,21650,24350,27050,29250,32960,37140,41320,31550,36050,40550,45050,48700,52300,55900,59500,50500,57700,64900,72100,77900,83650,89450,95200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00,Kern CA,,,,,,,Kern,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,0,0,0,1
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00,Napa CA,,,,,,,Napa,19600,22400,25200,27950,30200,32960,37140,41320,32600,37250,41900,46550,50300,54000,57750,61450,52150,59600,67050,74500,80500,86450,92400,98350,63700,72800,81900,91000,98300,105550,112850,120100,76450,87350,98300,109200,117950,126650,135400,144150,0,0,0,0,1
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,,Merced CA,,,,,,,Merced,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,1,0,0,0


## Part A5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and the number of people in the household

In [17]:
# For each income category, scale the 0/1 flag by the household weight to get a
# household count, and by weight times household size to get a people count.
for category in ("ELI", "VLI", "LI", "MoI", "HI"):
    in_category = hh_df[category + "_count"]
    hh_df["17_" + category + "_hh_count"] = in_category * hh_df["weight"]
    hh_df["17_" + category + "_ppl_count"] = in_category * hh_df["ppl_in_hh"] * hh_df["weight"]
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count,17_ELI_hh_count,17_ELI_ppl_count,17_VLI_hh_count,17_VLI_ppl_count,17_LI_hh_count,17_LI_ppl_count,17_MoI_hh_count,17_MoI_ppl_count,17_HI_hh_count,17_HI_ppl_count
0,H,6703,59.00,3,2.00,9.00,,,940.00,,3.00,,,990.00,25.00,47000.00,3.00,1.00,0.00,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,1,0,0,0.00,0.00,0.00,0.00,59.00,177.00,0.00,0.00,0.00,0.00
1,H,110,74.00,4,4.00,2.00,0.00,2600.00,,,1.00,,780000.00,,,130500.00,1.00,2.00,0.00,67.00,Alameda CA,,,,,,,Alameda,21950,25050,28200,31300,33850,36350,38850,41350,36550,41750,46950,52150,56350,60500,64700,68850,56300,64350,72400,80400,86850,93300,99700,106150,68200,77900,87650,97400,105200,113000,120800,128550,81850,93500,105200,116900,126250,135600,144950,154300,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,74.00,296.00
2,H,7901,111.00,0,2.00,6.00,,,,,,5.00,,,,,,,,,San Luis Obispo CA,,,,,,,San Luis Obispo,17150,19600,22050,24600,28780,32960,37140,41320,28600,32700,36800,40850,44150,47400,50700,53950,45750,52300,58850,65350,70600,75850,81050,86300,58250,66550,74900,83200,89850,96500,103150,109800,69900,79900,89850,99850,107850,115850,123800,131800,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
3,H,1906,63.00,1,3.00,2.00,0.00,,,,2.00,,175000.00,,,13530.00,7.00,0.00,1.00,14.00,Fresno CA,,,,,,,Fresno,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,1,0,0,0,0.00,0.00,63.00,63.00,0.00,0.00,0.00,0.00,0.00,0.00
4,H,6710,75.00,3,5.00,2.00,0.00,,,,2.00,,500000.00,,,104000.00,4.00,0.00,0.00,48.00,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,24600,28780,32960,37140,41320,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,75.00,225.00
5,H,10703,25.00,2,2.00,2.00,0.00,570.00,,,1.00,,250000.00,,,58610.00,6.00,0.00,2.00,26.00,Tulare CA,,,,,,,Tulare,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,25.00,50.00
6,H,3727,94.00,2,2.00,7.00,,,1600.00,,3.00,,,1764.00,13.00,157400.00,5.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,18950,21650,24350,27050,29250,32960,37140,41320,31550,36050,40550,45050,48700,52300,55900,59500,50500,57700,64900,72100,77900,83650,89450,95200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,94.00,188.00
7,H,2901,31.00,4,4.00,2.00,0.00,1400.00,,,1.00,,330000.00,,,157000.00,4.00,2.00,0.00,62.00,Kern CA,,,,,,,Kern,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,31.00,124.00
8,H,5500,103.00,3,5.00,2.00,0.00,2300.00,,4000.00,1.00,,950000.00,,,432730.00,5.00,1.00,0.00,68.00,Napa CA,,,,,,,Napa,19600,22400,25200,27950,30200,32960,37140,41320,32600,37250,41900,46550,50300,54000,57750,61450,52150,59600,67050,74500,80500,86450,92400,98350,63700,72800,81900,91000,98300,105550,112850,120100,76450,87350,98300,109200,117950,126650,135400,144150,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,103.00,309.00
9,H,4702,107.00,1,1.00,9.00,,,610.00,,3.00,,,650.00,53.00,14600.00,2.00,0.00,1.00,,Merced CA,,,,,,,Merced,12600,16240,20420,24600,28780,32960,37140,39550,21000,24000,27000,29950,32350,34750,37150,39550,33550,38350,43150,47900,51750,55600,59400,63250,41950,47900,53900,59900,64700,69500,74300,79050,50350,57500,64700,71900,77650,83400,89150,94900,0,1,0,0,0,0.00,0.00,107.00,107.00,0.00,0.00,0.00,0.00,0.00,0.00


## Part C1. Aggregating at the PUMA level

In [18]:
# Collapse the household rows to one row per PUMA by summing within each PUMA.
# numeric_only=True keeps the text columns (record_type, county names, ...) out
# of the aggregation: pandas >= 2.0 no longer drops non-numeric columns
# silently, so a bare .sum() would concatenate those strings or raise on
# columns that mix strings with NaN.
puma_sums_df = hh_df.groupby("PUMA").sum(numeric_only=True)

# Keep only the weighted household / people counts for each income category.
count_columns = [
    "17_ELI_hh_count", "17_ELI_ppl_count",
    "17_VLI_hh_count", "17_VLI_ppl_count",
    "17_LI_hh_count", "17_LI_ppl_count",
    "17_MoI_hh_count", "17_MoI_ppl_count",
    "17_HI_hh_count", "17_HI_ppl_count",
]
# .copy() detaches the result from puma_sums_df so later edits don't alias it.
puma_counts_df = puma_sums_df[count_columns].copy()
puma_counts_df

Unnamed: 0_level_0,17_ELI_hh_count,17_ELI_ppl_count,17_VLI_hh_count,17_VLI_ppl_count,17_LI_hh_count,17_LI_ppl_count,17_MoI_hh_count,17_MoI_ppl_count,17_HI_hh_count,17_HI_ppl_count
PUMA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
101,9049.00,20684.00,5628.00,9891.00,5721.00,13576.00,7016.00,15822.00,24743.00,62004.00
102,18082.00,34330.00,8088.00,21164.00,9464.00,21306.00,10656.00,24924.00,27520.00,61088.00
103,4168.00,6683.00,4815.00,11332.00,5049.00,11536.00,8595.00,18062.00,31102.00,77826.00
104,10953.00,26342.00,9080.00,26951.00,6713.00,19340.00,7210.00,23307.00,6321.00,18510.00
105,11624.00,24427.00,8547.00,19013.00,9796.00,23031.00,10592.00,27541.00,24919.00,68060.00
106,4594.00,10390.00,5333.00,10827.00,8330.00,20843.00,8922.00,25266.00,19034.00,54900.00
107,4394.00,9044.00,5677.00,16193.00,8884.00,29286.00,9696.00,30078.00,18776.00,64074.00
108,4324.00,10192.00,2839.00,6786.00,5480.00,17188.00,7409.00,26173.00,24067.00,80419.00
109,4736.00,8156.00,4245.00,9599.00,6383.00,20494.00,11001.00,36831.00,37288.00,114752.00
110,4350.00,7991.00,6175.00,12750.00,8742.00,19589.00,9514.00,21487.00,56317.00,165137.00


In [19]:
# Write the per-PUMA household and people counts to a CSV file in the current
# working directory; the PUMA index is written as the first column.
puma_counts_df.to_csv(path_or_buf="2017_ppl_hh_puma_counts.csv")