In [3]:
import numpy as np
import pandas as pd
import math
import zipfile         # a core library for working with zip files
import requests        # third-party library for making HTTP requests
# Notebook-wide display settings: show up to 100 columns and render floats with two decimals
pd.options.display.max_columns = 100
pd.set_option('display.float_format', '{:.2f}'.format)

# Outline

###  Part A. Sorting Households by Income Category
 1. Bring in 2015 PUMS data
 2. Assign county_id to each puma
 3. Bring in and clean, reformat county AMIs data
 4. Create dummy variables for each income category and assign to households by number of people and hh income
 5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and by the number of people in the household

### Part B. Counting People by Age Group and Income Category
1. Create count variables for each income category and age group (under 18, adult, senior?)
2. Calculate count variable by multiplying each income category dummy by the number of people within that age category
 
### Part C. Aggregating at PUMA Level
1. Aggregate at PUMA level using groupby

## Part A1. Bring in 2015 PUMS data

In [10]:
#load zipfile from PUMS website
# Fetch first and check the HTTP status before touching the local file, so a
# failed request raises here instead of leaving a truncated or HTML-error
# "zip" on disk for the next cell to choke on.
url = "https://www2.census.gov/programs-surveys/acs/data/pums/2015/1-Year/csv_hca.zip"
r = requests.get(url, timeout=60)  # timeout: don't hang forever on a stalled connection
r.raise_for_status()               # raises requests.HTTPError on 4xx/5xx responses
with open('csv_hca.zip', 'wb') as f:
    f.write(r.content)

In [11]:
# Open the downloaded archive read-only; later cells read its members through `z`
z = zipfile.ZipFile(file='csv_hca.zip', mode='r')

In [12]:
#import table to dataframe
# Explicit dtypes for selected PUMS columns. Some keys (e.g. "NOC", "RMSP") are
# not among the columns loaded below, so only the overlapping entries are
# handed to read_csv.
variable_types = {"NP":"int64","NOC":"float","BDSP":"float","RMSP":"str","MV":"float","WGTP":"float","TAXP":"float"}
columns = ["NP","R65","NRC","PUMA","RT","BDSP","BLD","RNTP","MRGP","SMP","CONP","TEN","VACS","VALP","TAXP","GRPIP","GRNTP","HINCP","MV","WGTP"]
loaded_types = {col: variable_types[col] for col in columns if col in variable_types}

# Read the housing-record CSV straight out of the archive; the member handle is
# closed as soon as the frame has been built.
with z.open('ss15hca.csv') as pums_file:
    pums_df = pd.read_csv(pums_file,
                          low_memory=False,
                          usecols=columns,
                          dtype=loaded_types)
pums_df

Unnamed: 0,RT,PUMA,WGTP,NP,BDSP,BLD,CONP,MRGP,RNTP,SMP,TEN,VACS,VALP,GRNTP,GRPIP,HINCP,MV,NRC,R65,TAXP
0,H,5906,100,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,
1,H,8105,66,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00
2,H,3727,65,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00
3,H,6101,98,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00
4,H,3738,203,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00
5,H,6513,252,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00
6,H,3744,71,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,
7,H,7108,0,1,,,,,,,,,,,,,,,,
8,H,5908,119,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00
9,H,5905,122,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00


In [9]:
# Give the PUMS housing columns descriptive names.
# NOTE(review): "N65" is not one of the columns requested when the file is
# loaded (that list has "R65"), so this entry appears to match nothing and
# R65 keeps its original name -- confirm which was intended.
pums_column_names = {
    "RT": "record_type",
    "BDSP": "number_bedrooms",
    "BLD": "units_in_structure",
    "RNTP": "monthly_rent",
    "MRGP": "first_mortgage",
    "SMP": "second_mortgage",
    "CONP": "condo_fee",
    "TEN": "tenure",
    "VACS": "vacancy_status",
    "VALP": "property_value",
    "GRPIP": "gross_rent_pct_of_income",
    "GRNTP": "gross_rent",
    "HINCP": "hh_income",
    "N65": "ppl_over_65",
    "NP": "ppl_in_hh",
    "NRC": "number_related_children",
    "WGTP": "weight",
    "TAXP": "prop_tax",
    "MV": "moved_in",
}
pums_df = pums_df.rename(columns=pums_column_names)
pums_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax
0,H,8513,139,2,02,5.00,,,00700,,3.00,,,0700,056,000015000,1.00,00,0.00,
1,H,6708,177,0,03,2.00,,,,,,7.00,,,,,,,,
2,H,7301,259,4,04,2.00,,,02300,,3.00,,,2451,040,000073000,2.00,01,0.00,
3,H,1903,165,3,03,2.00,,01100,,,1.00,,0215000,,,000086000,3.00,01,0.00,52.00
4,H,6509,174,3,03,2.00,,,00650,,3.00,,,0990,018,000064500,3.00,01,0.00,
5,H,6506,194,7,03,1.00,,,00850,,3.00,,,0960,037,000031200,4.00,04,0.00,
6,H,6101,166,3,03,2.00,,01600,,,1.00,,0300000,,,000329500,3.00,00,0.00,8.00
7,H,3729,0,1,,,,,,,,,,,,,,,,
8,H,7501,109,3,03,4.00,,,02700,,3.00,,,2770,023,000143000,3.00,00,0.00,
9,H,3709,67,2,02,2.00,,01900,,,1.00,,0600000,,,000109000,5.00,00,0.00,60.00


## Part A2. Assign county id to each puma

In [4]:
# Read the PUMA-to-county crosswalk (one PUMA column plus county1..county7 name columns)
crosswalk_df = pd.read_csv("PUMA_County_Crosswalk_v2.csv", sep=",")
crosswalk_df

Unnamed: 0,PUMA,county1,county2,county3,county4,county5,county6,county7
0,101,Alameda CA,,,,,,
1,102,Alameda CA,,,,,,
2,103,Alameda CA,,,,,,
3,104,Alameda CA,,,,,,
4,105,Alameda CA,,,,,,
5,106,Alameda CA,,,,,,
6,107,Alameda CA,,,,,,
7,108,Alameda CA,,,,,,
8,109,Alameda CA,,,,,,
9,110,Alameda CA,,,,,,


In [5]:
# add county name column to puma file
# Left join keeps every PUMS household record. validate="many_to_one" makes the
# merge raise pandas.errors.MergeError if a PUMA appears more than once in the
# crosswalk, which would otherwise silently duplicate household rows (and
# therefore their weights) in everything computed downstream.
puma_county_df = pums_df.merge(crosswalk_df, how='left', on="PUMA", validate="many_to_one")
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7
0,H,5906,100.00,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,,Orange CA,,,,,,
1,H,8105,66.00,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00,San Mateo CA,,,,,,
2,H,3727,65.00,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00,Los Angeles CA,,,,,,
3,H,6101,98.00,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00,Placer CA,,,,,,
4,H,3738,203.00,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00,Los Angeles CA,,,,,,
5,H,6513,252.00,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00,Riverside CA,,,,,,
6,H,3744,71.00,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,,Los Angeles CA,,,,,,
7,H,7108,0.00,1,,,,,,,,,,,,,,,,,San Bernardino CA,,,,,,
8,H,5908,119.00,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00,Orange CA,,,,,,
9,H,5905,122.00,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00,Orange CA,,,,,,


In [6]:
# remove " CA" from end of each county name
# county1 is left untouched; its cleaned value goes into a new 'County' column,
# while county2..county7 are cleaned in place.
# The pattern is anchored with $ so only a trailing " CA" is removed, and
# regex=True is passed explicitly because the default regex mode of
# Series.str.replace differs between pandas versions.
state_suffix = r' CA$'
puma_county_df['County'] = puma_county_df['county1'].str.replace(state_suffix, '', regex=True)
for county_col in ['county2', 'county3', 'county4', 'county5', 'county6', 'county7']:
    puma_county_df[county_col] = puma_county_df[county_col].str.replace(state_suffix, '', regex=True)
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County
0,H,5906,100.00,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,,Orange CA,,,,,,,Orange
1,H,8105,66.00,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00,San Mateo CA,,,,,,,San Mateo
2,H,3727,65.00,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00,Los Angeles CA,,,,,,,Los Angeles
3,H,6101,98.00,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00,Placer CA,,,,,,,Placer
4,H,3738,203.00,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00,Los Angeles CA,,,,,,,Los Angeles
5,H,6513,252.00,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00,Riverside CA,,,,,,,Riverside
6,H,3744,71.00,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles
7,H,7108,0.00,1,,,,,,,,,,,,,,,,,San Bernardino CA,,,,,,,San Bernardino
8,H,5908,119.00,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00,Orange CA,,,,,,,Orange
9,H,5905,122.00,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00,Orange CA,,,,,,,Orange


## Part A3. Bring in and clean, reformat county AMIs data

In [7]:
# Load the 2015 county AMI table (one row per county and income category,
# with household-size columns "1".."8")
df = pd.read_csv("2015_AMIs.csv", sep=",")
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda County\n4-Per,Extremely Low,19650,22450,25250,28050,30300,32570,36730,40890
1,Alameda County\n4-Per,Very Low Income,32750,37400,42100,46750,50500,54250,58000,61750
2,Alameda County\n4-Per,Low Income,50150,57300,64450,71600,77350,83100,88800,94550
3,Alameda County\n4-Per,Median Income,65450,74800,84150,93500,101000,108450,115950,123400
4,Alameda County\n4-Per,Moderate Income,78550,89750,101000,112200,121200,130150,139150,148100
5,Alpine County\n4-Pers,Extremely Low,18150,20750,23350,25900,28410,32570,36730,40890
6,Alpine County\n4-Pers,Very Low Income,30250,34600,38900,43200,46700,50150,53600,57050
7,Alpine County\n4-Pers,Low Income,46100,52650,59250,65800,71100,76350,81600,86900
8,Alpine County\n4-Pers,Median Income,66450,75900,85400,94900,102500,110100,117700,125250
9,Alpine County\n4-Pers,Moderate Income,79750,91100,102500,113900,123000,132100,141250,150350


In [8]:
#clean county names variable
# The raw values appear to be "<name> County\n4-Person..." cut off at a fixed
# width, so what follows the county name is some prefix of " County\n4-Person"
# (as short as " Coun" for the longest names). Strip everything from " Coun"
# to the end of the string in a single pass.
#   (?s)        lets "." match the embedded newline
#   regex=True  is explicit: with pandas' literal (regex=False) default the
#               r'\n' patterns used previously would not match a newline and
#               would leave "\n4-Per..." behind in the county name.
df['County'] = df['County'].str.replace(r'(?s) Coun.*', '', regex=True)
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda,Extremely Low,19650,22450,25250,28050,30300,32570,36730,40890
1,Alameda,Very Low Income,32750,37400,42100,46750,50500,54250,58000,61750
2,Alameda,Low Income,50150,57300,64450,71600,77350,83100,88800,94550
3,Alameda,Median Income,65450,74800,84150,93500,101000,108450,115950,123400
4,Alameda,Moderate Income,78550,89750,101000,112200,121200,130150,139150,148100
5,Alpine,Extremely Low,18150,20750,23350,25900,28410,32570,36730,40890
6,Alpine,Very Low Income,30250,34600,38900,43200,46700,50150,53600,57050
7,Alpine,Low Income,46100,52650,59250,65800,71100,76350,81600,86900
8,Alpine,Median Income,66450,75900,85400,94900,102500,110100,117700,125250
9,Alpine,Moderate Income,79750,91100,102500,113900,123000,132100,141250,150350


In [9]:
#create df for each income level and rename variables
# Extremely Low tier: keep only those rows, drop the category label, and
# prefix the household-size columns ("1".."8") with ELI_.
eli_columns = {size: "ELI_" + size for size in map(str, range(1, 9))}
eli_df = (
    df.loc[df["Income_Category"] == "Extremely Low"]
      .drop(columns="Income_Category")
      .rename(columns=eli_columns)
)
eli_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8
0,Alameda,19650,22450,25250,28050,30300,32570,36730,40890
5,Alpine,18150,20750,23350,25900,28410,32570,36730,40890
10,Amador,15200,17400,20090,24250,28410,32570,36730,40890
15,Butte,12350,15930,20090,24250,28410,32570,35300,37600
20,Calaveras,14750,16850,20090,24250,28410,32570,36730,40890
25,Colusa,12350,15930,20090,24250,28410,32570,36400,38750
30,Contra Costa,19650,22450,25250,28050,30300,32570,36730,40890
35,Del Norte,12150,15930,20090,24250,28410,32570,35300,37600
40,El Dorado,16000,18300,20600,24250,28410,32570,36730,40890
45,Fresno,12150,15930,20090,24250,28410,32570,35300,37600


In [10]:
# Very Low Income tier: keep only those rows, drop the category label, and
# prefix the household-size columns ("1".."8") with VLI_.
vli_columns = {size: "VLI_" + size for size in map(str, range(1, 9))}
vli_df = (
    df.loc[df["Income_Category"] == "Very Low Income"]
      .drop(columns="Income_Category")
      .rename(columns=vli_columns)
)
vli_df

Unnamed: 0,County,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8
1,Alameda,32750,37400,42100,46750,50500,54250,58000,61750
6,Alpine,30250,34600,38900,43200,46700,50150,53600,57050
11,Amador,25350,28950,32550,36150,39050,41950,44850,47750
16,Butte,20550,23500,26450,29350,31700,34050,36400,38750
21,Calaveras,24600,28100,31600,35100,37950,40750,43550,46350
26,Colusa,20550,23500,26450,29350,31700,34050,36400,38750
31,Contra Costa,32750,37400,42100,46750,50500,54250,58000,61750
36,Del Norte,20300,23200,26100,28950,31300,33600,35900,38250
41,El Dorado,26650,30450,34250,38050,41100,44150,47200,50250
46,Fresno,20300,23200,26100,28950,31300,33600,35900,38250


In [11]:
# Low Income tier: keep only those rows, drop the category label, and
# prefix the household-size columns ("1".."8") with LI_.
li_columns = {size: "LI_" + size for size in map(str, range(1, 9))}
li_df = (
    df.loc[df["Income_Category"] == "Low Income"]
      .drop(columns="Income_Category")
      .rename(columns=li_columns)
)
li_df

Unnamed: 0,County,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8
2,Alameda,50150,57300,64450,71600,77350,83100,88800,94550
7,Alpine,46100,52650,59250,65800,71100,76350,81600,86900
12,Amador,40500,46300,52100,57850,62500,67150,71750,76400
17,Butte,32900,37600,42300,46950,50750,54500,58250,62000
22,Calaveras,39350,44950,50550,56150,60650,65150,69650,74150
27,Colusa,32900,37600,42300,46950,50750,54500,58250,62000
32,Contra Costa,50150,57300,64450,71600,77350,83100,88800,94550
37,Del Norte,32450,37050,41700,46300,50050,53750,57450,61150
42,El Dorado,42650,48750,54850,60900,65800,70650,75550,80400
47,Fresno,32450,37050,41700,46300,50050,53750,57450,61150


In [12]:
# Median Income tier: keep only those rows, drop the category label, and
# prefix the household-size columns ("1".."8") with MI_.
mi_columns = {size: "MI_" + size for size in map(str, range(1, 9))}
mi_df = (
    df.loc[df["Income_Category"] == "Median Income"]
      .drop(columns="Income_Category")
      .rename(columns=mi_columns)
)
mi_df

Unnamed: 0,County,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8
3,Alameda,65450,74800,84150,93500,101000,108450,115950,123400
8,Alpine,66450,75900,85400,94900,102500,110100,117700,125250
13,Amador,50600,57850,65050,72300,78100,83850,89650,95450
18,Butte,41100,46950,52850,58700,63400,68100,72800,77500
23,Calaveras,49150,56150,63200,70200,75800,81450,87050,92650
28,Colusa,41100,46950,52850,58700,63400,68100,72800,77500
33,Contra Costa,65450,74800,84150,93500,101000,108450,115950,123400
38,Del Norte,40550,46300,52100,57900,62550,67150,71800,76450
43,El Dorado,53250,60900,68500,76100,82200,88300,94350,100450
48,Fresno,40550,46300,52100,57900,62550,67150,71800,76450


In [13]:
# Moderate Income tier: keep only those rows, drop the category label, and
# prefix the household-size columns ("1".."8") with MoI_.
moi_columns = {size: "MoI_" + size for size in map(str, range(1, 9))}
moi_df = (
    df.loc[df["Income_Category"] == "Moderate Income"]
      .drop(columns="Income_Category")
      .rename(columns=moi_columns)
)
moi_df

Unnamed: 0,County,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
4,Alameda,78550,89750,101000,112200,121200,130150,139150,148100
9,Alpine,79750,91100,102500,113900,123000,132100,141250,150350
14,Amador,60700,69400,78100,86750,93700,100650,107550,114500
19,Butte,49300,56350,63400,70450,76100,81700,87350,93000
24,Calaveras,58950,67400,75850,84250,91000,97750,104450,111200
29,Colusa,49300,56350,63400,70450,76100,81700,87350,93000
34,Contra Costa,78550,89750,101000,112200,121200,130150,139150,148100
39,Del Norte,48650,55600,62550,69500,75050,80600,86200,91750
44,El Dorado,63900,73050,82150,91300,98600,105900,113200,120500
49,Fresno,48650,55600,62550,69500,75050,80600,86200,91750


In [14]:
#combine dfs
# Start from the ELI table and left-join the other tier tables onto it one at a
# time by County, giving one wide row of thresholds per county.
all_amis_df = eli_df
for tier_df in (vli_df, li_df, mi_df, moi_df):
    all_amis_df = all_amis_df.merge(tier_df, how='left', on="County")
all_amis_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,Alameda,19650,22450,25250,28050,30300,32570,36730,40890,32750,37400,42100,46750,50500,54250,58000,61750,50150,57300,64450,71600,77350,83100,88800,94550,65450,74800,84150,93500,101000,108450,115950,123400,78550,89750,101000,112200,121200,130150,139150,148100
1,Alpine,18150,20750,23350,25900,28410,32570,36730,40890,30250,34600,38900,43200,46700,50150,53600,57050,46100,52650,59250,65800,71100,76350,81600,86900,66450,75900,85400,94900,102500,110100,117700,125250,79750,91100,102500,113900,123000,132100,141250,150350
2,Amador,15200,17400,20090,24250,28410,32570,36730,40890,25350,28950,32550,36150,39050,41950,44850,47750,40500,46300,52100,57850,62500,67150,71750,76400,50600,57850,65050,72300,78100,83850,89650,95450,60700,69400,78100,86750,93700,100650,107550,114500
3,Butte,12350,15930,20090,24250,28410,32570,35300,37600,20550,23500,26450,29350,31700,34050,36400,38750,32900,37600,42300,46950,50750,54500,58250,62000,41100,46950,52850,58700,63400,68100,72800,77500,49300,56350,63400,70450,76100,81700,87350,93000
4,Calaveras,14750,16850,20090,24250,28410,32570,36730,40890,24600,28100,31600,35100,37950,40750,43550,46350,39350,44950,50550,56150,60650,65150,69650,74150,49150,56150,63200,70200,75800,81450,87050,92650,58950,67400,75850,84250,91000,97750,104450,111200
5,Colusa,12350,15930,20090,24250,28410,32570,36400,38750,20550,23500,26450,29350,31700,34050,36400,38750,32900,37600,42300,46950,50750,54500,58250,62000,41100,46950,52850,58700,63400,68100,72800,77500,49300,56350,63400,70450,76100,81700,87350,93000
6,Contra Costa,19650,22450,25250,28050,30300,32570,36730,40890,32750,37400,42100,46750,50500,54250,58000,61750,50150,57300,64450,71600,77350,83100,88800,94550,65450,74800,84150,93500,101000,108450,115950,123400,78550,89750,101000,112200,121200,130150,139150,148100
7,Del Norte,12150,15930,20090,24250,28410,32570,35300,37600,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750
8,El Dorado,16000,18300,20600,24250,28410,32570,36730,40890,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
9,Fresno,12150,15930,20090,24250,28410,32570,35300,37600,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750


## Part A4. Create dummy variables for each income category and assign to households by number of people and hh income

In [15]:
# merge datasets
# Attach each household's county income thresholds. The left join keeps every
# household; validate="many_to_one" makes the merge raise
# pandas.errors.MergeError if a County appears more than once in the AMI table,
# which would otherwise silently duplicate household rows and inflate the
# weighted counts built from them.
hh_df = puma_county_df.merge(all_amis_df, how='left', on="County", validate="many_to_one")
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,H,5906,100.00,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150
1,H,8105,66.00,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00,San Mateo CA,,,,,,,San Mateo,24650,28150,31650,35150,38000,40800,43600,46400,41050,46900,52750,58600,63300,68000,72700,77400,65700,75100,84500,93850,101400,108900,116400,123900,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150
2,H,3727,65.00,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650
3,H,6101,98.00,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00,Placer CA,,,,,,,Placer,16000,18300,20600,24250,28410,32570,36730,40890,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
4,H,3738,203.00,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650
5,H,6513,252.00,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00,Riverside CA,,,,,,,Riverside,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950
6,H,3744,71.00,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650
7,H,7108,0.00,1,,,,,,,,,,,,,,,,,San Bernardino CA,,,,,,,San Bernardino,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950
8,H,5908,119.00,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150
9,H,5905,122.00,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150


In [16]:
# Flag every household with the income category it belongs to (1 = in category, 0 = not).
# A category is bounded by the income-limit columns that match the household's size
# (e.g. ELI_3, VLI_3 for a three-person household). Limit columns only exist for sizes
# 1 through 8, so a household of any other size -- or one with missing income -- ends up
# with 0 in all five flags.

# (output column, prefix of the inclusive upper limit, prefix of the exclusive lower limit)
income_category_bounds = [
    ("ELI_count", "ELI", None),
    ("VLI_count", "VLI", "ELI"),
    ("LI_count", "LI", "VLI"),
    ("MoI_count", "MoI", "LI"),
    ("HI_count", None, "MoI"),   # everything above the MoI limit
]

for flag_column, upper_prefix, lower_prefix in income_category_bounds:
    in_category = pd.Series(False, index=hh_df.index)
    for size in range(1, 9):
        size_match = hh_df.ppl_in_hh == size
        if upper_prefix is not None:
            size_match = size_match & (hh_df.hh_income <= hh_df["{}_{}".format(upper_prefix, size)])
        if lower_prefix is not None:
            size_match = size_match & (hh_df.hh_income > hh_df["{}_{}".format(lower_prefix, size)])
        in_category = in_category | size_match
    hh_df[flag_column] = np.where(in_category, 1, 0)

hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count
0,H,5906,100.00,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,1,0,0
1,H,8105,66.00,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00,San Mateo CA,,,,,,,San Mateo,24650,28150,31650,35150,38000,40800,43600,46400,41050,46900,52750,58600,63300,68000,72700,77400,65700,75100,84500,93850,101400,108900,116400,123900,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150,0,0,0,0,1
2,H,3727,65.00,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1
3,H,6101,98.00,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00,Placer CA,,,,,,,Placer,16000,18300,20600,24250,28410,32570,36730,40890,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1
4,H,3738,203.00,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,1,0,0,0
5,H,6513,252.00,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00,Riverside CA,,,,,,,Riverside,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,0,1
6,H,3744,71.00,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,1,0,0,0,0
7,H,7108,0.00,1,,,,,,,,,,,,,,,,,San Bernardino CA,,,,,,,San Bernardino,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,0,0
8,H,5908,119.00,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,0,0,1
9,H,5905,122.00,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,0,0,1


## Part A5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and the number of people in the household

In [17]:
# Scale each 0/1 income-category flag into weighted totals:
#   15_<category>_hh_count  = flag * weight
#   15_<category>_ppl_count = flag * ppl_in_hh * weight
for category in ("ELI", "VLI", "LI", "MoI", "HI"):
    category_flag = hh_df["{}_count".format(category)]
    hh_df["15_{}_hh_count".format(category)] = category_flag * hh_df.weight
    hh_df["15_{}_ppl_count".format(category)] = category_flag * hh_df.ppl_in_hh * hh_df.weight

hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count,15_ELI_hh_count,15_ELI_ppl_count,15_VLI_hh_count,15_VLI_ppl_count,15_LI_hh_count,15_LI_ppl_count,15_MoI_hh_count,15_MoI_ppl_count,15_HI_hh_count,15_HI_ppl_count
0,H,5906,100.00,2,1.00,8.00,,,1100.00,,3.00,,,1120.00,26.00,52000.00,2.00,0.00,0.00,,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,1,0,0,0.00,0.00,0.00,0.00,100.00,200.00,0.00,0.00,0.00,0.00
1,H,8105,66.00,4,3.00,2.00,0.00,6200.00,,,1.00,,1800000.00,,,232000.00,1.00,2.00,0.00,68.00,San Mateo CA,,,,,,,San Mateo,24650,28150,31650,35150,38000,40800,43600,46400,41050,46900,52750,58600,63300,68000,72700,77400,65700,75100,84500,93850,101400,108900,116400,123900,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,66.00,264.00
2,H,3727,65.00,3,3.00,3.00,0.00,2700.00,,,1.00,,1200000.00,,,108800.00,4.00,1.00,0.00,68.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,65.00,195.00
3,H,6101,98.00,1,3.00,2.00,0.00,700.00,,,1.00,,375000.00,,,65000.00,4.00,0.00,1.00,57.00,Placer CA,,,,,,,Placer,16000,18300,20600,24250,28410,32570,36730,40890,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,98.00,98.00
4,H,3738,203.00,2,2.00,1.00,0.00,,,,2.00,,17000.00,,,29000.00,6.00,0.00,0.00,2.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,1,0,0,0,0.00,0.00,203.00,406.00,0.00,0.00,0.00,0.00,0.00,0.00
5,H,6513,252.00,1,7.00,2.00,0.00,1600.00,,,1.00,,650000.00,,,100000.00,4.00,0.00,0.00,66.00,Riverside CA,,,,,,,Riverside,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,252.00,252.00
6,H,3744,71.00,3,1.00,2.00,,,800.00,,3.00,,,910.00,101.00,8000.00,3.00,0.00,0.00,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,28410,32570,36730,40890,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,1,0,0,0,0,71.00,213.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
7,H,7108,0.00,1,,,,,,,,,,,,,,,,,San Bernardino CA,,,,,,,San Bernardino,14100,16100,20090,24250,28410,32570,36730,40890,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
8,H,5908,119.00,3,2.00,2.00,350.00,500.00,,,1.00,,500000.00,,,108000.00,5.00,0.00,2.00,51.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,119.00,357.00
9,H,5905,122.00,4,3.00,2.00,0.00,2100.00,,300.00,1.00,,525000.00,,,137000.00,5.00,2.00,0.00,36.00,Orange CA,,,,,,,Orange,20250,23150,26050,28900,31250,33550,36730,40890,33750,38550,43350,48150,52050,55900,59750,63600,53950,61650,69350,77050,83250,89400,95550,101750,61050,69750,78500,87200,94200,101150,108150,115100,73250,83700,94200,104650,113000,121400,129750,138150,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,122.00,488.00


## Part C1. Aggregating at the PUMA level

In [18]:
# Total the weighted household and person counts within each PUMA.
#
# Only the ten count columns are selected *before* aggregating. Summing the whole frame
# would also try to add up the text columns (record_type, county names) and the per-row
# income-limit columns, none of which are used afterwards; newer pandas releases no longer
# silently drop non-numeric columns in GroupBy.sum, so that form is both wasteful and
# version-dependent.
puma_count_columns = [
    "15_ELI_hh_count", "15_ELI_ppl_count",
    "15_VLI_hh_count", "15_VLI_ppl_count",
    "15_LI_hh_count", "15_LI_ppl_count",
    "15_MoI_hh_count", "15_MoI_ppl_count",
    "15_HI_hh_count", "15_HI_ppl_count",
]

# puma_sums_df is kept as a name so any cell that still refers to it keeps working;
# it now holds just the count columns.
puma_sums_df = hh_df.groupby("PUMA")[puma_count_columns].sum()
puma_counts_df = puma_sums_df.copy()
puma_counts_df

Unnamed: 0_level_0,15_ELI_hh_count,15_ELI_ppl_count,15_VLI_hh_count,15_VLI_ppl_count,15_LI_hh_count,15_LI_ppl_count,15_MoI_hh_count,15_MoI_ppl_count,15_HI_hh_count,15_HI_ppl_count
PUMA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
101,8876.00,14394.00,5749.00,11643.00,6721.00,12889.00,9958.00,20338.00,22151.00,58766.00
102,19572.00,41514.00,10145.00,21846.00,10900.00,24921.00,11414.00,25389.00,22475.00,47032.00
103,6150.00,8451.00,5097.00,9579.00,5741.00,10741.00,7984.00,15782.00,29394.00,72586.00
104,12986.00,35620.00,8278.00,24347.00,7865.00,28729.00,6731.00,18399.00,5966.00,16291.00
105,7500.00,13561.00,8953.00,23036.00,11384.00,27481.00,12761.00,31935.00,23758.00,65637.00
106,6917.00,16297.00,4050.00,9765.00,8530.00,23961.00,9752.00,28423.00,18902.00,55348.00
107,6198.00,13220.00,5639.00,15868.00,8703.00,29833.00,10796.00,41147.00,15034.00,45365.00
108,2691.00,6327.00,3373.00,11184.00,6197.00,17988.00,9372.00,33557.00,20136.00,67926.00
109,4269.00,8833.00,4097.00,8718.00,7999.00,23275.00,11772.00,34308.00,37605.00,118317.00
110,6106.00,12968.00,6572.00,18841.00,5967.00,12200.00,13669.00,41272.00,46398.00,138023.00


In [19]:
# Save the PUMA-level household and person counts as a CSV in the working directory.
puma_counts_df.to_csv(path_or_buf="2015_ppl_hh_puma_counts.csv")