In [1]:
import numpy as np
import pandas as pd
import math
import zipfile         # a core library for working with zip files
import requests        # third-party library for making HTTP requests
# Notebook-wide pandas display settings: render floats with two decimals
# and show up to 100 columns before pandas truncates a wide frame.
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_columns', 100)

# Outline

###  Part A. Sorting Households by Income Category
 1. Bring in 2014 PUMS data
 2. Assign county_id to each puma
 3. Bring in and clean, reformat county AMIs data
 4. Create dummy variables for each income category and assign them to households based on the number of people in the household and the household income
 5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and by the number of people in the household

### Part B. Counting People by Age Group and Income Category
1. Create count variables for each income category and age group (under 18, adult, senior?)
2. Calculate count variable by multiplying each income category dummy by the number of people within that age category
 
### Part C. Aggregating at PUMA Level
1. Aggregate at PUMA level using groupby

## Part A1. Bring in 2014 PUMS data

In [2]:
# Download the 2014 1-year California housing PUMS archive from the Census site
# and save it next to the notebook as csv_hca.zip.
url = "https://www2.census.gov/programs-surveys/acs/data/pums/2014/1-Year/csv_hca.zip"

# Fetch first and verify the HTTP status before touching the local file, so a
# failed or hung request cannot leave an empty or non-zip csv_hca.zip behind.
r = requests.get(url, timeout=60)
r.raise_for_status()

with open('csv_hca.zip', 'wb') as f:
    f.write(r.content)

In [3]:
# Open the downloaded archive read-only; `z` is kept open because a later
# cell reads the CSV member straight out of it.
z = zipfile.ZipFile(file='csv_hca.zip', mode='r')

In [4]:
# Import the housing-record table from the archive into a dataframe.
# `variable_types` holds the dtypes to apply at parse time; `columns` is the
# subset of PUMS fields that is actually loaded.
variable_types = {"NP":"int64","NOC":"float","RMSP":"str","MV":"float","WGTP":"float","TAXP":"float"}
columns = ["NP","R65","NRC","PUMA","RT","BDSP","BLD","RNTP","MRGP","SMP","CONP","TEN","VACS","VALP","TAXP","GRPIP","GRNTP","HINCP","MV","WGTP"]

# Previously `variable_types` was defined but never handed to read_csv, so the
# declared dtypes were silently ignored. Only the entries for columns that are
# really loaded are passed on (NOC and RMSP are not in `columns`).
# The member is read inside a `with` block so its file handle is closed again.
with z.open('ss14hca.csv') as csv_file:
    pums_df = pd.read_csv(csv_file,
                          low_memory=False,
                          usecols=columns,
                          dtype={name: kind
                                 for name, kind in variable_types.items()
                                 if name in columns})
pums_df

Unnamed: 0,RT,PUMA,WGTP,NP,BDSP,BLD,CONP,MRGP,RNTP,SMP,TEN,VACS,VALP,GRNTP,GRPIP,HINCP,MV,NRC,R65,TAXP
0,H,8513,139,2,02,5.00,,,00700,,3.00,,,0700,056,000015000,1.00,00,0.00,
1,H,6708,177,0,03,2.00,,,,,,7.00,,,,,,,,
2,H,7301,259,4,04,2.00,,,02300,,3.00,,,2451,040,000073000,2.00,01,0.00,
3,H,1903,165,3,03,2.00,,01100,,,1.00,,0215000,,,000086000,3.00,01,0.00,52.00
4,H,6509,174,3,03,2.00,,,00650,,3.00,,,0990,018,000064500,3.00,01,0.00,
5,H,6506,194,7,03,1.00,,,00850,,3.00,,,0960,037,000031200,4.00,04,0.00,
6,H,6101,166,3,03,2.00,,01600,,,1.00,,0300000,,,000329500,3.00,00,0.00,8.00
7,H,3729,0,1,,,,,,,,,,,,,,,,
8,H,7501,109,3,03,4.00,,,02700,,3.00,,,2770,023,000143000,3.00,00,0.00,
9,H,3709,67,2,02,2.00,,01900,,,1.00,,0600000,,,000109000,5.00,00,0.00,60.00


In [5]:
# Convert the bedroom, rent, condo-fee, mortgage and household-income fields to
# numbers. errors='coerce' turns any value that cannot be parsed into NaN.
for _code in ("BDSP", "RNTP", "CONP", "MRGP", "SMP", "HINCP"):
    pums_df[_code] = pd.to_numeric(pums_df[_code], errors='coerce')

In [6]:
# Replace the terse PUMS field codes with readable column names.
# The rename is applied to pums_df in place; codes that are not columns of the
# frame are ignored by DataFrame.rename.
# NOTE(review): the frame is loaded with an "R65" column, not "N65", so the N65
# entry below matches nothing and R65 keeps its original name -- confirm which
# code was intended before relying on "ppl_over_65" downstream.
pums_df.rename(
    columns=dict(
        RT="record_type",
        BDSP="number_bedrooms",
        BLD="units_in_structure",
        RNTP="monthly_rent",
        MRGP="first_mortgage",
        SMP="second_mortgage",
        CONP="condo_fee",
        TEN="tenure",
        VACS="vacancy_status",
        VALP="property_value",
        GRPIP="gross_rent_pct_of_income",
        GRNTP="gross_rent",
        HINCP="hh_income",
        N65="ppl_over_65",
        NP="ppl_in_hh",
        NRC="number_related_children",
        WGTP="weight",
        TAXP="prop_tax",
        MV="moved_in",
    ),
    inplace=True,
)
pums_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax
0,H,8513,139,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,
1,H,6708,177,0,3.00,2.00,,,,,,7.00,,,,,,,,
2,H,7301,259,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,
3,H,1903,165,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00
4,H,6509,174,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,
5,H,6506,194,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,
6,H,6101,166,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00
7,H,3729,0,1,,,,,,,,,,,,,,,,
8,H,7501,109,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,
9,H,3709,67,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00


## Part A2. Assign county id to each puma

In [5]:
# Read the PUMA-to-county crosswalk (one PUMA column plus county1..county7
# name columns) from the local comma-separated file.
crosswalk_df = pd.read_csv("PUMA_County_Crosswalk_v2.csv", sep=",")
crosswalk_df

Unnamed: 0,PUMA,county1,county2,county3,county4,county5,county6,county7
0,101,Alameda CA,,,,,,
1,102,Alameda CA,,,,,,
2,103,Alameda CA,,,,,,
3,104,Alameda CA,,,,,,
4,105,Alameda CA,,,,,,
5,106,Alameda CA,,,,,,
6,107,Alameda CA,,,,,,
7,108,Alameda CA,,,,,,
8,109,Alameda CA,,,,,,
9,110,Alameda CA,,,,,,


In [6]:
# Attach the crosswalk's county columns to every household record, matching on
# PUMA. A left join keeps all household rows even when no crosswalk row matches.
puma_county_df = pd.merge(pums_df, crosswalk_df, how='left', on="PUMA")
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7
0,H,8513,139.00,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,,Santa Clara CA,,,,,,
1,H,6708,177.00,0,3.00,2.00,,,,,,7.00,,,,,,,,,Sacramento CA,,,,,,
2,H,7301,259.00,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,,San Diego CA,,,,,,
3,H,1903,165.00,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00,Fresno CA,,,,,,
4,H,6509,174.00,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,,Riverside CA,,,,,,
5,H,6506,194.00,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,,Riverside CA,,,,,,
6,H,6101,166.00,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00,Placer CA,,,,,,
7,H,3729,0.00,1,,,,,,,,,,,,,,,,,Los Angeles CA,,,,,,
8,H,7501,109.00,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,,San Francisco CA,,,,,,
9,H,3709,67.00,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00,Los Angeles CA,,,,,,


In [7]:
# Remove the trailing " CA" state suffix from each county name.
# - county1 is left untouched; its cleaned value is stored in a new "County"
#   column, which is the key used to join the AMI limits later on.
# - county2..county7 are cleaned in place.
# The pattern is anchored with "$" so only a suffix is removed, and regex=True
# is passed explicitly because the default of Series.str.replace differs
# between pandas versions.
puma_county_df['County'] = puma_county_df['county1'].str.replace(r' CA$', '', regex=True)
for _extra in ('county2', 'county3', 'county4', 'county5', 'county6', 'county7'):
    puma_county_df[_extra] = puma_county_df[_extra].str.replace(r' CA$', '', regex=True)
puma_county_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County
0,H,8513,139.00,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,,Santa Clara CA,,,,,,,Santa Clara
1,H,6708,177.00,0,3.00,2.00,,,,,,7.00,,,,,,,,,Sacramento CA,,,,,,,Sacramento
2,H,7301,259.00,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,,San Diego CA,,,,,,,San Diego
3,H,1903,165.00,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00,Fresno CA,,,,,,,Fresno
4,H,6509,174.00,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,,Riverside CA,,,,,,,Riverside
5,H,6506,194.00,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,,Riverside CA,,,,,,,Riverside
6,H,6101,166.00,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00,Placer CA,,,,,,,Placer
7,H,3729,0.00,1,,,,,,,,,,,,,,,,,Los Angeles CA,,,,,,,Los Angeles
8,H,7501,109.00,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,,San Francisco CA,,,,,,,San Francisco
9,H,3709,67.00,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00,Los Angeles CA,,,,,,,Los Angeles


## Part A3. Bring in and clean, reformat county AMIs data

In [8]:
# Read the 2014 county AMI (area median income) limits table from the local
# comma-separated file: one row per county and income category.
df = pd.read_csv("2014_AMIs.csv", sep=",")
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda County\n4-Per,Extremely Low,19650,22450,25250,28050,30300,32550,34800,37050
1,Alameda County\n4-Per,Very Low Income,32750,37400,42100,46750,50500,54250,58000,61750
2,Alameda County\n4-Per,Low Income,47350,54100,60850,67600,73050,78450,83850,89250
3,Alameda County\n4-Per,Median Income,65450,74800,84150,93500,101000,108450,115950,123400
4,Alameda County\n4-Per,Moderate Income,78550,89750,101000,112200,121200,130150,139150,148100
5,Alpine County\n4-Pers,Extremely Low,17150,19600,22050,24500,26500,28450,30400,32350
6,Alpine County\n4-Pers,Very Low Income,28600,32650,36750,40800,44100,47350,50600,53900
7,Alpine County\n4-Pers,Low Income,44750,51150,57550,63900,69050,74150,79250,84350
8,Alpine County\n4-Pers,Median Income,59500,68000,76500,85000,91800,98600,105400,112200
9,Alpine County\n4-Pers,Moderate Income,71400,81600,91800,102000,110150,118300,126500,134650


In [9]:
# Clean the county names variable.
# Raw cells look like "Alameda County\n4-Per": the county name, then " County",
# a newline and a cut-off "4-Person" fragment (for long names the cut falls
# inside " County" itself, e.g. " Coun"). A single pass drops everything from
# " Coun" to the end of the cell; (?s) lets "." run across the newline.
# regex=True is explicit: the earlier chain of replaces relied on the implicit
# default, and in literal mode its \n patterns never match a real newline.
# Assumes no county name itself contains " Coun" -- TODO confirm for other data.
df['County'] = df['County'].str.replace(r'(?s) Coun.*', '', regex=True)
df

Unnamed: 0,County,Income_Category,1,2,3,4,5,6,7,8
0,Alameda,Extremely Low,19650,22450,25250,28050,30300,32550,34800,37050
1,Alameda,Very Low Income,32750,37400,42100,46750,50500,54250,58000,61750
2,Alameda,Low Income,47350,54100,60850,67600,73050,78450,83850,89250
3,Alameda,Median Income,65450,74800,84150,93500,101000,108450,115950,123400
4,Alameda,Moderate Income,78550,89750,101000,112200,121200,130150,139150,148100
5,Alpine,Extremely Low,17150,19600,22050,24500,26500,28450,30400,32350
6,Alpine,Very Low Income,28600,32650,36750,40800,44100,47350,50600,53900
7,Alpine,Low Income,44750,51150,57550,63900,69050,74150,79250,84350
8,Alpine,Median Income,59500,68000,76500,85000,91800,98600,105400,112200
9,Alpine,Moderate Income,71400,81600,91800,102000,110150,118300,126500,134650


In [10]:
# Create a dataframe for each income level and rename its variables.
# Extremely Low rows only: drop the category label and prefix the eight limit
# columns "1".."8" with "ELI_" so they stay distinct once the levels are merged.
eli_df = (
    df.loc[df["Income_Category"] == "Extremely Low"]
      .drop("Income_Category", axis=1)
      .rename(columns={str(size): "ELI_{}".format(size) for size in range(1, 9)})
)
eli_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8
0,Alameda,19650,22450,25250,28050,30300,32550,34800,37050
5,Alpine,17150,19600,22050,24500,26500,28450,30400,32350
10,Amador,15200,17400,19550,21700,23450,25200,26950,28650
15,Butte,12350,14100,15850,17600,19050,20450,21850,23250
20,Calaveras,14700,16800,18900,21000,22700,24400,26050,27750
25,Colusa,12150,13900,15650,17350,18750,20150,21550,22950
30,Contra Costa,19650,22450,25250,28050,30300,32550,34800,37050
35,Del Norte,12150,13900,15650,17350,18750,20150,21550,22950
40,El Dorado,16000,18300,20600,22850,24700,26550,28350,30200
45,Fresno,12150,13900,15650,17350,18750,20150,21550,22950


In [11]:
# Very Low Income rows only: drop the category label and prefix the eight limit
# columns "1".."8" with "VLI_".
vli_df = (
    df.loc[df["Income_Category"] == "Very Low Income"]
      .drop("Income_Category", axis=1)
      .rename(columns={str(size): "VLI_{}".format(size) for size in range(1, 9)})
)
vli_df

Unnamed: 0,County,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8
1,Alameda,32750,37400,42100,46750,50500,54250,58000,61750
6,Alpine,28600,32650,36750,40800,44100,47350,50600,53900
11,Amador,25350,28950,32550,36150,39050,41950,44850,47750
16,Butte,20550,23500,26450,29350,31700,34050,36400,38750
21,Calaveras,24500,28000,31500,35000,37800,40600,43400,46200
26,Colusa,20300,23200,26100,28950,31300,33600,35900,38250
31,Contra Costa,32750,37400,42100,46750,50500,54250,58000,61750
36,Del Norte,20300,23200,26100,28950,31300,33600,35900,38250
41,El Dorado,26650,30450,34250,38050,41100,44150,47200,50250
46,Fresno,20300,23200,26100,28950,31300,33600,35900,38250


In [12]:
# Low Income rows only: drop the category label and prefix the eight limit
# columns "1".."8" with "LI_".
li_df = (
    df.loc[df["Income_Category"] == "Low Income"]
      .drop("Income_Category", axis=1)
      .rename(columns={str(size): "LI_{}".format(size) for size in range(1, 9)})
)
li_df

Unnamed: 0,County,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8
2,Alameda,47350,54100,60850,67600,73050,78450,83850,89250
7,Alpine,44750,51150,57550,63900,69050,74150,79250,84350
12,Amador,40500,46300,52100,57850,62500,67150,71750,76400
17,Butte,32900,37600,42300,46950,50750,54500,58250,62000
22,Calaveras,39200,44800,50400,56000,60500,65000,69450,73950
27,Colusa,32450,37050,41700,46300,50050,53750,57450,61150
32,Contra Costa,47350,54100,60850,67600,73050,78450,83850,89250
37,Del Norte,32450,37050,41700,46300,50050,53750,57450,61150
42,El Dorado,42650,48750,54850,60900,65800,70650,75550,80400
47,Fresno,32450,37050,41700,46300,50050,53750,57450,61150


In [13]:
# Median Income rows only: drop the category label and prefix the eight limit
# columns "1".."8" with "MI_".
mi_df = (
    df.loc[df["Income_Category"] == "Median Income"]
      .drop("Income_Category", axis=1)
      .rename(columns={str(size): "MI_{}".format(size) for size in range(1, 9)})
)
mi_df

Unnamed: 0,County,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8
3,Alameda,65450,74800,84150,93500,101000,108450,115950,123400
8,Alpine,59500,68000,76500,85000,91800,98600,105400,112200
13,Amador,50600,57850,65050,72300,78100,83850,89650,95450
18,Butte,41100,46950,52850,58700,63400,68100,72800,77500
23,Calaveras,49000,56000,63000,70000,75600,81200,86800,92400
28,Colusa,40550,46300,52100,57900,62550,67150,71800,76450
33,Contra Costa,65450,74800,84150,93500,101000,108450,115950,123400
38,Del Norte,40550,46300,52100,57900,62550,67150,71800,76450
43,El Dorado,53250,60900,68500,76100,82200,88300,94350,100450
48,Fresno,40550,46300,52100,57900,62550,67150,71800,76450


In [14]:
# Moderate Income rows only: drop the category label and prefix the eight limit
# columns "1".."8" with "MoI_".
moi_df = (
    df.loc[df["Income_Category"] == "Moderate Income"]
      .drop("Income_Category", axis=1)
      .rename(columns={str(size): "MoI_{}".format(size) for size in range(1, 9)})
)
moi_df

Unnamed: 0,County,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
4,Alameda,78550,89750,101000,112200,121200,130150,139150,148100
9,Alpine,71400,81600,91800,102000,110150,118300,126500,134650
14,Amador,60700,69400,78100,86750,93700,100650,107550,114500
19,Butte,49300,56350,63400,70450,76100,81700,87350,93000
24,Calaveras,58800,67200,75600,84000,90700,97450,104150,110900
29,Colusa,48650,55600,62550,69500,75050,80600,86200,91750
34,Contra Costa,78550,89750,101000,112200,121200,130150,139150,148100
39,Del Norte,48650,55600,62550,69500,75050,80600,86200,91750
44,El Dorado,63900,73050,82150,91300,98600,105900,113200,120500
49,Fresno,48650,55600,62550,69500,75050,80600,86200,91750


In [15]:
# Combine the five per-level frames into one wide table keyed by County.
# Start from the extremely-low frame and left-join the others in order, so the
# result keeps exactly the counties present in eli_df.
all_amis_df = eli_df
for _level_df in (vli_df, li_df, mi_df, moi_df):
    all_amis_df = all_amis_df.merge(_level_df, how='left', on="County")
all_amis_df

Unnamed: 0,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,Alameda,19650,22450,25250,28050,30300,32550,34800,37050,32750,37400,42100,46750,50500,54250,58000,61750,47350,54100,60850,67600,73050,78450,83850,89250,65450,74800,84150,93500,101000,108450,115950,123400,78550,89750,101000,112200,121200,130150,139150,148100
1,Alpine,17150,19600,22050,24500,26500,28450,30400,32350,28600,32650,36750,40800,44100,47350,50600,53900,44750,51150,57550,63900,69050,74150,79250,84350,59500,68000,76500,85000,91800,98600,105400,112200,71400,81600,91800,102000,110150,118300,126500,134650
2,Amador,15200,17400,19550,21700,23450,25200,26950,28650,25350,28950,32550,36150,39050,41950,44850,47750,40500,46300,52100,57850,62500,67150,71750,76400,50600,57850,65050,72300,78100,83850,89650,95450,60700,69400,78100,86750,93700,100650,107550,114500
3,Butte,12350,14100,15850,17600,19050,20450,21850,23250,20550,23500,26450,29350,31700,34050,36400,38750,32900,37600,42300,46950,50750,54500,58250,62000,41100,46950,52850,58700,63400,68100,72800,77500,49300,56350,63400,70450,76100,81700,87350,93000
4,Calaveras,14700,16800,18900,21000,22700,24400,26050,27750,24500,28000,31500,35000,37800,40600,43400,46200,39200,44800,50400,56000,60500,65000,69450,73950,49000,56000,63000,70000,75600,81200,86800,92400,58800,67200,75600,84000,90700,97450,104150,110900
5,Colusa,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750
6,Contra Costa,19650,22450,25250,28050,30300,32550,34800,37050,32750,37400,42100,46750,50500,54250,58000,61750,47350,54100,60850,67600,73050,78450,83850,89250,65450,74800,84150,93500,101000,108450,115950,123400,78550,89750,101000,112200,121200,130150,139150,148100
7,Del Norte,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750
8,El Dorado,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
9,Fresno,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750


## Part A4. Create dummy variables for each income category and assign to households by number of people and hh income

In [16]:
# Give every household record the income limits of its county by left-joining
# the wide AMI table on the cleaned "County" name. Households whose county has
# no AMI row are kept, with missing limits.
hh_df = pd.merge(puma_county_df, all_amis_df, how='left', on="County")
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8
0,H,8513,139.00,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,,Santa Clara CA,,,,,,,Santa Clara,22300,25500,28650,31850,34400,36950,39500,42050,37150,42450,47750,53050,57300,61550,65800,70050,59400,67900,76400,84900,91650,98450,105250,112050,73850,84400,94950,105500,113950,122400,130800,139250,88600,101300,113950,126600,136750,146850,157000,167100
1,H,6708,177.00,0,3.00,2.00,,,,,,7.00,,,,,,,,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
2,H,7301,259.00,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,,San Diego CA,,,,,,,San Diego,17350,19850,22300,24800,26800,28750,30750,32750,28900,33050,37150,41300,44600,47900,51200,54500,46250,52900,59500,66100,71400,76700,81950,87250,53150,60700,68300,75900,81950,88050,94100,100200,63750,72900,82000,91100,98400,105700,112950,120250
3,H,1903,165.00,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00,Fresno CA,,,,,,,Fresno,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750
4,H,6509,174.00,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950
5,H,6506,194.00,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950
6,H,6101,166.00,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00,Placer CA,,,,,,,Placer,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500
7,H,3729,0.00,1,,,,,,,,,,,,,,,,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650
8,H,7501,109.00,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,,San Francisco CA,,,,,,,San Francisco,23750,27150,30550,33950,36650,39400,42100,44800,39600,45250,50900,56550,61050,65600,70100,74650,63350,72400,81450,90500,97700,104950,112200,119450,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150
9,H,3709,67.00,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650


In [17]:
# Flag every household with the income category its income falls into.
#
# The category limits depend on household size: a household of n people is compared
# against the <LIMIT>_n columns of its own row, for n = 1..8. A household gets 1 in
# "<CATEGORY>_count" when its income is above the lower limit (if any) and at or
# below the upper limit (if any), otherwise 0.
#
# NOTE(review): rows whose ppl_in_hh is outside 1..8 (e.g. 0, or more than 8 people)
# and rows with a missing hh_income match no band and get 0 in every flag.

# (category, limit prefix income must exceed, limit prefix income must not exceed)
_income_bands = [
    ("ELI", None, "ELI"),
    ("VLI", "ELI", "VLI"),
    ("LI", "VLI", "LI"),
    ("MoI", "LI", "MoI"),
    ("HI", "MoI", None),
]

for _category, _floor, _ceiling in _income_bands:
    _size_masks = []
    for _size in range(1, 9):
        _match = hh_df["ppl_in_hh"] == _size
        if _ceiling is not None:
            _match = _match & (hh_df["hh_income"] <= hh_df["{}_{}".format(_ceiling, _size)])
        if _floor is not None:
            _match = _match & (hh_df["hh_income"] > hh_df["{}_{}".format(_floor, _size)])
        _size_masks.append(_match)
    # A household belongs to the category if the condition for its size holds.
    hh_df["{}_count".format(_category)] = np.where(np.logical_or.reduce(_size_masks), 1, 0)
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count
0,H,8513,139.00,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,,Santa Clara CA,,,,,,,Santa Clara,22300,25500,28650,31850,34400,36950,39500,42050,37150,42450,47750,53050,57300,61550,65800,70050,59400,67900,76400,84900,91650,98450,105250,112050,73850,84400,94950,105500,113950,122400,130800,139250,88600,101300,113950,126600,136750,146850,157000,167100,1,0,0,0,0
1,H,6708,177.00,0,3.00,2.00,,,,,,7.00,,,,,,,,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,0
2,H,7301,259.00,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,,San Diego CA,,,,,,,San Diego,17350,19850,22300,24800,26800,28750,30750,32750,28900,33050,37150,41300,44600,47900,51200,54500,46250,52900,59500,66100,71400,76700,81950,87250,53150,60700,68300,75900,81950,88050,94100,100200,63750,72900,82000,91100,98400,105700,112950,120250,0,0,0,1,0
3,H,1903,165.00,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00,Fresno CA,,,,,,,Fresno,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750,0,0,0,0,1
4,H,6509,174.00,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,1,0
5,H,6506,194.00,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,1,0,0,0
6,H,6101,166.00,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00,Placer CA,,,,,,,Placer,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1
7,H,3729,0.00,1,,,,,,,,,,,,,,,,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,0
8,H,7501,109.00,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,,San Francisco CA,,,,,,,San Francisco,23750,27150,30550,33950,36650,39400,42100,44800,39600,45250,50900,56550,61050,65600,70100,74650,63350,72400,81450,90500,97700,104950,112200,119450,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150,0,0,0,0,1
9,H,3709,67.00,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1


## Part A5. Create count variables for households and people in each income category by multiplying each dummy by the weight variable and the number of people in the household

In [18]:
# Turn each 0/1 income-category flag into weighted counts:
#   14_<CATEGORY>_hh_count  = flag * weight
#   14_<CATEGORY>_ppl_count = flag * ppl_in_hh * weight
for _category in ["ELI", "VLI", "LI", "MoI", "HI"]:
    _flag = hh_df["{}_count".format(_category)]
    hh_df["14_{}_hh_count".format(_category)] = _flag * hh_df["weight"]
    hh_df["14_{}_ppl_count".format(_category)] = _flag * hh_df["ppl_in_hh"] * hh_df["weight"]
hh_df

Unnamed: 0,record_type,PUMA,weight,ppl_in_hh,number_bedrooms,units_in_structure,condo_fee,first_mortgage,monthly_rent,second_mortgage,tenure,vacancy_status,property_value,gross_rent,gross_rent_pct_of_income,hh_income,moved_in,number_related_children,R65,prop_tax,county1,county2,county3,county4,county5,county6,county7,County,ELI_1,ELI_2,ELI_3,ELI_4,ELI_5,ELI_6,ELI_7,ELI_8,VLI_1,VLI_2,VLI_3,VLI_4,VLI_5,VLI_6,VLI_7,VLI_8,LI_1,LI_2,LI_3,LI_4,LI_5,LI_6,LI_7,LI_8,MI_1,MI_2,MI_3,MI_4,MI_5,MI_6,MI_7,MI_8,MoI_1,MoI_2,MoI_3,MoI_4,MoI_5,MoI_6,MoI_7,MoI_8,ELI_count,VLI_count,LI_count,MoI_count,HI_count,14_ELI_hh_count,14_ELI_ppl_count,14_VLI_hh_count,14_VLI_ppl_count,14_LI_hh_count,14_LI_ppl_count,14_MoI_hh_count,14_MoI_ppl_count,14_HI_hh_count,14_HI_ppl_count
0,H,8513,139.00,2,2.00,5.00,,,700.00,,3.00,,,0700,056,15000.00,1.00,00,0.00,,Santa Clara CA,,,,,,,Santa Clara,22300,25500,28650,31850,34400,36950,39500,42050,37150,42450,47750,53050,57300,61550,65800,70050,59400,67900,76400,84900,91650,98450,105250,112050,73850,84400,94950,105500,113950,122400,130800,139250,88600,101300,113950,126600,136750,146850,157000,167100,1,0,0,0,0,139.00,278.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1,H,6708,177.00,0,3.00,2.00,,,,,,7.00,,,,,,,,,Sacramento CA,,,,,,,Sacramento,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2,H,7301,259.00,4,4.00,2.00,,,2300.00,,3.00,,,2451,040,73000.00,2.00,01,0.00,,San Diego CA,,,,,,,San Diego,17350,19850,22300,24800,26800,28750,30750,32750,28900,33050,37150,41300,44600,47900,51200,54500,46250,52900,59500,66100,71400,76700,81950,87250,53150,60700,68300,75900,81950,88050,94100,100200,63750,72900,82000,91100,98400,105700,112950,120250,0,0,0,1,0,0.00,0.00,0.00,0.00,0.00,0.00,259.00,1036.00,0.00,0.00
3,H,1903,165.00,3,3.00,2.00,,1100.00,,,1.00,,0215000,,,86000.00,3.00,01,0.00,52.00,Fresno CA,,,,,,,Fresno,12150,13900,15650,17350,18750,20150,21550,22950,20300,23200,26100,28950,31300,33600,35900,38250,32450,37050,41700,46300,50050,53750,57450,61150,40550,46300,52100,57900,62550,67150,71800,76450,48650,55600,62550,69500,75050,80600,86200,91750,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,165.00,495.00
4,H,6509,174.00,3,3.00,2.00,,,650.00,,3.00,,,0990,018,64500.00,3.00,01,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,0,0,1,0,0.00,0.00,0.00,0.00,0.00,0.00,174.00,522.00,0.00,0.00
5,H,6506,194.00,7,3.00,1.00,,,850.00,,3.00,,,0960,037,31200.00,4.00,04,0.00,,Riverside CA,,,,,,,Riverside,14100,16100,18100,20100,21750,23350,24950,26550,23450,26800,30150,33500,36200,38900,41550,44250,37550,42900,48250,53600,57900,62200,66500,70800,45500,52000,58500,65000,70200,75400,80600,85800,54600,62400,70200,78000,84250,90500,96700,102950,0,1,0,0,0,0.00,0.00,194.00,1358.00,0.00,0.00,0.00,0.00,0.00,0.00
6,H,6101,166.00,3,3.00,2.00,,1600.00,,,1.00,,0300000,,,329500.00,3.00,00,0.00,8.00,Placer CA,,,,,,,Placer,16000,18300,20600,22850,24700,26550,28350,30200,26650,30450,34250,38050,41100,44150,47200,50250,42650,48750,54850,60900,65800,70650,75550,80400,53250,60900,68500,76100,82200,88300,94350,100450,63900,73050,82150,91300,98600,105900,113200,120500,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,166.00,498.00
7,H,3729,0.00,1,,,,,,,,,,,,,,,,,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
8,H,7501,109.00,3,3.00,4.00,,,2700.00,,3.00,,,2770,023,143000.00,3.00,00,0.00,,San Francisco CA,,,,,,,San Francisco,23750,27150,30550,33950,36650,39400,42100,44800,39600,45250,50900,56550,61050,65600,70100,74650,63350,72400,81450,90500,97700,104950,112200,119450,72100,82400,92700,103000,111250,119500,127700,135950,86500,98900,111250,123600,133500,143400,153250,163150,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,109.00,327.00
9,H,3709,67.00,2,2.00,2.00,,1900.00,,,1.00,,0600000,,,109000.00,5.00,00,0.00,60.00,Los Angeles CA,,,,,,,Los Angeles,17950,20500,23050,25600,27650,29700,31750,33800,29900,34200,38450,42700,46150,49550,52950,56400,47850,54650,61500,68300,73800,79250,84700,90200,45350,51850,58300,64800,70000,75150,80350,85550,54450,62200,70000,77750,83950,90200,96400,102650,0,0,0,0,1,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,67.00,134.00


## Part C1. Aggregating at the PUMA level

In [19]:
# Sum every numeric column of the household table within each PUMA.
# numeric_only=True is passed explicitly because hh_df also carries text columns
# (record type, county names, ...). Older pandas releases dropped those silently,
# but pandas >= 2.0 defaults to numeric_only=False and would try to add them up too.
puma_sums_df = hh_df.groupby("PUMA").sum(numeric_only=True)

# Keep only the weighted household / people counts per income category.
puma_counts_df = puma_sums_df[[
    "14_ELI_hh_count", "14_ELI_ppl_count",
    "14_VLI_hh_count", "14_VLI_ppl_count",
    "14_LI_hh_count", "14_LI_ppl_count",
    "14_MoI_hh_count", "14_MoI_ppl_count",
    "14_HI_hh_count", "14_HI_ppl_count",
]].copy()
puma_counts_df

Unnamed: 0_level_0,14_ELI_hh_count,14_ELI_ppl_count,14_VLI_hh_count,14_VLI_ppl_count,14_LI_hh_count,14_LI_ppl_count,14_MoI_hh_count,14_MoI_ppl_count,14_HI_hh_count,14_HI_ppl_count
PUMA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
101,11278.00,22803.00,4381.00,10627.00,3939.00,8767.00,9228.00,19420.00,21760.00,55209.00
102,19067.00,35733.00,12644.00,28940.00,9821.00,20337.00,13194.00,28024.00,18026.00,36634.00
103,5584.00,9162.00,5033.00,9201.00,3449.00,8000.00,11307.00,22414.00,28070.00,71619.00
104,12352.00,36341.00,8142.00,26397.00,5701.00,19233.00,8693.00,29058.00,4722.00,12698.00
105,8366.00,16369.00,8447.00,20200.00,7826.00,21264.00,16029.00,39964.00,24327.00,67020.00
106,8004.00,20054.00,7084.00,18426.00,5405.00,13669.00,13080.00,38939.00,13695.00,37102.00
107,7538.00,18257.00,7708.00,20438.00,5790.00,16782.00,13704.00,44047.00,14185.00,44692.00
108,4638.00,12387.00,3100.00,9116.00,4150.00,13701.00,10513.00,36597.00,17855.00,56268.00
109,6135.00,12520.00,5167.00,12317.00,6548.00,20881.00,15131.00,49426.00,32451.00,97605.00
110,5003.00,7342.00,6835.00,15551.00,5925.00,15056.00,16604.00,46443.00,44089.00,123757.00


In [20]:
# Save the PUMA-level household and people counts as a CSV file in the working directory.
_output_csv = "2014_ppl_hh_puma_counts.csv"
puma_counts_df.to_csv(_output_csv)