### This notebook generates the household_extras table that's used for splicing additional PUMS series into the MTC synthetic population

Sam Maurer, July 2015

In [1]:
import numpy as np
import pandas as pd
import zipfile
pd.set_option('display.max_columns', 500)

In [2]:
bay_area_cfips = [1,13,41,55,75,81,85,95,97]

In [16]:
# load household records
z = zipfile.ZipFile('../data/csv_hca_2012_5yr.zip')
df1 = pd.read_csv(z.open('ss12hca.csv'))
print len(df1)

737966


In [17]:
# limit to Bay Area counties
cfips = np.floor(df1.PUMA00/100) # county fips
df_h = df1[cfips.isin(bay_area_cfips)]
print len(df_h)

68197


In [7]:
# load person records
z = zipfile.ZipFile('../data/csv_pca_2013_5yr.zip')
df2 = pd.read_csv(z.open('ss13pca.csv'))
print len(df2)

1821970


  data = self._reader.read(nrows)


In [8]:
# limit to Bay Area and heads of household
cfips = np.floor(df2.PUMA00/100) # county fips
df_p = df2[cfips.isin(bay_area_cfips) & (df2.RELP == 0)]
print len(df_p)

43849


In [None]:
# HOUSEHOLD RECORDS
# TEN is tenure: 1 and 2 = owned, 3 = rented

# PERSON RECORDS
# RAC1P is race code: 1 = white, 2 = black, 6 = asian
# HISP is hispanic code: >1 = hispanic

In [9]:
# merge and discard unneeded columns
df = df_h[['SERIALNO','TEN']].merge(df_p[['SERIALNO','RAC1P','HISP']], on='SERIALNO')
print len(df_p)

43849


In [10]:
# rename to lowercase for consistency with urbansim
df.columns = [s.lower() for s in df.columns.values]

In [11]:
# set index
df = df.set_index('serialno')

In [12]:
# fix data type of tenure
df['ten'] = df.ten.astype(int)

In [13]:
df.head(6)

Unnamed: 0_level_0,ten,rac1p,hisp
serialno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009000000029,1,1,1
2009000000216,1,1,1
2009000000382,1,6,1
2009000000397,3,1,1
2009000000430,2,1,1
2009000000475,1,1,1


In [71]:
# save to data folder
df.to_csv('../data/household_extras.csv')

In [28]:
df.TEN.dtype

dtype('float64')

In [15]:
df.DIVISION.dtype

dtype('int64')

In [22]:
cfips = np.floor(df.PUMA/100)
cfips.head(5)

0    73
1    37
2    85
3     3
4    79
Name: PUMA, dtype: float64

In [18]:
df1.head(5)

Unnamed: 0,RT,SERIALNO,DIVISION,PUMA00,PUMA10,REGION,ST,ADJHSG,ADJINC,WGTP,NP,TYPE,ACR,AGS,BATH,BDSP,BLD,BUS,CONP,ELEP,FS,FULP,GASP,HFL,INSP,MHP,MRGI,MRGP,MRGT,MRGX,REFR,RMSP,RNTM,RNTP,RWAT,SINK,SMP,STOV,TEL,TEN,TOIL,VACS,VALP,VEH,WATP,YBL,FES,FINCP,FPARC,GRNTP,GRPIP,HHL,HHT,HINCP,HUGCL,HUPAC,HUPAOC,HUPARC,KIT,LNGI,MULTG,MV,NOC,NPF,NPP,NR,NRC,OCPIP,PARTNER,PLM,PSF,R18,R60,R65,RESMODE,SMOCP,SMX,SRNT,SVAL,TAXP,WIF,WKEXREL,WORKSTAT,FACRP,FAGSP,FBATHP,FBDSP,FBLDP,FBUSP,FCONP,FELEP,FFSP,FFULP,FGASP,FHFLP,FINSP,FKITP,FMHP,FMRGIP,FMRGP,FMRGTP,FMRGXP,FMVP,FPLMP,FREFRP,FRMSP,FRNTMP,FRNTP,FRWATP,FSINKP,FSMP,FSMXHP,FSMXSP,FSTOVP,FTAXP,FTELP,FTENP,FTOILP,FVACSP,FVALP,FVEHP,FWATP,FYBLP,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,WGTP6,WGTP7,WGTP8,WGTP9,WGTP10,WGTP11,WGTP12,WGTP13,WGTP14,WGTP15,WGTP16,WGTP17,WGTP18,WGTP19,WGTP20,WGTP21,WGTP22,WGTP23,WGTP24,WGTP25,WGTP26,WGTP27,WGTP28,WGTP29,WGTP30,WGTP31,WGTP32,WGTP33,WGTP34,WGTP35,WGTP36,WGTP37,WGTP38,WGTP39,WGTP40,WGTP41,WGTP42,WGTP43,WGTP44,WGTP45,WGTP46,WGTP47,WGTP48,WGTP49,WGTP50,WGTP51,WGTP52,WGTP53,WGTP54,WGTP55,WGTP56,WGTP57,WGTP58,WGTP59,WGTP60,WGTP61,WGTP62,WGTP63,WGTP64,WGTP65,WGTP66,WGTP67,WGTP68,WGTP69,WGTP70,WGTP71,WGTP72,WGTP73,WGTP74,WGTP75,WGTP76,WGTP77,WGTP78,WGTP79,WGTP80
0,H,2008000000004,9,8004,-9,4,6,1066414,1086024,30,2,1,,,1,1,5,,0,50.0,2.0,2.0,10.0,1.0,,,,,,,1,3,2.0,620.0,1,1,,1,1.0,3.0,1,,,1.0,1.0,6,4.0,29000.0,4.0,680.0,28.0,1.0,1.0,29000.0,0.0,4.0,4.0,4.0,1,1.0,1.0,4.0,0.0,2.0,0.0,0.0,0.0,,0.0,1,0.0,0.0,2.0,2.0,1,,,1,0,,0.0,9.0,9.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,48,26,8,27,32,26,41,7,53,10,42,33,29,22,9,28,44,32,10,33,28,52,32,7,10,9,39,35,31,29,41,58,44,56,37,10,31,53,26,8,49,25,8,30,35,29,42,6,56,10,45,30,31,24,8,27,41,30,9,37,27,52,35,8,9,8,43,37,27,28,43,50,41,59,34,10,30,48,26
1,H,2008000000010,9,400,-9,4,6,1066414,1086024,19,2,1,1.0,,1,3,2,2.0,0,90.0,2.0,300.0,3.0,4.0,500.0,,,,,3.0,1,6,,,1,1,,1,1.0,2.0,1,,300000.0,2.0,790.0,4,4.0,47600.0,4.0,,,1.0,1.0,47600.0,0.0,4.0,4.0,4.0,1,1.0,1.0,6.0,0.0,2.0,0.0,0.0,0.0,8.0,0.0,1,0.0,0.0,2.0,2.0,2,306.0,,0,1,22.0,0.0,9.0,9.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,19,29,5,20,24,20,6,15,30,22,5,4,17,22,7,24,17,27,16,6,14,5,27,22,26,23,26,25,6,23,45,29,24,17,32,9,15,5,21,33,17,28,7,15,15,19,6,15,34,14,6,6,17,18,6,36,21,30,18,7,28,7,25,18,17,15,27,18,5,14,39,41,21,22,22,7,28,5,19
2,H,2008000000014,9,5420,-9,4,6,1066414,1086024,25,1,1,,,1,1,7,,0,60.0,2.0,2.0,4.0,3.0,,,,,,,1,3,2.0,750.0,1,1,,1,1.0,3.0,1,,,1.0,550.0,5,,,,860.0,47.0,5.0,6.0,22000.0,0.0,4.0,4.0,4.0,1,1.0,1.0,3.0,0.0,,0.0,0.0,0.0,,0.0,1,0.0,0.0,0.0,0.0,2,,,1,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30,42,5,22,21,33,35,26,9,22,21,27,12,7,24,9,19,24,17,25,47,28,30,34,9,30,7,9,25,10,50,19,36,43,36,20,8,26,40,5,23,9,31,34,22,11,30,19,45,22,26,10,43,59,23,47,29,11,32,22,8,21,27,9,28,28,44,28,16,31,8,24,27,27,6,26,36,24,8,50
3,H,2008000000040,9,3301,-9,4,6,1066414,1086024,58,0,1,,,1,0,8,,0,,,,,,,,,,,,1,1,2.0,750.0,1,1,,1,,,1,1.0,,,,6,,,,,,,,,,,,,1,,,,,,,,,,,1,,,,,2,,,1,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,58,56,18,16,15,57,96,57,18,58,54,17,60,97,99,99,59,55,91,57,57,54,17,18,18,57,99,58,17,58,55,17,58,91,92,93,60,54,18,55,60,60,103,94,99,56,16,60,98,58,50,90,63,17,18,17,60,58,17,57,63,58,92,88,107,61,15,60,94,59,56,98,52,15,16,17,55,54,91
4,H,2008000000044,9,8111,-9,4,6,1066414,1086024,55,1,1,,,1,1,9,,0,60.0,2.0,2.0,3.0,3.0,,,,,,,1,3,2.0,1400.0,1,1,,1,1.0,3.0,1,,,1.0,2.0,5,,,,1460.0,31.0,1.0,4.0,57000.0,0.0,4.0,4.0,4.0,1,1.0,1.0,2.0,0.0,,0.0,0.0,0.0,,0.0,1,0.0,0.0,0.0,0.0,2,,,1,0,,,,,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,17,102,88,16,51,104,17,14,59,47,98,63,52,50,55,101,57,53,17,51,16,83,91,15,53,90,15,16,55,59,100,52,54,51,52,90,60,55,15,57,87,15,16,85,54,19,103,96,54,60,17,60,50,55,53,15,56,54,106,59,99,17,16,89,52,16,81,88,63,50,15,53,55,55,51,15,54,53,97,64


In [33]:
df1.TEN.describe()

count    130145.000000
mean          1.998663
std           0.930373
min           1.000000
25%           1.000000
50%           2.000000
75%           3.000000
max           4.000000
Name: TEN, dtype: float64