## Population & Housing variables pulled from the 2010 and 2020 Censuses (using the Census API) for urban villages, City of Phoenix and U.S.
### added Maricopa County, State of Arizona, and comparable cities

Total Housing Units, Population, Population by Ethnicity, Total Occupied Housing Units

Note: to update the list of comparable cities, adjust the dictionary in the getters script. Also note that Indianapolis is a consolidated city.

In [1]:
import sys
import pandas as pd
import numpy as np

In [2]:
# Append the parent directory to the import search path before importing the
# `getters` script (aliased as `get`), which supplies the Census pull helpers
# used below.
sys.path.append('..')
import getters as get

In [5]:
# Load the block-group list from the QA/QC CSV.
bg20 = pd.read_csv('../../data/geo/qaqc/bgp-puma-outside-20.csv')
# Left-pad every geoid with zeros to a 12-character string.
bg20['geoid'] = bg20['geoid'].map('{0:0>12}'.format)
# Only the block-group id and its PUMA code are needed downstream.
bg20 = bg20.loc[:, ['geoid', 'puma']].copy()

In [7]:
# PUMA codes used later to filter the tract-built PUMA totals down to Phoenix.
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122', '0400123',
    '0400125', '0400128', '0400112', '0400129',
]

#north_pumas = ['0400112','0400129']

In [9]:
## Census API sources and the variable lists requested per Decennial Census year
# 2010 comes from SF1; 2020 comes from the redistricting file
# (until the 2020 tables are released).
source_dec = 'dec/sf1'
source_red = 'dec/pl'

# Comma-separated variable codes to request, keyed by census year.
years = {
    '2010': (
        'P001001,'
        'P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,'
        'H003001,H003002,H003003'
    ),
    '2020': (
        'P1_001N,'
        'P2_002N,P2_005N,P2_006N,P2_007N,P2_008N,P2_009N,P2_010N,P2_011N,'
        'H1_001N,H1_002N,H1_003N'
    ),
}

# 2010 code -> output column label.  Several codes deliberately share the
# 'P_Ot_10E' label so they can be summed into a single column after renaming.
col_10_rename = {
    'P001001': 'Pop_10E',
    'P005003': 'P_Wh_10E',
    'P005004': 'P_Bl_10E',
    'P005006': 'P_As_10E',
    'P005010': 'P_Hi_10E',
    'P005005': 'P_Ot_10E',
    'P005007': 'P_Ot_10E',
    'P005008': 'P_Ot_10E',
    'P005009': 'P_Ot_10E',
    'H003001': 'Hou_10E',
    'H003002': 'Hou_O_10E',
    'H003003': 'Hou_V_10E',
}

# 2020 code -> output column label, with the same shared 'P_Ot' grouping.
col_20_rename = {
    'P1_001N': 'Pop_20E',
    'P2_002N': 'P_Hi_20E',
    'P2_005N': 'P_Wh_20E',
    'P2_006N': 'P_Bl_20E',
    'P2_007N': 'P_Ot_20E',
    'P2_008N': 'P_As_20E',
    'P2_009N': 'P_Ot_20E',
    'P2_010N': 'P_Ot_20E',
    'P2_011N': 'P_Ot_20E',
    'H1_001N': 'Hou_20E',
    'H1_002N': 'Hou_O_20E',
    'H1_003N': 'Hou_V_20E',
}

### Get block group data for block groups outside the Phoenix border that are included in the PUMAs

In [10]:
# Fetch the 2020 variables through `get.get_bgp` from the redistricting source.
# The year is named explicitly rather than taken by position from `years`,
# so the request cannot silently switch to another census if that dict is
# reordered or extended.
census_year = '2020'
df20 = get.get_bgp(source_red, census_year, years[census_year])

In [13]:
# Left join: keep every block group listed in bg20 and attach the fetched
# census columns where bg20.geoid equals the fetched GEO_ID.
df20 = bg20.merge(df20, how='left', left_on='geoid', right_on='GEO_ID')

In [16]:
# The two join keys are no longer needed once the frames are merged.
df20 = df20.drop(columns=['geoid', 'GEO_ID'])

In [19]:
# Cast every column after the first one (puma) to int so the values can be summed.
df20 = df20.astype({col: int for col in df20.columns[1:]})

In [22]:
# Relabel the census codes with the output column names.  Several codes map to
# the same label (e.g. 'P_Ot_20E'), so the renamed frame has duplicate columns.
df20 = df20.rename(columns=col_20_rename)

# Step 1: add up columns that share a label.  `groupby(..., axis=1)` is
# deprecated in pandas 2.1 and removed in 3.0, so the value columns are
# transposed, grouped by label, and transposed back.  `puma` is set aside
# first so it is not part of the numeric transpose.
puma_key = df20['puma']
collapsed = df20.drop(columns='puma').T.groupby(level=0).sum().T

# Step 2: total the collapsed columns per PUMA and restore `puma` as the
# first column.
df20 = collapsed.groupby(puma_key).sum().reset_index()

In [23]:
# Preview the first rows of the per-PUMA totals (notebook display only).
df20.head()

Unnamed: 0,puma,Hou_20E,Hou_O_20E,Hou_V_20E,P_As_20E,P_Bl_20E,P_Hi_20E,P_Ot_20E,P_Wh_20E,Pop_20E
0,400112,38642,31315,7327,2535,815,4057,2736,61268,71411
1,400118,1218,1092,126,203,209,645,246,1367,2670
2,400119,1222,1185,37,165,801,2203,228,936,4333
3,400121,7002,6681,321,626,2519,15439,964,4139,23687
4,400129,38714,35759,2955,5138,2032,11589,5604,79210,103573


### Get PUMA data - build from tracts

In [27]:
# Read the 2020 tract-to-PUMA equivalency file from the Census website and
# keep only the rows with STATEFP 4 and COUNTYFP 13 before building any ids.
tract_puma = pd.read_table('https://www2.census.gov/geo/docs/maps-data/data/rel2020/2020_Census_Tract_to_2020_PUMA.txt',sep=',')
tract_puma = tract_puma[(tract_puma.STATEFP==4)&(tract_puma.COUNTYFP==13)].copy()

# Rebuild fixed-width string ids from the code columns.  Each part is
# zero-padded to its own width (state 2, county 3, tract 6, PUMA 5) instead of
# being prefixed with a literal '0', which only gives valid ids for one-digit
# state codes and two-digit county codes.
state_fips = tract_puma['STATEFP'].astype(str).str.zfill(2)
county_fips = tract_puma['COUNTYFP'].astype(str).str.zfill(3)
tract_puma['puma'] = state_fips + tract_puma['PUMA5CE'].astype(str).str.zfill(5)
tract_puma['tract'] = state_fips + county_fips + tract_puma['TRACTCE'].astype(str).str.zfill(6)

In [35]:
# Fetch the 2020 variables through `get.get_tract` from the redistricting
# source.  The year is named explicitly rather than taken by position from
# `years`, so the request cannot silently switch to another census if that
# dict is reordered or extended.
census_year = '2020'
p20 = get.get_tract(source_red, census_year, years[census_year])

In [39]:
# Concatenate the state, county and tract code columns into one id column,
# then discard the three component columns.
p20 = p20.assign(geoid=p20['state'] + p20['county'] + p20['tract'])
p20 = p20.drop(columns=['state', 'county', 'tract'])

In [41]:
# Attach each tract's PUMA code (left join on p20.geoid == tract_puma.tract).
p20 = p20.merge(tract_puma, how='left', left_on='geoid', right_on='tract')
# Only the puma code is needed from the equivalency table; drop the join keys
# and the raw code columns.
p20 = p20.drop(columns=['geoid', 'STATEFP', 'COUNTYFP', 'TRACTCE', 'PUMA5CE', 'tract'])

In [46]:
# Move `puma` to the front; the remaining columns keep their relative order.
p20.insert(0, 'puma', p20.pop('puma'))

In [50]:
# Cast every column after the first one (puma) to int so the values can be summed.
p20 = p20.astype({col: int for col in p20.columns[1:]})

In [52]:
# Relabel the census codes with the output column names.  Several codes map to
# the same label (e.g. 'P_Ot_20E'), so the renamed frame has duplicate columns.
p20 = p20.rename(columns=col_20_rename)

# Step 1: add up columns that share a label.  `groupby(..., axis=1)` is
# deprecated in pandas 2.1 and removed in 3.0, so the value columns are
# transposed, grouped by label, and transposed back.  `puma` is set aside
# first so it is not part of the numeric transpose.
puma_key = p20['puma']
collapsed = p20.drop(columns='puma').T.groupby(level=0).sum().T

# Step 2: total the collapsed columns per PUMA and restore `puma` as the
# first column.
p20 = collapsed.groupby(puma_key).sum().reset_index()

In [54]:
# Keep only the rows whose PUMA code is in the phx_pumas list.
p20 = p20.loc[p20['puma'].isin(phx_pumas)].copy()

In [55]:
# Preview the first rows of the filtered per-PUMA totals (notebook display only).
p20.head()

Unnamed: 0,puma,Hou_20E,Hou_O_20E,Hou_V_20E,P_As_20E,P_Bl_20E,P_Hi_20E,P_Ot_20E,P_Wh_20E,Pop_20E
11,400112,65624,55306,10318,6935,1806,8754,5717,107515,130727
12,400113,48722,44205,4517,7302,1770,12662,5716,77243,104693
13,400114,50696,47306,3390,4097,4625,30096,6461,66076,111355
14,400115,47277,43966,3311,4167,6966,32622,7237,58663,109655
15,400116,53568,48378,5190,3782,10300,35482,8804,52280,110648


In [56]:
# Write both result tables to one workbook, one sheet per table.
sheets = {"puma_total": p20, "outside_phx": df20}
with pd.ExcelWriter('PUMA_pop.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)