## Pop & Housing variables pulled from the 2010 and 2020 Censuses (using the Census API) for urban villages, City of Phoenix and U.S.
### added Maricopa County, State of Arizona, and comparable cities

Total Housing Units, Population, Population by Ethnicity, Total Occupied Housing Units, Total Vacant Housing Units

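Note: to update the list of comp cities, adjust the dictionary in the getters script, and add the city's code and label to the `rename` dictionary used just before the export.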

In [1]:
import pandas as pd
import numpy as np

In [2]:
import getters as get

In [3]:
# Geography lookup tables for the 2010 and 2020 vintages
# (their `geoid` and `name` columns are used later to roll data up to urban villages)
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# left-pad every geoid with zeros so it becomes a 12-character string
for frame in (bgp_10, bgp_20):
    frame['geoid'] = frame['geoid'].apply(lambda gid: f'{gid:0>12}')

# get rid of area & geo columns that are not being used
bgp_10 = bgp_10.drop(columns=['aland10', 'awater10', 'lat10', 'lon10', 'land_acre'])
bgp_20 = bgp_20.drop(columns=['aland20', 'awater20', 'lat20', 'lon20', 'land_acre'])

In [4]:
## Census API sources and variable lists, by Decennial Census year
# 2010 uses Summary File 1; 2020 uses the Redistricting file (until 2020 tables are released)
source_dec = 'dec/sf1'
source_red = 'dec/pl'

# comma-separated variable codes requested for each Census
# (later cells pick entries by position, so keep 2010 first and 2020 second)
years = {
    '2010': 'P001001,P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,H003001,H003002,H003003',
    '2020': 'P1_001N,P2_002N,P2_005N,P2_006N,P2_007N,P2_008N,P2_009N,P2_010N,P2_011N,H1_001N,H1_002N,H1_003N',
}

# 2010 codes -> output labels; codes that share a label are summed into one column later
col_10_rename = {
    'P001001': 'Pop_10E',
    'P005003': 'P_Wh_10E',
    'P005004': 'P_Bl_10E',
    'P005006': 'P_As_10E',
    'P005010': 'P_Hi_10E',
    'P005005': 'P_Ot_10E',
    'P005007': 'P_Ot_10E',
    'P005008': 'P_Ot_10E',
    'P005009': 'P_Ot_10E',
    'H003001': 'Hou_10E',
    'H003002': 'Hou_O_10E',
    'H003003': 'Hou_V_10E',
}

# 2020 codes -> output labels, grouped the same way as 2010
col_20_rename = {
    'P1_001N': 'Pop_20E',
    'P2_002N': 'P_Hi_20E',
    'P2_005N': 'P_Wh_20E',
    'P2_006N': 'P_Bl_20E',
    'P2_007N': 'P_Ot_20E',
    'P2_008N': 'P_As_20E',
    'P2_009N': 'P_Ot_20E',
    'P2_010N': 'P_Ot_20E',
    'P2_011N': 'P_Ot_20E',
    'H1_001N': 'Hou_20E',
    'H1_002N': 'Hou_O_20E',
    'H1_003N': 'Hou_V_20E',
}

### Urban Village for 2010 and 2020

In [5]:
# years.items() yields (year, variable list) pairs in insertion order:
# position 0 is the 2010 Census, position 1 is the 2020 Census
df10 = get.get_bgp(source_dec, *list(years.items())[0])
df20 = get.get_bgp(source_red, *list(years.items())[1])

In [6]:
def make_uvil(geodf,df,rename_col):
    """Roll Census rows up to one row per `name` in the geography lookup.

    Parameters
    ----------
    geodf : DataFrame with a `geoid` key column and a `name` column to group by.
    df : DataFrame of Census variables with a `GEO_ID` key column. Every column
        except the last one is cast to int, so the key is assumed to be the
        last column.
    rename_col : dict mapping Census variable codes to output labels. Several
        codes may share one label; those columns are added together.

    Returns
    -------
    DataFrame with `name` first, followed by the summed columns in
    alphabetical order.

    Unlike the earlier version, the caller's `df` is left untouched.
    """
    # rename returns a new frame, so the caller's data is not modified
    census = df.rename(columns=rename_col)

    # cast the Census values (everything but the trailing id column) to int
    values = census.iloc[:, :-1].astype(int)
    # Add together columns that now share a label. Grouping the transposed
    # frame by its index replaces groupby(..., axis=1), which pandas has
    # deprecated, and keeps the sum on purely numeric data.
    values = values.T.groupby(level=0).sum().T
    values[census.columns[-1]] = census.iloc[:, -1].to_numpy()

    merged = pd.merge(geodf,values,how='left',left_on='geoid',right_on='GEO_ID')
    merged = merged.drop(['GEO_ID','geoid'],axis=1)

    # one row per name; columns kept alphabetical as before
    return merged.groupby('name').sum().sort_index(axis=1).reset_index()

In [7]:
# Aggregate each Census to the lookup names, then put 2010 and 2020 side by side
u10 = make_uvil(geodf=bgp_10, df=df10, rename_col=col_10_rename)
u20 = make_uvil(geodf=bgp_20, df=df20, rename_col=col_20_rename)
uvil = u10.merge(u20, how='left', on='name')

### Make Table function for all other geos

In [8]:
def make_table(df,rename):
    """Relabel Census columns and add together those that share a label.

    Parameters
    ----------
    df : DataFrame of Census variables. Every column except the last one is
        cast to int, so the geography id is assumed to be the last column.
    rename : dict mapping Census variable codes to output labels. Several
        codes may share one label; those columns are summed into one.

    Returns
    -------
    DataFrame with one column per label plus the id column, in alphabetical
    column order (the order the earlier groupby-based version produced).

    Unlike the earlier version, the caller's `df` is left untouched.
    """
    # rename returns a new frame, so the caller's data is not modified
    table = df.rename(columns=rename)

    # cast the Census values (everything but the trailing id column) to int
    totals = table.iloc[:, :-1].astype(int)
    # Grouping the transposed frame by its index replaces groupby(..., axis=1),
    # which pandas has deprecated, and keeps the id strings out of the sum.
    totals = totals.T.groupby(level=0).sum().T
    totals[table.columns[-1]] = table.iloc[:, -1].to_numpy()

    return totals.sort_index(axis=1)

### Phoenix for 2010 and 2020

In [9]:
# Pull Phoenix for each Census (position 0 of `years` is 2010, position 1 is 2020),
# collapse to the shared labels, then join the two years on the geography id
p10 = make_table(get.get_phx(source_dec, *list(years.items())[0]), col_10_rename)
p20 = make_table(get.get_phx(source_red, *list(years.items())[1]), col_20_rename)
phx = p10.merge(p20, how='left', on='GEO_ID')
phx = phx.rename(columns={'GEO_ID': 'name'})  # for concat

### U.S. for 2010 and 2020

In [10]:
# Pull the U.S. totals for each Census (position 0 of `years` is 2010, position 1 is 2020),
# collapse to the shared labels, then join the two years on the `us` id column
us10 = make_table(get.get_us(source_dec, *list(years.items())[0]), col_10_rename)
us20 = make_table(get.get_us(source_red, *list(years.items())[1]), col_20_rename)
us = us10.merge(us20, how='left', on='us')

In [11]:
# Use the common `name` label and move it to the front so the columns line up for concat
us = us.rename(columns={'us': 'name'})
us = us[['name', *us.columns.drop('name')]]

## Maricopa County for 2010 and 2020

In [12]:
# Pull Maricopa County for each Census (position 0 of `years` is 2010, position 1 is 2020),
# collapse to the shared labels, then join the two years on the geography id
mar10 = make_table(get.get_maricopa(source_dec, *list(years.items())[0]), col_10_rename)
mar20 = make_table(get.get_maricopa(source_red, *list(years.items())[1]), col_20_rename)
mar = mar10.merge(mar20, how='left', on='GEO_ID')
mar = mar.rename(columns={'GEO_ID': 'name'})

## Arizona for 2010 and 2020

In [13]:
# Pull Arizona for each Census (position 0 of `years` is 2010, position 1 is 2020),
# collapse to the shared labels, then join the two years on the `state` id column
az10 = make_table(get.get_az(source_dec, *list(years.items())[0]), col_10_rename)
az20 = make_table(get.get_az(source_red, *list(years.items())[1]), col_20_rename)
az = az10.merge(az20, how='left', on='state')

In [14]:
# Use the common `name` label and move it to the front so the columns line up for concat
az = az.rename(columns={'state': 'name'})
az = az[['name', *az.columns.drop('name')]]

## Comp cities for 2010 and 2020

In [15]:
# Pull the comparison cities for each Census (position 0 of `years` is 2010, position 1 is 2020),
# collapse to the shared labels, then join the two years on the geography id
comps10 = make_table(get.get_comp_cities(source_dec, *list(years.items())[0]), col_10_rename)
comps20 = make_table(get.get_comp_cities(source_red, *list(years.items())[1]), col_20_rename)
comps = comps10.merge(comps20, how='left', on='GEO_ID')

In [16]:
# Use the common `name` label so the comparison cities line up with the other tables for concat
comps = comps.rename(columns={'GEO_ID': 'name'})

## Concat all geos together & export to excel

In [17]:
# Geography code -> readable label used in the exported table
rename = {
    '1': 'US',
    '04': 'AZ',
    '04013': 'Maricopa',
    '0455000': 'Phoenix',
    '0473000': 'Tempe',
    '0465000': 'Scottsdale',
    '0427820': 'Glendale',
    '4865000': 'San Antonio',
    '4819000': 'Dallas',
    '1235000': 'Jacksonville',
    '1836000': 'Indianapolis',
}

In [18]:
# Stack the non-village geographies and swap their codes for readable labels
final = pd.concat([phx,us,mar,az,comps])
# A plain .map(rename) turns every code missing from `rename` into NaN, which
# leaves the row unlabeled in the export. Fall back to the raw code so a newly
# added geography stays identifiable until its label is added to `rename`.
final['name'] = final['name'].map(lambda code: rename.get(code, code))
# the urban village rows already carry their names, so they are added after relabeling
final = pd.concat([uvil,final])

In [20]:
# Write the combined table to Excel without the DataFrame index column
final.to_excel(excel_writer='output/dec_pop_hou_race.xlsx', index=False)