## Pop & Housing variables pulled from 2010, and 2020 Censuses (using Cen API) for urban villages,  City of Phoenix and U.S.

Total Housing Units, Population, Population by Ethncity, Total Occupied Housing Units,  


In [1]:
import pandas as pd
import numpy as np

In [2]:
from geo import *
import get_dec as get

In [3]:
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')
for df in [bgp_10,bgp_20]: df.geoid = df.geoid.apply(lambda x: '{0:0>12}'.format(x))
    
#get ride of area & geo stuff not being useed
bgp_20 = bgp_20.drop(['aland20','awater20','lat20','lon20','land_acre'],axis=1)
bgp_10 = bgp_10.drop(['aland10','awater10','lat10','lon10','land_acre'],axis=1)

In [4]:
source_dec = 'dec/sf1'
source_red = 'dec/pl'
years = {'2010':'P001001,P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,H003001,H003002,H003003',\
        '2020':'P1_001N,P2_002N,P2_005N,P2_006N,P2_007N,P2_008N,P2_009N,P2_010N,P2_011N,H1_001N,H1_002N,H1_003N'}

col_10_rename={'P001001':'Pop_10E','P005003':'P_Wh_10E','P005004':'P_Bl_10E','P005006':'P_As_10E','P005010':'P_Hi_10E',\
               'P005005':'P_Ot_10E','P005007':'P_Ot_10E','P005008':'P_Ot_10E','P005009':'P_Ot_10E',\
              'H003001':'Hou_10E','H003002':'Hou_O_10E','H003003':'Hou_V_10E'}

col_20_rename = {'P1_001N':'Pop_20E','P2_002N':'P_Hi_20E',\
              'P2_005N':'P_Wh_20E','P2_006N':'P_Bl_20E',\
              'P2_007N':'P_Ot_20E','P2_008N':'P_As_20E','P2_009N':'P_Ot_20E',\
              'P2_010N':'P_Ot_20E','P2_011N':'P_Ot_20E','H1_001N':'Hou_20E',\
              'H1_002N':'Hou_O_20E','H1_003N':'Hou_V_20E'}

### Urban Village for 2010 and 2020

In [5]:
df10 = get.get_bgp(source_dec,list(years.keys())[0],years.get(list(years.keys())[0]))
df20 = get.get_bgp(source_red,list(years.keys())[1],years.get(list(years.keys())[1]))

In [6]:
def make_uvil(geodf,df,rename_col):
    df.rename(columns=rename_col,inplace=True)
    for col in df.columns[:-1]: df[col] = df[col].astype(int)
    df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']]
    df = pd.merge(geodf,df,how='left',left_on='geoid',right_on='GEO_ID')
    df = df.drop(['GEO_ID','geoid'],axis=1)
    df = df.groupby(df.columns,axis=1).sum().groupby('name').sum().reset_index()
    return df

In [7]:
df10.head()

Unnamed: 0,P001001,P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,H003001,H003002,H003003,GEO_ID
0,2157,1967,17,4,40,2,5,23,99,946,809,137,40130101011
1,1716,1587,25,5,15,5,3,18,58,1071,757,314,40130101012
2,1200,1187,2,1,6,0,0,0,4,1120,680,440,40130101013
3,859,780,1,0,2,5,0,7,64,423,341,82,40130101021
4,1728,1549,10,2,68,0,0,33,66,929,715,214,40130101022


In [8]:
df10.rename(columns=col_10_rename,inplace=True)

In [9]:
df10.columns

Index(['Pop_10E', 'P_Wh_10E', 'P_Bl_10E', 'P_Ot_10E', 'P_As_10E', 'P_Ot_10E',
       'P_Ot_10E', 'P_Ot_10E', 'P_Hi_10E', 'Hou_10E', 'Hou_O_10E', 'Hou_V_10E',
       'GEO_ID'],
      dtype='object')

In [10]:
u10 = make_uvil(bgp_10,df10,col_10_rename)

In [11]:
u10.head(3)

Unnamed: 0,name,Hou_10E,Hou_O_10E,Hou_V_10E,P_As_10E,P_Bl_10E,P_Hi_10E,P_Ot_10E,P_Wh_10E,Pop_10E
0,Ahwatukee Foothills,33528,31300,2228,4693,3969,9554,12676,55959,77344
1,Alhambra,52066,43584,8482,4774,7687,61060,23020,47168,126444
2,Camelback East,69263,58921,10342,2858,7452,42558,26436,72676,132153


In [12]:
u20 = make_uvil(bgp_20,df20,col_20_rename)

In [13]:
u20.head(3)

Unnamed: 0,name,Hou_20E,Hou_O_20E,Hou_V_20E,P_As_20E,P_Bl_20E,P_Hi_20E,P_Ot_20E,P_Wh_20E,Pop_20E
0,Ahwatukee Foothills,34527,32555,1972,5389,4876,12864,25080,51205,80604
1,Alhambra,52997,48670,4327,7068,13457,68897,37160,46170,144882
2,Camelback East,75182,67196,7986,4242,9134,42702,40820,78666,144949


In [14]:
uvil = pd.merge(u10,u20,how='left',on='name')

### Phoenix for 2010 and 2020

In [15]:
def make_table(df,rename):
    df.rename(columns=rename,inplace=True)
    for col in df.columns[:-1]: df[col] = df[col].astype(int)
    df = df.groupby(df.columns,axis=1).sum()
    return df

In [16]:
p10 = get.get_phx(source_dec,list(years.keys())[0],years.get(list(years.keys())[0]))
p20 = get.get_phx(source_red,list(years.keys())[1],years.get(list(years.keys())[1]))

In [17]:
p10 = make_table(p10,col_10_rename)
p20 = make_table(p20,col_20_rename)

In [18]:
phx = pd.merge(p10,p20,how='left',on='GEO_ID')
phx.rename(columns={'GEO_ID':'name'},inplace=True)

## U.S. for 2010 and 2020

In [19]:
us10 = get.get_us(source_dec,list(years.keys())[0],years.get(list(years.keys())[0]))
us20 = get.get_us(source_red,list(years.keys())[1],years.get(list(years.keys())[1]))

In [20]:
us10 = make_table(us10,col_10_rename)
us20 = make_table(us20,col_20_rename)

In [21]:
us = pd.merge(us10,us20,how='left',on='us')

In [22]:
us.rename(columns={'us':'name'},inplace=True)
us = us[['name']+[col for col in us.columns if col !='GEO_ID']]

## Concat all geos together & export to excel

In [23]:
final = pd.concat([uvil,phx,us])

ValueError: Plan shapes are not aligned

In [None]:
final.to_excel('output/Dec_pop_hou_race.xlsx',index=False)