## Population & housing variables pulled from the 2010 and 2020 Censuses (using the Census API) for urban villages - checking the incorporated vs. unincorporated areas

Total Housing Units, Population, Population by Ethnicity, Total Occupied Housing Units, Total Vacant Housing Units

In [1]:
import pandas as pd
import numpy as np

In [2]:
import getters as get

In [3]:
# Load the four block/village tables: 2010 and 2020 block vintages, each in a
# "reduced" and a "full" variant (as named by the files).
b10_red = pd.read_csv('../data/geo/blk_vil_10_reduced.csv')
b20_red = pd.read_csv('../data/geo/blk_vil_20_reduced.csv')
b10_full = pd.read_csv('../data/geo/blk_vil_10_full.csv')
b20_full = pd.read_csv('../data/geo/blk_vil_20_full.csv')

# columns that are not needed downstream
drop_cols = ['aland','awater','lat','lon']

# Normalise every table in place, one pass per table:
#   1. left-pad geoid with zeros to 15 characters (it is stored as text afterwards)
#   2. drop the unused columns
#   3. rename geoid -> GEO_ID
for blk_tbl in (b10_red, b20_red, b10_full, b20_full):
    blk_tbl['geoid'] = blk_tbl['geoid'].apply('{0:0>15}'.format)
    blk_tbl.drop(columns=drop_cols, inplace=True)
    blk_tbl.rename(columns={'geoid':'GEO_ID'}, inplace=True)

In [4]:
## Census API sources and variable lists, keyed by Decennial Census year
# 2010 uses the SF1 file; 2020 uses the redistricting (PL) file until the 2020 tables are released

source_dec = 'dec/sf1'
source_red = 'dec/pl'

# comma-separated variable codes requested for each Census
# (insertion order matters: '2010' first, '2020' second)
years = {
    '2010': ','.join([
        'P001001',
        'P005003', 'P005004', 'P005005', 'P005006',
        'P005007', 'P005008', 'P005009', 'P005010',
        'H003001', 'H003002', 'H003003',
    ]),
    '2020': ','.join([
        'P1_001N',
        'P2_002N', 'P2_005N', 'P2_006N', 'P2_007N',
        'P2_008N', 'P2_009N', 'P2_010N', 'P2_011N',
        'H1_001N', 'H1_002N', 'H1_003N',
    ]),
}

# 2010 variable code -> output column name.
# Several codes deliberately share the 'P_Ot_10E' target so they end up as
# same-named columns that can be grouped together.
col_10_rename = {
    'P001001': 'Pop_10E',
    'P005003': 'P_Wh_10E',
    'P005004': 'P_Bl_10E',
    'P005005': 'P_Ot_10E',
    'P005006': 'P_As_10E',
    'P005007': 'P_Ot_10E',
    'P005008': 'P_Ot_10E',
    'P005009': 'P_Ot_10E',
    'P005010': 'P_Hi_10E',
    'H003001': 'Hou_10E',
    'H003002': 'Hou_O_10E',
    'H003003': 'Hou_V_10E',
}

# 2020 variable code -> output column name (same grouping idea, 'P_Ot_20E' is shared)
col_20_rename = {
    'P1_001N': 'Pop_20E',
    'P2_002N': 'P_Hi_20E',
    'P2_005N': 'P_Wh_20E',
    'P2_006N': 'P_Bl_20E',
    'P2_007N': 'P_Ot_20E',
    'P2_008N': 'P_As_20E',
    'P2_009N': 'P_Ot_20E',
    'P2_010N': 'P_Ot_20E',
    'P2_011N': 'P_Ot_20E',
    'H1_001N': 'Hou_20E',
    'H1_002N': 'Hou_O_20E',
    'H1_003N': 'Hou_V_20E',
}

### Urban Village for 2010 QAQC

In [5]:
# Fetch the 2010 (SF1) variables for the reduced and the full block tables.
# The year and its variable list are selected with the explicit '2010' key
# rather than by position in `years`, so reordering or extending that dict
# cannot silently send the wrong year/variables to the wrong source; a missing
# key now raises KeyError instead of passing None along.
# NOTE(review): get.get_blk is defined in the local `getters` module; its
# return is used below as a DataFrame - confirm column layout there.
df10_red = get.get_blk(source_dec, '2010', years['2010'], b10_red)
df10_full = get.get_blk(source_dec, '2010', years['2010'], b10_full)

In [6]:
# Tidy both 2010 pulls in place:
#   - cast every column from the third one onward to int
#     (assumes the first two columns are the identifier columns - TODO confirm)
#   - drop GEO_ID
#   - rename the Census variable codes to the grouped column names
for blk_df in (df10_red, df10_full):
    value_cols = list(blk_df.columns[2:])
    for value_col in value_cols:
        blk_df[value_col] = blk_df[value_col].astype(int)
    blk_df.drop(columns='GEO_ID', inplace=True)
    blk_df.rename(col_10_rename, axis='columns', inplace=True)

In [7]:
# Village-level totals for the 2010 reduced table.
# Step 1 merges columns that share a label after the rename (e.g. the several
# 'P_Ot_10E' columns) by summing them; step 2 sums all rows of each village.
# DataFrame.groupby(..., axis=1) is deprecated since pandas 2.1 and removed in
# pandas 3.0, so the column-wise merge is done on the transposed frame.
# NOTE(review): assumes 'name' is the only non-numeric column left here - confirm.
df10_red = (
    df10_red.set_index('name')
    .T.groupby(level=0).sum()   # rows = column labels: collapse duplicate labels
    .T.groupby(level=0).sum()   # index = village name: total per village
    .reset_index()
)

In [8]:
# Village-level totals for the 2010 full table.
# Step 1 merges columns that share a label after the rename (e.g. the several
# 'P_Ot_10E' columns) by summing them; step 2 sums all rows of each village.
# DataFrame.groupby(..., axis=1) is deprecated since pandas 2.1 and removed in
# pandas 3.0, so the column-wise merge is done on the transposed frame.
# NOTE(review): assumes 'name' is the only non-numeric column left here - confirm.
df10_full = (
    df10_full.set_index('name')
    .T.groupby(level=0).sum()   # rows = column labels: collapse duplicate labels
    .T.groupby(level=0).sum()   # index = village name: total per village
    .reset_index()
)

### Urban Village for 2020 QAQC

In [None]:
# Fetch the 2020 (redistricting file) variables for the reduced and the full
# block tables.
# The year and its variable list are selected with the explicit '2020' key
# rather than by position in `years`, so reordering or extending that dict
# cannot silently send the wrong year/variables to the wrong source; a missing
# key now raises KeyError instead of passing None along.
# NOTE(review): get.get_blk is defined in the local `getters` module; its
# return is used below as a DataFrame - confirm column layout there.
df20_red = get.get_blk(source_red, '2020', years['2020'], b20_red)
df20_full = get.get_blk(source_red, '2020', years['2020'], b20_full)

In [None]:
# Tidy both 2020 pulls in place:
#   - cast every column from the third one onward to int
#     (assumes the first two columns are the identifier columns - TODO confirm)
#   - drop GEO_ID
#   - rename the Census variable codes to the grouped column names
for blk_df in (df20_red, df20_full):
    value_cols = list(blk_df.columns[2:])
    for value_col in value_cols:
        blk_df[value_col] = blk_df[value_col].astype(int)
    blk_df.drop(columns='GEO_ID', inplace=True)
    blk_df.rename(col_20_rename, axis='columns', inplace=True)

In [None]:
# Village-level totals for the 2020 reduced table.
# Step 1 merges columns that share a label after the rename (e.g. the several
# 'P_Ot_20E' columns) by summing them; step 2 sums all rows of each village.
# DataFrame.groupby(..., axis=1) is deprecated since pandas 2.1 and removed in
# pandas 3.0, so the column-wise merge is done on the transposed frame.
# NOTE(review): assumes 'name' is the only non-numeric column left here - confirm.
df20_red = (
    df20_red.set_index('name')
    .T.groupby(level=0).sum()   # rows = column labels: collapse duplicate labels
    .T.groupby(level=0).sum()   # index = village name: total per village
    .reset_index()
)

In [None]:
# Village-level totals for the 2020 full table.
# Step 1 merges columns that share a label after the rename (e.g. the several
# 'P_Ot_20E' columns) by summing them; step 2 sums all rows of each village.
# DataFrame.groupby(..., axis=1) is deprecated since pandas 2.1 and removed in
# pandas 3.0, so the column-wise merge is done on the transposed frame.
# NOTE(review): assumes 'name' is the only non-numeric column left here - confirm.
df20_full = (
    df20_full.set_index('name')
    .T.groupby(level=0).sum()   # rows = column labels: collapse duplicate labels
    .T.groupby(level=0).sum()   # index = village name: total per village
    .reset_index()
)

In [None]:
# Write the four village-level tables to a single workbook, one sheet each,
# without the DataFrame index.
with pd.ExcelWriter('output/Village-Block-Pop-QAQC.xlsx') as writer:
    for sheet_label, village_tbl in (
        ("uv_10_reduced", df10_red),
        ("uv_10_full", df10_full),
        ("uv_20_reduced", df20_red),
        ("uv_20_full", df20_full),
    ):
        village_tbl.to_excel(writer, sheet_name=sheet_label, index=False)