## ACS Housing Units by Building Size for Urban Villages, City of Phoenix

- https://www.census.gov/data/developers/data-sets/acs-5year.html
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from hou_size_vars import *

In [3]:
# Load the block-group lookup tables (2010 and 2020 vintages, per the file names).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Rewrite geoid as a 12-character string, left-padded with zeros, in both frames.
# NOTE(review): presumably restores leading zeros lost when the CSV was parsed
# as numbers - confirm against the source files.
for df in (bgp_10, bgp_20):
    df['geoid'] = df['geoid'].map('{0:0>12}'.format)

# Get rid of area & geo columns that are not being used.
bgp_10 = bgp_10.drop(columns=['aland10','awater10','lat10','lon10','land_acre'])
bgp_20 = bgp_20.drop(columns=['aland20','awater20','lat20','lon20','land_acre'])

In [4]:
# Search parameters passed to the get_acs helpers.
# NOTE: variable number assignments differ between the 2010 and 2020 vintages.
source = 'acs/acs5'      # dataset path segment (ACS 5-year)
cols = 'group(B25024)'   # request the whole B25024 variable group

y0 = '2013'              # earlier comparison year
y1 = '2021'              # end year used for the tables built below

#### Function to calculate building-size columns

In [5]:
# function to sum select building columns & recalc MOES
def group_bsize(df,year):
    """Collapse detailed building-size columns into five grouped categories.

    For every size group the estimate columns are summed row-wise and the
    matching margin-of-error columns are combined row-wise with
    ``calc.get_moe``. The detailed columns listed in ``hou_vars`` are then
    dropped, the remaining columns are renamed through ``hou_rename``, and the
    two-digit year is inserted before the last character of every column name
    except the first (e.g. ``u_1_E`` becomes ``u_1_21E`` for year ``'2021'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in the group lists (``U1E``/``U1M``,
        ``U24E``/``U24M``, ``U520E``/``U520M``, ``Uo20E``/``Uo20M``,
        ``UOthE``/``UOthM``) and in ``hou_vars``. The first column is left
        un-suffixed; callers in this notebook put ``GEO_ID`` there.
    year : str or int
        Data year; only its last two characters are used in the column names.

    Returns
    -------
    pandas.DataFrame
        A new frame. The frame passed in is not modified.
    """
    # (output prefix, estimate columns, margin-of-error columns)
    size_groups = [
        ('u_1', U1E, U1M),
        ('u_24', U24E, U24M),
        ('u_520', U520E, U520M),
        ('u_o20', Uo20E, Uo20M),
        ('u_oth', UOthE, UOthM),
    ]
    yy = str(year)[-2:]

    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    for prefix, est_cols, moe_cols in size_groups:
        df[f'{prefix}_E'] = df.loc[:, est_cols].sum(axis=1)
        # get_moe receives one row's MOE values for the group at a time.
        df[f'{prefix}_M'] = df.apply(lambda row: calc.get_moe(row[moe_cols]), axis=1)

    df = df.drop(columns=hou_vars).rename(hou_rename, axis=1)

    # Add end-year into every column name but the first, in a single rename.
    with_year = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    return df.rename(columns=with_year)

In [6]:
# make urban villages from Census Block Groups

def make_uvil(geo_df,year):
    """Build the building-size table aggregated to urban villages.

    Fetches block-group data with ``get.get_bgp`` using the notebook-level
    ``source`` and ``cols`` settings, keeps only columns whose name does not
    end in ``A``, groups the building-size columns with ``group_bsize``, and
    left-joins the result onto ``geo_df`` (``geoid`` == ``GEO_ID``). Rows of
    ``geo_df`` without a match keep missing values. The joined frame, minus
    both id columns, is handed to ``geo_agg.sumgeo_cv`` keyed on ``'name'``.

    Parameters
    ----------
    geo_df : pandas.DataFrame
        Lookup table with at least ``geoid`` and ``name`` columns.
    year : str
        Data year passed to the fetch and to ``group_bsize``.

    Returns
    -------
    Whatever ``geo_agg.sumgeo_cv`` returns.
    NOTE(review): presumably one row per village with summed estimates,
    MOEs and CVs - confirm in geo_agg.
    """
    fetched = get.get_bgp(source,year,cols)

    # Drop non-estimate columns: keep names that do not end in "A".
    trimmed = fetched.filter(regex='(?<!A)$',axis=1)

    # Move the id to the first column.
    id_first = ['GEO_ID'] + [name for name in trimmed.columns if name != 'GEO_ID']
    trimmed = trimmed[id_first]

    cleaned = get.clean_data(trimmed,trimmed.columns)
    grouped = group_bsize(cleaned,year)

    joined = geo_df.merge(grouped,how='left',left_on='geoid',right_on='GEO_ID')
    joined = joined.drop(columns=['geoid','GEO_ID'])
    return geo_agg.sumgeo_cv(joined,'name')

In [7]:
# make Phoenix or U.S.
def make_table(year,geo):
    """Build the building-size table for a single geography.

    Fetches the data with ``get.get_phx`` or ``get.get_us`` using the
    notebook-level ``source`` and ``cols`` settings, keeps only columns whose
    name does not end in ``A``, groups the building-size columns with
    ``group_bsize`` and passes the result to ``geo_agg.sumgeo_cv`` keyed on
    ``'GEO_ID'``.

    Parameters
    ----------
    year : str
        Data year passed to the fetch and to ``group_bsize``.
    geo : str
        ``'phoenix'`` or ``'us'``.

    Returns
    -------
    Whatever ``geo_agg.sumgeo_cv`` returns.

    Raises
    ------
    ValueError
        If ``geo`` is not one of the supported values.
    """
    if geo == 'phoenix':
        df = get.get_phx(source,year,cols)
    elif geo == 'us':
        df = get.get_us(source,year,cols)
        # The U.S. response carries two extra columns that are not needed here.
        df = df.drop(['NAME','us'],axis=1)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")

    df = df.filter(regex='(?<!A)$',axis=1) #drop non-estimate columns
    df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
    df = get.clean_data(df,df.columns)
    df = group_bsize(df,year)
    df = geo_agg.sumgeo_cv(df,'GEO_ID')
    return df

### Make a table with urban villages, the city, and the U.S.

In [8]:
# Urban-village table for the end year (y1), built from the 2020 block-group lookup.
dfY1 = make_uvil(bgp_20,y1)

In [9]:
# Preview the first three rows of the urban-village table.
dfY1.head(3)

Unnamed: 0,name,u_tot_21E,u_tot_21M,u_tot_21C,u_24_21E,u_24_21M,u_24_21C,u_1_21E,u_1_21M,u_1_21C,u_520_21E,u_520_21M,u_520_21C,u_o20_21E,u_o20_21M,u_o20_21C,u_oth_21E,u_oth_21M,u_oth_21C
0,Alhambra,52655.0,1562.444239,1.803844,4300.0,577.579432,8.165398,29728.0,1276.070139,2.609414,8741.0,853.039272,5.932558,8573.0,697.810863,4.948106,1313.0,305.108178,14.126131
1,North Mountain,71371.0,1957.484866,1.667288,4973.0,670.345433,8.194346,41716.0,1496.202192,2.180327,11182.0,996.422601,5.416992,12309.0,1120.23078,5.532467,1191.0,284.852593,14.539267
2,Maryvale,71196.0,2069.086513,1.766677,3597.0,748.336154,12.647084,52135.0,1754.488529,2.045763,8721.0,811.039457,5.653401,5925.0,930.276841,9.544605,818.0,301.69521,22.420702


In [10]:
# Phoenix table for the end year; the id column is relabelled 'name' to match dfY1.
phY1 = make_table(y1,'phoenix').rename(columns={'GEO_ID':'name'})

In [11]:
# Display the Phoenix table.
phY1

Unnamed: 0,name,u_tot_21E,u_tot_21M,u_tot_21C,u_24_21E,u_24_21M,u_24_21C,u_1_21E,u_1_21M,u_1_21C,u_520_21E,u_520_21M,u_520_21C,u_o20_21E,u_o20_21M,u_o20_21C,u_oth_21E,u_oth_21M,u_oth_21C
0,455000,624409.0,3175.0,0.309107,37961.0,1760.392286,2.81907,406366.0,3848.168525,0.575666,75603.0,2678.381041,2.153612,84876.0,2392.082356,1.713268,19603.0,1111.260546,3.446097


In [12]:
# U.S. table for the end year; the id column is relabelled 'name' to match dfY1.
usY1 = make_table(y1,'us').rename(columns={'GEO_ID':'name'})

In [13]:
# Display the U.S. table.
usY1

Unnamed: 0,name,u_tot_21E,u_tot_21M,u_tot_21C,u_24_21E,u_24_21M,u_24_21C,u_1_21E,u_1_21M,u_1_21C,u_520_21E,u_520_21M,u_520_21C,u_o20_21E,u_o20_21M,u_o20_21C,u_oth_21E,u_oth_21M,u_oth_21C
0,0100000US,139647020.0,3504.0,0.001525,10911007.0,39117.315258,0.217941,94432071.0,117778.628524,0.07582,12494740.0,42232.765384,0.205474,13480195.0,29822.808218,0.134489,8329007.0,46744.176279,0.341168


## Build current year table for all geos & export to Excel

In [14]:
# Stack the urban-village, Phoenix and U.S. rows into one table.
final = pd.concat([dfY1,phY1,usY1])

# to_excel does not create missing folders, so make sure output/ exists first.
os.makedirs('output', exist_ok=True)
final.to_excel('output/Hou_bsize.xlsx',index=False)