## ACS Housing Units by Building Size for Urban Villages, City of Phoenix

- https://www.census.gov/data/developers/data-sets/acs-5year.html
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from hou_size_vars import *

In [3]:
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')
for df in [bgp_10,bgp_20]: df.geoid = df.geoid.apply(lambda x: '{0:0>12}'.format(x))
    
#get ride of area & geo stuff not being useed
bgp_20 = bgp_20.drop(['aland20','awater20','lat20','lon20','land_acre'],axis=1)
bgp_10 = bgp_10.drop(['aland10','awater10','lat10','lon10','land_acre'],axis=1)

In [4]:
#Search parameters - NOTE DIFFERENT VARIABLE # ASSIGNMENTS IN 2010 VS 2020
y1 = '2021'
y0 = '2013'

cols = 'group(B25024)'

source = 'acs/acs5'

#### Calculate building size columns function

In [13]:
df = get.get_bgp(source,y1,cols)
df = df.filter(regex='(?<!A)$',axis=1) #drop non-estimate columns
df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
df = get.clean_data(df,df.columns)

In [14]:
df.head()

Unnamed: 0,GEO_ID,B25024_001E,B25024_001M,B25024_002E,B25024_002M,B25024_003E,B25024_003M,B25024_004E,B25024_004M,B25024_005E,...,B25024_007E,B25024_007M,B25024_008E,B25024_008M,B25024_009E,B25024_009M,B25024_010E,B25024_010M,B25024_011E,B25024_011M
0,40130101021,487.0,166.0,487.0,166.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
1,40130101022,980.0,192.0,980.0,192.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
2,40130101023,2062.0,268.0,1966.0,276.0,96.0,90.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
3,40130101031,999.0,253.0,957.0,257.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,42.0,65.0,0.0,13.0
4,40130101032,794.0,173.0,703.0,158.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,26.0,38.0,0.0,13.0,65.0,98.0,0.0,13.0


In [15]:
# function to sum select building columns & recalc MOES
def group_bsize(df,year):
    df['u_1_E'] = df.loc[:,U1E].sum(axis=1)
    df['u_1_M'] = df.apply(lambda x: (calc.get_moe(x[U1M])),axis=1)
    df['u_24_E'] = df.loc[:,U24E].sum(axis=1)
    df['u_24_M'] = df.apply(lambda x: (calc.get_moe(x[U24M])),axis=1)
    df['u_520_E'] = df.loc[:,U520E].sum(axis=1)
    df['u_520_M'] = df.apply(lambda x: (calc.get_moe(x[U520M])),axis=1)
    df['u_o20_E'] = df.loc[:,Uo20E].sum(axis=1)
    df['u_o20_M'] = df.apply(lambda x: (calc.get_moe(x[Uo20M])),axis=1)
    df['u_oth_E'] = df.loc[:,UOthE].sum(axis=1)
    df['u_oth_M'] = df.apply(lambda x: (calc.get_moe(x[UOthM])),axis=1)
    df = df.drop(hou_vars,axis=1).rename(columns=hou_rename,inplace=True)
    
    #add end-year into column name
    for col in df.columns[1:]:
        df.rename(columns={col:f'{col[:-1]}{year[-2:]}{col[-1:]}'},inplace=True)
    return df

In [18]:
def make_uvil(geo_df,year): 
    df = get.get_bgp(source,year,cols)
    df = df.filter(regex='(?<!A)$',axis=1) #drop non-estimate columns
    df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
    df = get.clean_data(df,df.columns)
    df = group_bsize(df,year)
    df = pd.merge(geo_df,df,how='left',left_on='geoid',right_on='GEO_ID')
    df = df.drop(['geoid','GEO_ID'],axis=1)
    df = geo_agg.sumgeo_cv(df,'name')
    return df

In [22]:
df = get.get_bgp(source,y1,cols)
df = df.filter(regex='(?<!A)$',axis=1) #drop non-estimate columns
df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
df = get.clean_data(df,df.columns)

In [25]:
df['u_1_E'] = df.loc[:,U1E].sum(axis=1)
df['u_1_M'] = df.apply(lambda x: (calc.get_moe(x[U1M])),axis=1)
df['u_24_E'] = df.loc[:,U24E].sum(axis=1)
df['u_24_M'] = df.apply(lambda x: (calc.get_moe(x[U24M])),axis=1)
df['u_520_E'] = df.loc[:,U520E].sum(axis=1)
df['u_520_M'] = df.apply(lambda x: (calc.get_moe(x[U520M])),axis=1)
df['u_o20_E'] = df.loc[:,Uo20E].sum(axis=1)
df['u_o20_M'] = df.apply(lambda x: (calc.get_moe(x[Uo20M])),axis=1)
df['u_oth_E'] = df.loc[:,UOthE].sum(axis=1)
df['u_oth_M'] = df.apply(lambda x: (calc.get_moe(x[UOthM])),axis=1)

In [27]:
df = df.rename(columns=hou_rename,inplace=True).drop(hou_vars,axis=1)

AttributeError: 'NoneType' object has no attribute 'drop'

In [29]:
print(hou_vars)

['B25024_002E', 'B25024_003E', 'B25024_002M', 'B25024_003M', 'B25024_004E', 'B25024_005E', 'B25024_004M', 'B25024_005M', 'B25024_006E', 'B25024_007E', 'B25024_006M', 'B25024_007M', 'B25024_008E', 'B25024_009E', 'B25024_008M', 'B25024_009M', 'B25024_010E', 'B25024_011E', 'B25024_010M', 'B25024_011M']


In [28]:
df.head()

Unnamed: 0,GEO_ID,u_tot_E,u_tot_M,B25024_002E,B25024_002M,B25024_003E,B25024_003M,B25024_004E,B25024_004M,B25024_005E,...,u_1_E,u_1_M,u_24_E,u_24_M,u_520_E,u_520_M,u_o20_E,u_o20_M,u_oth_E,u_oth_M
0,40130101021,487.0,166.0,487.0,166.0,0.0,13.0,0.0,13.0,0.0,...,487.0,166.508258,0.0,18.384776,0.0,18.384776,0.0,18.384776,0.0,18.384776
1,40130101022,980.0,192.0,980.0,192.0,0.0,13.0,0.0,13.0,0.0,...,980.0,192.439601,0.0,18.384776,0.0,18.384776,0.0,18.384776,0.0,18.384776
2,40130101023,2062.0,268.0,1966.0,276.0,96.0,90.0,0.0,13.0,0.0,...,2062.0,290.30329,0.0,18.384776,0.0,18.384776,0.0,18.384776,0.0,18.384776
3,40130101031,999.0,253.0,957.0,257.0,0.0,13.0,0.0,13.0,0.0,...,957.0,257.328584,0.0,18.384776,0.0,18.384776,0.0,18.384776,42.0,66.287254
4,40130101032,794.0,173.0,703.0,158.0,0.0,13.0,0.0,13.0,0.0,...,703.0,158.533908,0.0,18.384776,0.0,18.384776,26.0,40.162171,65.0,98.858485


In [24]:
dff = group_bsize(df,y1)

AttributeError: 'NoneType' object has no attribute 'columns'

In [None]:
dff.head()

In [23]:
df.head()

Unnamed: 0,GEO_ID,B25024_001E,B25024_001M,B25024_002E,B25024_002M,B25024_003E,B25024_003M,B25024_004E,B25024_004M,B25024_005E,...,B25024_007E,B25024_007M,B25024_008E,B25024_008M,B25024_009E,B25024_009M,B25024_010E,B25024_010M,B25024_011E,B25024_011M
0,40130101021,487.0,166.0,487.0,166.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
1,40130101022,980.0,192.0,980.0,192.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
2,40130101023,2062.0,268.0,1966.0,276.0,96.0,90.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0,13.0
3,40130101031,999.0,253.0,957.0,257.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,0.0,13.0,0.0,13.0,42.0,65.0,0.0,13.0
4,40130101032,794.0,173.0,703.0,158.0,0.0,13.0,0.0,13.0,0.0,...,0.0,13.0,26.0,38.0,0.0,13.0,65.0,98.0,0.0,13.0


In [19]:
dfY1 = make_uvil(bgp_20,y1)

AttributeError: 'NoneType' object has no attribute 'columns'

In [20]:
dfY1

NameError: name 'dfY1' is not defined

### County + Subregion - Year 0 & Year1

In [None]:
c_y0 = get.get_county(p,source,years[0],col)
c_y1 = get.get_county(p,source,years[1],col)

In [None]:
c_y0 = group_bsize(c_y0,years[0])
c_y1 = group_bsize(c_y1,years[1])

In [None]:
cou = pd.merge(c_y0,c_y1,on='GEO_ID')
cou.head()

In [None]:
cou['sub'] = cou['GEO_ID'].map(sub_7)

In [None]:
subreg = cou.drop('GEO_ID',axis=1).copy()
subreg = subreg[['sub']+[col for col in subreg.columns if col !='sub']]
subreg = geo_agg.sumgeo_cv(subreg,'sub')
subreg = subreg.rename({'sub':'GEO_ID'},axis=1)

In [None]:
subreg

In [None]:
subreg.columns

In [None]:
cousubreg = pd.concat([cou,subreg])

In [None]:
cousubreg.head()

### Subplace - Year 0

In [None]:
#all ny places,reduce to just li places
pl_y0 = get.get_place(p,source,years[0],col)
pl_y0 = pl_y0[pl_y0['GEO_ID'].isin(subpl10)]
#all mcds in region
mcd_y0 = get.get_mcd(p,source,years[0],col)
#combine and clean
df_y0 = pd.concat([pl_y0,mcd_y0])
df_y0 = group_bsize(df_y0,years[0])

### Subplace Year 1

In [None]:
#all ny places,reduce to just li places
pl_y1 = get.get_place(p,source,years[1],col)
pl_y1 = pl_y1[pl_y1['GEO_ID'].isin(liplaces19)]
#all mcds in region
mcd_y1 = get.get_mcd(p,source,years[1],col)
#combine and clean
df_y1 = pd.concat([pl_y1,mcd_y1])
df_y1 = group_bsize(df_y1,years[1])

### Census Tract - Year 0

In [None]:
ct_y0 = get.get_tract(p,source,years[0],col)
ct_y0 = group_bsize(ct_y0,years[0])

### Census Tract - Year 1

In [None]:
ct_y1 = get.get_tract(p,source,years[1],col)
ct_y1 = group_bsize(ct_y1,years[1])

In [None]:
ct_y1.head()

## Export files to Excel

In [None]:
cousubreg.to_excel(f'../../output/Housing/BuildSize_ACS_cousubreg_{years[0]}{years[1]}.xlsx',index=False)

In [None]:
ct_y1.to_excel(f'../output/Housing/BuildSize_ACS_tract_{years[1]}.xlsx',index=False)