In [1]:
import pandas as pd
import math
import numpy as np
import os

In [3]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from acs_hhsize_vars import *

In [4]:
# Load the two geography lookup tables (one per vintage).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Left-pad every geoid with zeros to a 12-character string so it can be
# used as a text merge key later on.
bgp_10['geoid'] = bgp_10['geoid'].apply(lambda geoid: '{0:0>12}'.format(geoid))
bgp_20['geoid'] = bgp_20['geoid'].apply(lambda geoid: '{0:0>12}'.format(geoid))

# Get rid of the area & geo columns that are not being used.
bgp_10 = bgp_10.drop(columns=['aland10', 'awater10', 'lat10', 'lon10', 'land_acre'])
bgp_20 = bgp_20.drop(columns=['aland20', 'awater20', 'lat20', 'lon20', 'land_acre'])

In [5]:
# Search parameters handed to the get_acs helpers.
# NOTE: variable-number assignments differ between the 2010 and 2020 vintages.
y1 = '2021'  # later comparison year; used together with bgp_20 in the cells below
y0 = '2013'  # earlier comparison year; used together with bgp_10 in the cells below

# Variable groups requested from the API (kept as strings, passed through as-is).
cols_pop = 'group(B25008)'  # feeds the population-by-tenure tables
cols_hou = 'group(B25009)'  # feeds the households-by-tenure tables

# Dataset path given to every get_* call; presumably the ACS 5-year endpoint — confirm.
source = 'acs/acs5'

#### Population by tenure

In [6]:
def make_pop_ten(geo_df,year):
    """Build the population-by-tenure table for one year, aggregated by ``name``.

    Parameters
    ----------
    geo_df : pandas.DataFrame
        Lookup table with at least a ``geoid`` column (merge key) and a
        ``name`` column (aggregation key).
    year : str
        Year passed to ``get.get_bgp``; its last two characters are inserted
        into the output column names.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``geo_agg.sumgeo_cv`` with every column after
        the first renamed so the two-digit year sits just before the final
        character (e.g. ``p_tot_E`` -> ``p_tot_21E`` for ``year='2021'``).
    """
    # rename() returns a new frame, so whatever get.get_bgp handed back is
    # left untouched (the helper may cache or reuse it).
    fetched = get.get_bgp(source, year, cols_pop).rename(columns=ten_rename)

    # Keep only columns whose name does not end in 'A'
    # (presumably the annotation columns — confirm against the API output).
    fetched = fetched.filter(regex='(?<!A)$', axis=1)
    cleaned = get.clean_data(fetched, ['GEO_ID'] + list(ten_rename.values()))

    # Attach the fetched rows to the lookup table, then drop both id columns
    # so that only ``name`` and the data columns reach the aggregation step.
    merged = pd.merge(geo_df, cleaned, how='left', left_on='geoid', right_on='GEO_ID')
    merged = merged.drop(['geoid', 'GEO_ID'], axis=1)
    summed = geo_agg.sumgeo_cv(merged, 'name')

    # Build the whole year-suffix mapping once and rename in a single pass;
    # the first column (the aggregation key) keeps its name.
    yy = year[-2:]
    year_names = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in summed.columns[1:]}
    return summed.rename(columns=year_names)

In [7]:
# Population by tenure for each comparison year, each on its own geography table.
py1 = make_pop_ten(geo_df=bgp_20, year=y1)
py0 = make_pop_ten(geo_df=bgp_10, year=y0)

#### Households by tenure

In [8]:
def make_hh_ten(geo_df,year):
    """Build the households-by-tenure totals for one year, aggregated by ``name``.

    Parameters
    ----------
    geo_df : pandas.DataFrame
        Lookup table with at least a ``geoid`` column (merge key) and a
        ``name`` column (aggregation key).
    year : str
        Year passed to ``get.get_bgp``; its last two characters are inserted
        into the output column names.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``geo_agg.sumgeo_cv`` with every column after
        the first renamed so the two-digit year sits just before the final
        character (e.g. ``h_tot_E`` -> ``h_tot_21E`` for ``year='2021'``).
    """
    # rename() returns a new frame, so whatever get.get_bgp handed back is
    # left untouched (the helper may cache or reuse it).
    fetched = get.get_bgp(source, year, cols_hou).rename(columns=hh_rename)

    # Take only the total / owner / renter household columns.
    totals = fetched[['GEO_ID', 'h_tot_E', 'h_tot_M', 'h_otot_E',
                      'h_otot_M', 'h_rtot_E', 'h_rtot_M']]
    cleaned = get.clean_data(totals, totals.columns)

    # Attach the fetched rows to the lookup table, then drop both id columns
    # so that only ``name`` and the data columns reach the aggregation step.
    merged = pd.merge(geo_df, cleaned, how='left', left_on='geoid', right_on='GEO_ID')
    merged = merged.drop(['geoid', 'GEO_ID'], axis=1)
    summed = geo_agg.sumgeo_cv(merged, 'name')

    # Build the whole year-suffix mapping once and rename in a single pass;
    # the first column (the aggregation key) keeps its name.
    yy = year[-2:]
    year_names = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in summed.columns[1:]}
    return summed.rename(columns=year_names)

In [9]:
# Households by tenure for each comparison year, each on its own geography table.
hy1 = make_hh_ten(geo_df=bgp_20, year=y1)
hy0 = make_hh_ten(geo_df=bgp_10, year=y0)

### Average household size in y1 and y0

In [10]:
def make_avg(df1,df2,year,merge_id):
    """Join population and household frames and add average-household-size columns.

    ``df1`` is left-merged with ``df2`` on ``merge_id``. Three ratio columns
    are then appended, each a population estimate divided by the matching
    household estimate, with ``YY`` being the last two characters of ``year``:

    * ``a_tot_YYE``  = ``p_tot_YYE`` / ``h_tot_YYE``
    * ``a_otot_YYE`` = ``p_o_YYE``   / ``h_otot_YYE``
    * ``a_rtot_YYE`` = ``p_r_YYE``   / ``h_rtot_YYE``

    Returns the merged frame; the inputs are not modified.
    """
    yy = year[-2:]
    merged = df1.merge(df2, how='left', on=merge_id)

    # (average, population, household) column stems, in output order.
    ratio_stems = (
        ('a_tot', 'p_tot', 'h_tot'),
        ('a_otot', 'p_o', 'h_otot'),
        ('a_rtot', 'p_r', 'h_rtot'),
    )
    for avg_stem, pop_stem, hh_stem in ratio_stems:
        merged[f'{avg_stem}_{yy}E'] = merged[f'{pop_stem}_{yy}E'] / merged[f'{hh_stem}_{yy}E']
    return merged

In [11]:
# Average household size per year: population frame joined to household frame on name.
avgy1 = make_avg(df1=py1, df2=hy1, year=y1, merge_id='name')
avgy0 = make_avg(df1=py0, df2=hy0, year=y0, merge_id='name')

In [12]:
# Put both years side by side, keeping every row of the y1 table.
avg_vil = avgy1.merge(avgy0, how='left', on='name')

In [13]:
# Preview the first three rows of the combined two-year table.
avg_vil.head(3)

Unnamed: 0,name,p_r_21E,p_r_21M,p_r_21C,p_tot_21E,p_tot_21M,p_tot_21C,p_o_21E,p_o_21M,p_o_21C,...,h_otot_13C,h_rtot_13E,h_rtot_13M,h_rtot_13C,h_tot_13E,h_tot_13M,h_tot_13C,a_tot_13E,a_otot_13E,a_rtot_13E
0,Alhambra,69590.0,4438.001915,3.876812,135881.0,5273.218372,2.359126,66291.0,3520.798205,3.228648,...,2.277293,23584.0,991.434315,2.555528,44410.0,1119.382419,1.532258,2.939315,2.928647,2.948736
1,North Mountain,74541.0,4096.308094,3.340654,165668.0,5920.925857,2.172627,91127.0,4690.628636,3.12909,...,1.694056,28423.0,1098.14571,2.348682,61245.0,1280.656472,1.271148,2.590922,2.592621,2.58896
2,Maryvale,106692.0,5785.361873,3.296346,241055.0,8101.846271,2.043158,134363.0,6484.600373,2.933848,...,1.919751,30255.0,1109.630119,2.22954,59203.0,1231.623319,1.264644,3.714829,3.734006,3.69648


### Make Phoenix & U.S. function

In [14]:
def make_table(year,geo):
    """Build the single-geography average-household-size table for one year.

    Parameters
    ----------
    year : str
        Year passed to the fetch helper; its last two characters are inserted
        into the output column names.
    geo : str
        ``'phoenix'`` (fetched with ``get.get_phx``) or ``'us'`` (fetched with
        ``get.get_us``).

    Returns
    -------
    pandas.DataFrame
        Population and household columns merged on ``GEO_ID`` plus the
        ``a_*`` ratio columns added by ``make_avg``.

    Raises
    ------
    ValueError
        If ``geo`` is not one of the supported values.
    """
    # Fail fast with a clear message; an unknown geo used to fall through and
    # crash later on an unassigned local variable.
    if geo not in ('phoenix', 'us'):
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")
    fetch = get.get_phx if geo == 'phoenix' else get.get_us

    pop = fetch(source, year, cols_pop)
    hh = fetch(source, year, cols_hou)

    # rename() returns new frames, so the fetched objects are never mutated.
    pop = pop.rename(columns=ten_rename)
    # Keep only columns whose name does not end in 'A'
    # (presumably the annotation columns — confirm against the API output).
    pop = pop.filter(regex='(?<!A)$', axis=1)
    pop = get.clean_data(pop, ['GEO_ID'] + list(ten_rename.values()))

    hh = hh.rename(columns=hh_rename)
    # Take only the total / owner / renter household columns.
    hh = hh[['GEO_ID', 'h_tot_E', 'h_tot_M', 'h_otot_E',
             'h_otot_M', 'h_rtot_E', 'h_rtot_M']]
    hh = get.clean_data(hh, hh.columns)

    yy = year[-2:]

    def _with_year(frame):
        # Insert the two-digit year before the last character of every column
        # after the first (the first column, the id, keeps its name).
        return frame.rename(
            columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in frame.columns[1:]}
        )

    return make_avg(_with_year(pop), _with_year(hh), year, 'GEO_ID')

## City of Phoenix

In [15]:
# Phoenix tables for the two comparison years.
phx1 = make_table(year=y1, geo='phoenix')
phx0 = make_table(year=y0, geo='phoenix')

In [16]:
# Join both years on GEO_ID, then run the result through the same
# aggregation helper used for the village table.
avg_phx = geo_agg.sumgeo_cv(
    phx1.merge(phx0, how='left', on='GEO_ID'),
    'GEO_ID',
)
avg_phx

Unnamed: 0,GEO_ID,a_tot_13E,a_tot_13M,a_tot_13C,h_tot_21E,h_tot_21M,h_tot_21C,a_otot_13E,a_otot_13M,a_otot_13C,...,a_rtot_21C,p_o_13E,p_o_13M,p_o_13C,h_otot_21E,h_otot_21M,h_otot_21C,p_o_21E,p_o_21M,p_o_21C
0,455000,2.81403,,,579876.0,2997.0,0.314185,2.844262,,,...,,816525.0,8870.0,0.660371,325058.0,3574.0,0.668387,922220.0,10456.0,0.689232


## U.S.

In [17]:
# U.S. tables for the two comparison years.
us1 = make_table(year=y1, geo='us')
us0 = make_table(year=y0, geo='us')

In [18]:
# Join both years on GEO_ID, then run the result through the same
# aggregation helper used for the village table.
avg_us = geo_agg.sumgeo_cv(
    us1.merge(us0, how='left', on='GEO_ID'),
    'GEO_ID',
)
avg_us

Unnamed: 0,GEO_ID,a_tot_13E,a_tot_13M,a_tot_13C,h_tot_21E,h_tot_21M,h_tot_21C,a_otot_13E,a_otot_13M,a_otot_13C,...,a_rtot_21C,p_o_13E,p_o_13M,p_o_13C,h_otot_21E,h_otot_21M,h_otot_21C,p_o_21E,p_o_21M,p_o_21C
0,0100000US,2.625283,,,124010992.0,196755.0,0.096449,2.691335,,,...,,202053822.0,704205.0,0.211868,80152161.0,337611.0,0.256056,215577207.0,690800.0,0.194798


In [19]:
# Create the destination folder if needed so a fresh checkout can run the
# notebook top to bottom without the export failing on a missing directory.
os.makedirs('output', exist_ok=True)

# One workbook, one sheet per geography level.
with pd.ExcelWriter('output/HH_size_ten.xlsx') as writer:
    avg_vil.to_excel(writer, sheet_name="urban_village", index=False)
    avg_phx.to_excel(writer, sheet_name="phoenix", index=False)
    avg_us.to_excel(writer, sheet_name="us", index=False)