In [7]:
import pandas as pd
import math
import numpy as np
import os

In [8]:
import getters as get
import utilcalcs as calc
import geo_agg
from acs_hhsize_vars import *

In [9]:
# Search parameters - NOTE: different variable number assignments in 2010 vs 2020
y1 = '2021'
y0_5 = '2013'  # presumably the comparison year paired with source5 - confirm
y0_1 = '2013'  # presumably the comparison year paired with source1 - confirm

# Census API "group(...)" selectors: every variable of one ACS table
cols_pop = 'group(B25008)'  # passed to make_pop by the table builders
cols_hou = 'group(B25009)'  # passed to make_hou by the table builders

# Census API dataset paths for the 5-year and 1-year ACS products
source5 = 'acs/acs5'
source1 = 'acs/acs1'

# The Urban Village cells and the later make_table(year, geo, cols) read the bare
# names `source` and `y0`; without an assignment they raise NameError on a fresh
# kernel (Restart & Run All).
# NOTE(review): the 5-year values are assumed because those cells pull block-group
# data (get.get_bgp), which the Census Bureau publishes only in the ACS 5-year
# product - confirm this is the intended source/year.
source = source5
y0 = y0_5

### Table functions

In [10]:
def make_pop(df):
    """Rename and clean a raw population-by-tenure pull (``cols_pop``).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as returned by one of the ``get.get_*`` helpers. It is left
        untouched; a relabelled copy is processed instead.

    Returns
    -------
    pandas.DataFrame
        Whatever ``get.clean_data`` returns for ``GEO_ID`` plus the renamed
        columns listed in ``ten_rename``.
    """
    # Non-mutating rename: the previous inplace=True silently relabelled the
    # caller's frame as a side effect.
    renamed = df.rename(columns=ten_rename)
    # Keep only labels that do not end in 'A' (drop non-estimate columns)
    kept = renamed.filter(regex='(?<!A)$', axis=1)
    return get.clean_data(kept, ['GEO_ID'] + list(ten_rename.values()))

def make_hou(df):
    """Rename and clean a raw household pull (``cols_hou``), totals only.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as returned by one of the ``get.get_*`` helpers. It is left
        untouched; a relabelled copy is processed instead.

    Returns
    -------
    pandas.DataFrame
        Whatever ``get.clean_data`` returns for ``GEO_ID`` and the estimate (E)
        and margin (M) columns of the three household totals.
    """
    # Take only total households: all, owner (otot) and renter (rtot)
    total_cols = ['GEO_ID', 'h_tot_E', 'h_tot_M', 'h_otot_E',
                  'h_otot_M', 'h_rtot_E', 'h_rtot_M']
    # Non-mutating rename: the previous inplace=True silently relabelled the
    # caller's frame as a side effect.
    totals = df.rename(columns=hh_rename)[total_cols]
    return get.clean_data(totals, totals.columns)

In [11]:
# Build one cleaned, aggregated ACS table for a family of geographies
def make_table(source,year,cols,geotype='big'):
    """Fetch, clean, aggregate and year-tag one ACS table.

    Parameters
    ----------
    source : str
        Dataset path handed to the ``get.get_*`` helpers (e.g. ``source1``).
    year : str
        Four-digit year as a string; its last two characters are inserted
        into the output column names and ``int(year)`` is passed to
        ``geo_agg.make_uv`` for the ``'uv'`` geotype.
    cols : str
        Column selector handed to the ``get.get_*`` helpers. ``cols_pop`` is
        routed through ``make_pop`` and ``cols_hou`` through ``make_hou``;
        any other value is passed on without cleaning.
    geotype : {'big', 'uv', 'az_places'}, default 'big'
        ``'big'`` stacks the frames from ``get.get_phx``, ``get.get_maricopa``,
        ``get.get_us``, ``get.get_az`` and ``get.get_comp_cities``;
        ``'uv'`` uses ``get.get_bgp`` followed by ``geo_agg.make_uv``;
        ``'az_places'`` uses ``get.get_az_plc``.

    Returns
    -------
    pandas.DataFrame
        Output of ``geo_agg.sumgeo_cv`` with every column after the first
        renamed so the two-digit year sits before the final character
        (``p_tot_E`` -> ``p_tot_21E`` for ``year='2021'``).

    Raises
    ------
    ValueError
        If ``geotype`` is not one of the supported values. Previously this
        fell through and failed later with an UnboundLocalError.
    """
    if geotype == 'big':
        frames = [
            get.get_phx(source, year, cols),
            get.get_maricopa(source, year, cols),
            get.get_us(source, year, cols),
            get.get_az(source, year, cols),
            get.get_comp_cities(source, year, cols),
        ]
        df = pd.concat(frames).drop(['NAME', 'us', 'state'], axis=1)
        aggtype = 'GEO_ID'
    elif geotype == 'uv':
        df = get.get_bgp(source, year, cols)
        aggtype = 'name'
    elif geotype == 'az_places':
        df = get.get_az_plc(source, year, cols)
        aggtype = 'GEO_ID'
    else:
        raise ValueError(
            f"unknown geotype {geotype!r}; expected 'big', 'uv' or 'az_places'"
        )

    if cols == cols_pop:
        df = make_pop(df)
    elif cols == cols_hou:
        df = make_hou(df)

    if geotype == 'uv':
        df = geo_agg.make_uv(df, int(year))
    df = geo_agg.sumgeo_cv(df, aggtype)

    # Tag every column except the first (the geography key) with the year
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

In [12]:
def make_avg(df1,df2,year,merge_id):
    """Left-join two tables and add average-household-size columns.

    ``df1`` must carry the ``p_*`` columns and ``df2`` the ``h_*`` columns for
    the two-digit form of ``year``; the join key is ``merge_id``. Three ratio
    columns are appended to the merged frame, which is then returned:
    ``a_tot`` (p_tot / h_tot), ``a_otot`` (p_o / h_otot) and
    ``a_rtot`` (p_r / h_rtot), each suffixed ``_<yy>E``.
    """
    yy = year[-2:]
    merged = pd.merge(df1, df2, on=merge_id, how='left')
    # (result stem, numerator stem, denominator stem)
    ratios = (
        ('a_tot', 'p_tot', 'h_tot'),
        ('a_otot', 'p_o', 'h_otot'),
        ('a_rtot', 'p_r', 'h_rtot'),
    )
    for out_stem, num_stem, den_stem in ratios:
        merged[f'{out_stem}_{yy}E'] = merged[f'{num_stem}_{yy}E'] / merged[f'{den_stem}_{yy}E']
    return merged

## 1-year table for city & larger geos

In [29]:
# GEO_ID code -> display label for the comparison cities and the larger
# geographies; used to relabel the GEO_ID column of the combined table
rename_geos = {
    '0455000': 'Phoenix',
    '04013': 'Maricopa',
    '0100000US': 'US',
    '0400000US04': 'AZ',
    '0473000': 'Tempe',
    '0465000': 'Scottsdale',
    '0427820': 'Glendale',
    '4865000': 'San Antonio',
    '4835000': 'Houston',
    '1235000': 'Jacksonville',
}

In [30]:
# 1-year tables for y1: the default 'big' geographies first, then AZ places
pop_y1_1y = make_table(source=source1, year=y1, cols=cols_pop)
hou_y1_1y = make_table(source=source1, year=y1, cols=cols_hou)
popaz_y1_1y = make_table(source=source1, year=y1, cols=cols_pop, geotype='az_places')
houaz_y1_1y = make_table(source=source1, year=y1, cols=cols_hou, geotype='az_places')

In [31]:
# Join the population and household tables for each geography family
big = pd.merge(pop_y1_1y, hou_y1_1y, how='left', on='GEO_ID')
az = pd.merge(popaz_y1_1y, houaz_y1_1y, how='left', on='GEO_ID')
# Swap GEO_ID codes for readable labels. A code missing from rename_geos keeps
# its original value; a bare .map() would silently turn it into NaN.
big['GEO_ID'] = big['GEO_ID'].map(rename_geos).fillna(big['GEO_ID'])
# Stack the big geographies on top of the AZ places
yr1 = pd.concat([big, az])

In [32]:
# Display the combined 1-year table (big geographies followed by AZ places)
yr1

Unnamed: 0,GEO_ID,p_o_21E,p_o_21M,p_o_21C,p_r_21E,p_r_21M,p_r_21C,p_tot_21E,p_tot_21M,p_tot_21C,h_otot_21E,h_otot_21M,h_otot_21C,h_rtot_21E,h_rtot_21M,h_rtot_21C,h_tot_21E,h_tot_21M,h_tot_21C
0,Phoenix,977426.0,24548.0,1.526744,624145.0,24633.0,2.399197,1601571.0,5087.0,0.193085,354237.0,9062.0,1.555121,247802.0,8175.0,2.005474,602039.0,7755.0,0.783053
1,Maricopa,3011806.0,35453.0,0.715583,1423752.0,34603.0,1.477452,4435558.0,5794.0,0.079408,1124423.0,11341.0,0.613134,583611.0,10884.0,1.133703,1708034.0,7178.0,0.255471
2,US,221165400.0,446165.0,0.122634,102967486.0,446165.0,0.263408,324132886.0,0.0,0.0,83396988.0,187164.0,0.136429,44147742.0,126106.0,0.173645,127544730.0,97632.0,0.046533
3,AZ,4934464.0,41714.0,0.513897,2196143.0,41714.0,1.154663,7130607.0,0.0,0.0,1905690.0,14256.0,0.454757,912033.0,14327.0,0.954946,2817723.0,10850.0,0.234081
4,San Antonio,812886.0,21067.0,1.575459,618959.0,21672.0,2.128488,1431845.0,9136.0,0.387877,290123.0,6616.0,1.386269,259122.0,8435.0,1.978859,549245.0,5787.0,0.640503
5,Houston,1024291.0,25301.0,1.50158,1220907.0,25985.0,1.293821,2245198.0,14107.0,0.381957,390226.0,9351.0,1.45672,534755.0,10246.0,1.164752,924981.0,9366.0,0.615539
6,Jacksonville,566725.0,15957.0,1.711642,367050.0,16147.0,2.674242,933775.0,7054.0,0.459227,226429.0,5690.0,1.527616,159854.0,6368.0,2.421663,386283.0,5159.0,0.811884
0,Gilbert,211313.0,7180.0,2.065534,61393.0,7212.0,7.141196,272706.0,295.0,0.06576,69983.0,2583.0,2.243706,23489.0,2796.0,7.236136,93472.0,2773.0,1.803443
1,Glendale,149253.0,7918.0,3.224976,96720.0,8022.0,5.041972,245973.0,1246.0,0.307939,52463.0,2915.0,3.377688,38141.0,2666.0,4.249151,90604.0,2635.0,1.767939
2,Goodyear,76688.0,6722.0,5.328503,19398.0,6299.0,19.740073,96086.0,2567.0,1.624052,25725.0,2612.0,6.172369,7405.0,1710.0,14.037997,33130.0,2366.0,4.341376


In [33]:
# Save the combined 1-year table to a workbook in the working directory,
# without the row index
yr1.to_excel(excel_writer='acs_1yr_ten.xlsx', index=False)

## Urban Village

#### Population by tenure

In [6]:
def make_pop_ten(geo_df,year):
    """Population by tenure, rolled up by ``geo_agg.make_uv``, for one year.

    Reads the module-level names ``source`` and ``cols_pop``; both must be
    defined before this is called.

    Parameters
    ----------
    geo_df : any
        Accepted for call compatibility but never read by this function.
    year : str
        Four-digit year as a string; ``int(year)`` goes to ``geo_agg.make_uv``
        and the last two characters are inserted into the column names.

    Returns
    -------
    pandas.DataFrame
        Output of ``geo_agg.make_uv`` with every column after the first
        renamed so the two-digit year sits before the final character.
    """
    # make_pop performs the same rename / filter / clean steps that were
    # previously copy-pasted here
    df = make_pop(get.get_bgp(source, year, cols_pop))
    df = geo_agg.make_uv(df, int(year))
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

In [None]:
# Population by tenure for the two comparison years.
# NOTE(review): bgp_20 / bgp_10 are not assigned in the visible part of this
# notebook - confirm where they come from.
py1 = make_pop_ten(geo_df=bgp_20, year=y1)
py0 = make_pop_ten(geo_df=bgp_10, year=y0)

#### Households by tenure

In [None]:
def make_hh_ten(geo_df,year):
    """Total households by tenure, rolled up by ``geo_agg.make_uv``, for one year.

    Reads the module-level names ``source`` and ``cols_hou``; both must be
    defined before this is called.

    Parameters
    ----------
    geo_df : any
        Accepted for call compatibility but never read by this function.
    year : str
        Four-digit year as a string; ``int(year)`` goes to ``geo_agg.make_uv``
        and the last two characters are inserted into the column names.

    Returns
    -------
    pandas.DataFrame
        Output of ``geo_agg.make_uv`` with every column after the first
        renamed so the two-digit year sits before the final character.
    """
    # make_hou performs the same rename / select / clean steps that were
    # previously copy-pasted here
    df = make_hou(get.get_bgp(source, year, cols_hou))
    df = geo_agg.make_uv(df, int(year))
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

In [None]:
# Households by tenure for the two comparison years.
# NOTE(review): bgp_20 / bgp_10 are not assigned in the visible part of this
# notebook - confirm where they come from.
hy1 = make_hh_ten(geo_df=bgp_20, year=y1)
hy0 = make_hh_ten(geo_df=bgp_10, year=y0)

### Average household size in y1 and y0

In [None]:
# Average household size per year: population table joined to household table on 'name'
avgy1 = make_avg(df1=py1, df2=hy1, year=y1, merge_id='name')
avgy0 = make_avg(df1=py0, df2=hy0, year=y0, merge_id='name')

In [None]:
# Put both years side by side, keeping every row of the y1 table
avg_vil = pd.merge(left=avgy1, right=avgy0, on='name', how='left')

In [None]:
# Preview the first three rows of the two-year table
avg_vil.head(3)

In [None]:
# Write each average-household-size table to its own sheet of one workbook.
# NOTE(review): avg_phx and avg_us are not assigned anywhere in the visible
# notebook - confirm they are built before this cell runs.
os.makedirs('output', exist_ok=True)  # ExcelWriter will not create a missing folder
with pd.ExcelWriter('output/HH_size_ten.xlsx') as writer:
    avg_vil.to_excel(writer, sheet_name="urban_village", index=False)
    avg_phx.to_excel(writer, sheet_name="phoenix", index=False)
    avg_us.to_excel(writer, sheet_name="us", index=False)

In [None]:
def make_table(year,geo,cols):
    """Average household size by tenure for Phoenix or the US, for one year.

    NOTE: running this cell replaces the earlier
    ``make_table(source, year, cols, geotype='big')`` defined higher up in the
    notebook, and the two signatures are not compatible; re-running earlier
    cells afterwards will call this version.

    Reads the module-level names ``source``, ``cols_pop`` and ``cols_hou``;
    all three must be defined before this is called.

    Parameters
    ----------
    year : str
        Four-digit year as a string; the last two characters are inserted
        into the column names.
    geo : {'phoenix', 'us'}
        Selects ``get.get_phx`` or ``get.get_us`` as the data source.
    cols : any
        Accepted for call compatibility but never read; the population and
        household selectors always come from ``cols_pop`` / ``cols_hou``.

    Returns
    -------
    pandas.DataFrame
        Result of ``make_avg`` on the year-tagged population and household
        frames, joined on ``GEO_ID``.

    Raises
    ------
    ValueError
        If ``geo`` is not ``'phoenix'`` or ``'us'``. Previously this fell
        through and failed later with an UnboundLocalError.
    """
    if geo == 'phoenix':
        fetch = get.get_phx
    elif geo == 'us':
        fetch = get.get_us
    else:
        raise ValueError(f"unknown geo {geo!r}; expected 'phoenix' or 'us'")

    # Fetch both tables first, then clean them with the shared helpers
    pop_raw = fetch(source, year, cols_pop)
    hou_raw = fetch(source, year, cols_hou)
    pop = make_pop(pop_raw)
    hou = make_hou(hou_raw)

    # Tag every column except the first (GEO_ID) with the two-digit year
    yy = year[-2:]
    pop = pop.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in pop.columns[1:]}
    )
    hou = hou.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in hou.columns[1:]}
    )

    return make_avg(pop, hou, year, 'GEO_ID')