### Get geography information (area and land area) from Census TIGERweb, pull in Decennial Census population and housing counts, and calculate population and housing density for 2010 and 2020

In [1]:
import pandas as pd
import numpy as np

In [2]:
import geo_getter as get

## urban villages

In [3]:
# Load the 2010 and 2020 urban-village geography CSVs. Later cells read the
# `name`, `land_acre` and `aland` columns from each frame.
# NOTE(review): the file names suggest block-group parts assigned to
# villages -- confirm against whatever produces these files.
bg_10, bg_20 = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/geo/bgp_vil_10.csv', '../data/geo/bgp_vil_20.csv')
)

In [4]:
# Total the 2010 land-area columns per `name`, then tag the totals with a
# "10" suffix so they can sit next to the 2020 columns after merging.
uv10 = (
    bg_10[['name', 'land_acre', 'aland']]
    .groupby(['name'])
    .sum()
    .reset_index()
    .rename(columns={'land_acre': 'AREALAND_ACRE10', 'aland': 'AREALAND10'})
)

In [5]:
# Total the 2020 land-area columns per `name`, then tag the totals with a
# "20" suffix so they can sit next to the 2010 columns after merging.
uv20 = (
    bg_20[['name', 'land_acre', 'aland']]
    .groupby(['name'])
    .sum()
    .reset_index()
    .rename(columns={'land_acre': 'AREALAND_ACRE20', 'aland': 'AREALAND20'})
)

In [6]:
# One row per 2010 `name`; the 2020 totals are attached where the name matches
# (left join, so a name missing from the 2020 table gets NaN areas).
uv = uv10.merge(uv20, on='name', how='left')

## cities

In [7]:
# Places of interest, keyed by the integer GEOID that is matched against the
# `GEOID` column of the place table further down.
# NOTE(review): the keys are ints, so `GEOID` must be numeric for the filter
# to match -- confirm against what `get.make_city` returns.
cities = {
    455000: 'Phoenix',
    473000: 'Tempe',
    465000: 'Scottsdale',
    427820: 'Glendale',
    4865000: 'San Antonio',
    4819000: 'Dallas',
    1235000: 'Jacksonville',
    1836000: 'Indianapolis',
}

# The GEOIDs alone, in the same order as the mapping above.
geoids = list(cities)

In [8]:
# Build the place-level tables for each census year via geo_getter.
# NOTE(review): presumably each frame carries GEOID plus year-suffixed
# BASENAME/AREAWATER/AREALAND columns (later cells rely on those) -- confirm.
pl10, pl20 = (get.make_city(census_year) for census_year in ('2010', '2020'))

In [9]:
# Line up the 2010 and 2020 place rows on GEOID, keeping every 2010 row.
places = pl10.merge(pl20, on='GEOID', how='left')

In [10]:
# Keep only the rows whose GEOID is one of the keys of `cities`.
places = places.loc[places['GEOID'].isin(list(cities))].copy()

In [11]:
# Use the 2010 base name as the shared `name` key, and discard the join key,
# the water-area columns and the duplicate 2020 base name.
places = (
    places
    .rename(columns={'BASENAME10': 'name'})
    .drop(columns=['GEOID', 'AREAWATER10', 'BASENAME20', 'AREAWATER20'])
)

## maricopa

In [13]:
# Build the Maricopa tables for each census year via geo_getter.
# NOTE(review): presumably the same column layout as the place tables
# (GEOID, BASENAME*, AREAWATER*, AREALAND*) -- confirm.
mar10, mar20 = (get.make_maricopa(census_year) for census_year in ('2010', '2020'))

In [14]:
# Line up the 2010 and 2020 Maricopa rows on GEOID, keeping every 2010 row.
mar = mar10.merge(mar20, on='GEOID', how='left')

In [15]:
# Use the 2010 base name as the shared `name` key, and discard the join key,
# the water-area columns and the duplicate 2020 base name.
mar = (
    mar
    .rename(columns={'BASENAME10': 'name'})
    .drop(columns=['GEOID', 'AREAWATER10', 'BASENAME20', 'AREAWATER20'])
)

## arizona

In [17]:
# Build the Arizona tables for each census year via geo_getter, join them on
# GEOID (keeping every 2010 row) and label the result with a fixed name.
az10, az20 = (get.make_az(census_year) for census_year in ('2010', '2020'))
az = az10.merge(az20, on='GEOID', how='left').assign(name='AZ')

In [18]:
# Discard the join key, both base-name columns (the fixed `name` replaces
# them) and the water-area columns.
az = az.drop(
    columns=['GEOID', 'AREAWATER10', 'BASENAME10', 'BASENAME20', 'AREAWATER20']
)

## us

In [20]:
# Build the national tables for each census year via geo_getter and join them.
# The join key here is FUNCSTAT rather than GEOID.
# NOTE(review): presumably FUNCSTAT is the only column the two national
# frames share -- confirm against `get.make_us`.
us10, us20 = (get.make_us(census_year) for census_year in ('2010', '2020'))
us = us10.merge(us20, on='FUNCSTAT', how='left')

In [21]:
# Label the national row with a fixed name, then discard the join key and the
# water-area columns.
us = (
    us
    .assign(name='US')
    .drop(columns=['FUNCSTAT', 'AREAWATER10', 'AREAWATER20'])
)

## make big geography table (urban villages, cities, Maricopa, AZ, US)

In [23]:
# Stack every geography level into one table: urban villages first, then the
# selected cities, Maricopa, Arizona and the US.
geos = pd.concat((uv, places, mar, az, us))

## read in pop and housing from output folder

In [24]:
# Load the table of counts from the output folder; the next cell keeps its
# `name`, housing (Hou_*) and population (Pop_*) columns.
data = pd.read_csv(filepath_or_buffer='output/dec_pop_hou_race.csv')

In [26]:
# Keep only the join key plus the 2010/2020 housing and population columns.
data = data.loc[:, ['name', 'Hou_10E', 'Hou_20E', 'Pop_10E', 'Pop_20E']]

## calc housing and pop density in 2010 and 2020

In [27]:
# Attach the counts to every geography by `name`; geographies without a
# matching row in `data` keep NaN counts (left join).
geos = geos.merge(data, on='name', how='left')

In [29]:
# Density = count divided by land area in acres, computed separately for each
# census year using that year's land area.
geos['HouDen_10'] = geos['Hou_10E'].div(geos['AREALAND_ACRE10'])
geos['PopDen_10'] = geos['Pop_10E'].div(geos['AREALAND_ACRE10'])
geos['HouDen_20'] = geos['Hou_20E'].div(geos['AREALAND_ACRE20'])
geos['PopDen_20'] = geos['Pop_20E'].div(geos['AREALAND_ACRE20'])

In [32]:
# Preview the first three rows of the combined table.
geos.iloc[:3]

Unnamed: 0,name,AREALAND_ACRE10,AREALAND10,AREALAND_ACRE20,AREALAND20,Hou_10E,Hou_20E,Pop_10E,Pop_20E,HouDen_10,PopDen_10,HouDen_20,PopDen_20
0,Ahwatukee Foothills,29912.92734,121053509,29844.149614,120775175.0,33528,34527,77344,80604,1.120853,2.585638,1.15691,2.700831
1,Alhambra,12416.119128,50246329,12498.698407,50580516.0,52066,52997,126444,144882,4.19342,10.183858,4.240202,11.591767
2,Camelback East,23347.765346,94485200,24110.846591,97573285.0,69263,75182,132153,144949,2.966579,5.660199,3.118182,6.011776


In [33]:
# Write the combined table to the output folder, leaving out the row index.
geos.to_csv(path_or_buf='output/dec_pop_hou_den.csv', index=False)