In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys,os,time,math,csv
import itertools
import collections

import numpy as np
import pandas as pd
import networkx as nx

import matplotlib
import matplotlib.pyplot as plt
plt.style.use("ggplot")

### Load State FIPS code map

This file lists the states included in the analysis; Census rows for any other state are skipped when the Census 2010 data is loaded below.

In [2]:
# Build two lookup tables from the state FIPS file:
#   zero-padded two-digit FIPS code -> state name, and state name -> FIPS code.
state_fips_to_name = {}
state_name_to_fips = {}
with open("data/state_fips.csv", "r") as f:
    for raw_line in f:
        record = raw_line.strip()
        if record == "":
            continue  # ignore blank lines
        fields = record.split(",")
        state_name = fields[0]
        # The third comma-separated field is parsed as an integer and
        # re-formatted so that single-digit codes get a leading zero.
        state_fips = "%02d" % (int(fields[2]))
        state_fips_to_name[state_fips] = state_name
        state_name_to_fips[state_name] = state_fips

### Load regression results for estimated numbers of housing units

In [3]:
# Load the regression results: one row per block group, one value per
# predicted year.
#
# Produces:
#   predicted_years        list of int years parsed from the header row
#   county_fip_list        county FIPS of every data row, in file order
#   block_group_fip_list   block-group FIPS of every data row, in file order
#   block_group_estimates  block-group FIPS -> np.array of the row's values
#   n, m                   number of data rows / number of predicted years
county_fip_list = []
block_group_fip_list = []
block_group_estimates = collections.OrderedDict()

# The context manager closes the file even if a row fails to parse or the
# length check below trips.
with open("data/regression_estimated_housing_units.csv", "r") as f:
    # Header row: the first two columns are identifiers, the rest are years.
    predicted_years = list(map(int, f.readline().strip().split(",")[2:]))
    m = len(predicted_years)

    for line in f:
        line = line.strip()
        if line == "":
            continue  # ignore blank lines

        parts = line.split(",")
        county_fips = parts[0]
        block_group_fips = parts[1]
        values = np.array(list(map(float, parts[2:])))

        # Validate before storing so a malformed row never ends up in the
        # lookup tables.
        assert len(values) == m, (
            "row for block group %s has %d values, expected %d"
            % (block_group_fips, len(values), m)
        )

        county_fip_list.append(county_fips)
        block_group_fip_list.append(block_group_fips)
        block_group_estimates[block_group_fips] = values

# Number of data rows read (one list entry is appended per row).
n = len(block_group_fip_list)

### Load Census 2010 PPHU and GQ data

Block-group FIPS codes in the Census 2010 data must be remapped to account for the geography changes introduced in the 2011 and 2012 ACS releases:

- https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2011/geography-changes.html
- https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2012/geography-changes.html

In [18]:
# Block-group FIPS remapping: key -> replacement code. The loading loop
# below applies it to Census rows whose "Geo_FIPS" value is not a key of
# block_group_estimates. The trailing comment on each entry names the
# block group identified by the key.
# The commented-out final entry is the one block group that the loading loop
# expects to find unmapped (it prints a notice for it).
mapping_census_2010_to_acs_2012 = {
    "040190027011" : "040190027041", # Block Group 1, Census Tract 27.01, Pima County, Arizona
    "040190027012" : "040190027042", # Block Group 2, Census Tract 27.01, Pima County, Arizona
    "040190029031" : "040190029061", # Block Group 1, Census Tract 29.03, Pima County, Arizona
    "040194105011" : "040190041181", # Block Group 1, Census Tract 4105.01, Pima County, Arizona
    "040194105021" : "040190041211", # Block Group 1, Census Tract 4105.02, Pima County, Arizona
    "040194105031" : "040190041251", # Block Group 1, Census Tract 4105.03, Pima County, Arizona
    "040194105032" : "040190041252", # Block Group 2, Census Tract 4105.03, Pima County, Arizona
    "040194704001" : "040190052001", # Block Group 1, Census Tract 4704, Pima County, Arizona
    "040194704002" : "040190052002", # Block Group 2, Census Tract 4704, Pima County, Arizona
    "040194704003" : "040190052003", # Block Group 3, Census Tract 4704, Pima County, Arizona
    "040194704004" : "040190052004", # Block Group 4, Census Tract 4704, Pima County, Arizona
    "040194705001" : "040190053001", # Block Group 1, Census Tract 4705, Pima County, Arizona
    "040194705002" : "040190053002", # Block Group 2, Census Tract 4705, Pima County, Arizona
    "060378002043" : "060371370001", # Block Group 3, Census Tract 8002.04, Los Angeles County, California
    "060379304011" : "060371370002", # Block Group 1, Census Tract 9304.01, Los Angeles County, California
    "360539401011" : "360530301011", # Block Group 1, Census Tract 9401.01, Madison County, New York
    "360539401012" : "360530301012", # Block Group 2, Census Tract 9401.01, Madison County, New York
    "360539401021" : "360530301021", # Block Group 1, Census Tract 9401.02, Madison County, New York
    "360539401022" : "360530301022", # Block Group 2, Census Tract 9401.02, Madison County, New York
    "360539401023" : "360530301023", # Block Group 3, Census Tract 9401.02, Madison County, New York
    "360539401024" : "360530301024", # Block Group 4, Census Tract 9401.02, Madison County, New York
    "360539401031" : "360530301031", # Block Group 1, Census Tract 9401.03, Madison County, New York
    "360539401032" : "360530301032", # Block Group 2, Census Tract 9401.03, Madison County, New York
    "360539401033" : "360530301033", # Block Group 3, Census Tract 9401.03, Madison County, New York
    "360539402001" : "360530302001", # Block Group 1, Census Tract 9402, Madison County, New York
    "360539402002" : "360530302002", # Block Group 2, Census Tract 9402, Madison County, New York
    "360539402003" : "360530302003", # Block Group 3, Census Tract 9402, Madison County, New York
    "360539403001" : "360530303001", # Block Group 1, Census Tract 9403, Madison County, New York
    "360539403002" : "360530303002", # Block Group 2, Census Tract 9403, Madison County, New York
    "360539403003" : "360530303003", # Block Group 3, Census Tract 9403, Madison County, New York
    "360539403004" : "360530303004", # Block Group 4, Census Tract 9403, Madison County, New York
    "360539404011" : "360530304011", # Block Group 1, Census Tract 9404.01, Madison County, New York
    "360539404012" : "360530304012", # Block Group 2, Census Tract 9404.01, Madison County, New York
    "360539404013" : "360530304013", # Block Group 3, Census Tract 9404.01, Madison County, New York
    "360539404014" : "360530304014", # Block Group 4, Census Tract 9404.01, Madison County, New York
    "360539404015" : "360530304015", # Block Group 5, Census Tract 9404.01, Madison County, New York
    "360539404031" : "360530304031", # Block Group 1, Census Tract 9404.03, Madison County, New York
    "360539404032" : "360530304032", # Block Group 2, Census Tract 9404.03, Madison County, New York
    "360539404033" : "360530304033", # Block Group 3, Census Tract 9404.03, Madison County, New York
    "360539406001" : "360530306001", # Block Group 1, Census Tract 9406, Madison County, New York
    "360539406002" : "360530306002", # Block Group 2, Census Tract 9406, Madison County, New York
    "360539406003" : "360530306003", # Block Group 3, Census Tract 9406, Madison County, New York
    "360539406004" : "360530306004", # Block Group 4, Census Tract 9406, Madison County, New York
    "360539407001" : "360530304021", # Block Group 1, Census Tract 9407, Madison County, New York
    "360539407002" : "360530304022", # Block Group 2, Census Tract 9407, Madison County, New York
    "360659400001" : "360650248001", # Block Group 1, Census Tract 9400, Oneida County, New York
    "360659400002" : "360650248002", # Block Group 2, Census Tract 9400, Oneida County, New York
    "360659401001" : "360650247001", # Block Group 1, Census Tract 9401, Oneida County, New York
    "360659401002" : "360650247002", # Block Group 2, Census Tract 9401, Oneida County, New York
    "360659401003" : "360650247003", # Block Group 3, Census Tract 9401, Oneida County, New York
    "360659401004" : "360650247004", # Block Group 4, Census Tract 9401, Oneida County, New York
    "360659402001" : "360650249001", # Block Group 1, Census Tract 9402, Oneida County, New York
    "360659402002" : "360650249002", # Block Group 2, Census Tract 9402, Oneida County, New York
    "360659402003" : "360650249003", # Block Group 3, Census Tract 9402, Oneida County, New York
    #"360850089000" : "WATER" # Block Group 0, Census Tract 89, Richmond County, New York
}

# Per-block-group values read from the Census 2010 extract. Each table is
# keyed by block-group FIPS code (remapped where the raw code is not a key of
# block_group_estimates) and filled from one column of the file:
#   census_2010_gq      <- SF1_P0420001
#   census_2010_pphu    <- SF1_P0170001
#   census_2010_hu      <- SF1_H0010001
#   census_2010_pop_hu  <- SF1_H0100001
census_2010_gq = collections.OrderedDict()
census_2010_pphu = collections.OrderedDict()

census_2010_hu = collections.OrderedDict()
census_2010_pop_hu = collections.OrderedDict()

with open("data/R11633875_SL150.txt", "r", encoding="latin-1") as f:
    reader = csv.DictReader(f, delimiter="\t")

    for row in reader:
        state_code = row["Geo_STATE"]
        county_code = row["Geo_COUNTY"]
        county_fips = state_code + county_code
        fips_code = row["Geo_FIPS"]

        # Only states listed in the state FIPS map are processed.
        if state_code not in state_fips_to_name:
            continue

        pphu_value = float(row["SF1_P0170001"])
        gq_value = float(row["SF1_P0420001"])

        hu_value = float(row["SF1_H0010001"])
        pop_hu_value = float(row["SF1_H0100001"])

        if fips_code not in block_group_estimates:
            # The raw code has no housing-unit estimate: translate it through
            # the remapping table, or report it when no translation exists.
            remapped_code = mapping_census_2010_to_acs_2012.get(fips_code)
            if remapped_code is None:
                print("This should only be reported for a single BG in Richmond, NY")
                print(fips_code, row["Geo_QName"], gq_value, pphu_value)
            else:
                fips_code = remapped_code

        # Unmapped codes are still stored, under their original code.
        for table, value in (
            (census_2010_gq, gq_value),
            (census_2010_pphu, pphu_value),
            (census_2010_hu, hu_value),
            (census_2010_pop_hu, pop_hu_value),
        ):
            table[fips_code] = value

This should only be reported for a single BG in Richmond, NY
360850089000 Block Group 0, Census Tract 89, Richmond County, New York 0.0 0.0


In [19]:
# Every block group that has a housing-unit estimate must also have Census
# 2010 values; otherwise the population step would fail with a KeyError.
# Collect all offenders so the failure message says which codes are missing.
missing_block_groups = [
    fips_code
    for fips_code in block_group_estimates.keys()
    if fips_code not in census_2010_gq
]
assert not missing_block_groups, (
    "%d block groups have housing-unit estimates but no Census 2010 data, e.g. %s"
    % (len(missing_block_groups), missing_block_groups[:5])
)

### Create population estimates

In [25]:
# Estimate the population of every block group for every predicted year:
#
#   population = estimated housing units * pphu_2 + gq
#
# where pphu_2 = census_2010_pop_hu / census_2010_hu for the block group and
# gq is its census_2010_gq value. The result is one row per block group, in
# the order of block_group_fip_list, and one column per predicted year.

# NOTE(review): total_diff is never updated in this cell. It is kept only
# because the following cell displays it.
total_diff = 0.0

population_rows = []
for block_group_fips in block_group_fip_list:
    gq = census_2010_gq[block_group_fips]
    estimated_housing_units = block_group_estimates[block_group_fips]

    housing_units = census_2010_hu[block_group_fips]
    population_in_units = census_2010_pop_hu[block_group_fips]

    if housing_units == 0:
        # No housing units to divide by: fall back to a ratio of zero, and
        # report the block group if the Census still counts people there.
        pphu_2 = 0.0
        if population_in_units != 0:
            print(
                "Block group %s has population %s but zero housing units; using a ratio of 0"
                % (block_group_fips, population_in_units)
            )
    else:
        pphu_2 = population_in_units / housing_units

    population_rows.append(estimated_housing_units * pphu_2 + gq)

predicted_population = np.array(population_rows)

In [23]:
# NOTE(review): the preceding cell sets total_diff = 0.0 and never updates it,
# so a fresh Restart & Run All displays 0.0 here. The saved output below comes
# from an earlier execution (this cell is In [23], the preceding one In [25]).
total_diff

-36053532.57999963

### Write population estimates

In [26]:
# Write the population estimates as CSV: a header row with the predicted
# years, then one row per block group (county FIPS, block-group FIPS, one
# value per year). The context manager guarantees the file is flushed and
# closed even if a write fails part-way through.
with open("data/regression_estimated_population.csv", "w") as f:
    f.write("Geo_COUNTY_FIPS,Geo_BG_FIPS,%s\n" % (",".join(map(str, predicted_years))))
    # The three sequences are parallel: entry i of each describes the same
    # block group.
    for row_county_fips, row_block_group_fips, row_population in zip(
        county_fip_list, block_group_fip_list, predicted_population
    ):
        f.write("%s,%s,%s\n" % (
            row_county_fips,
            row_block_group_fips,
            ",".join(map(str, row_population))
        ))
f.close()