In [128]:
import pandas as pd
import numpy as np
import os
import geopandas as gpd

In [134]:
# State FIPS codes kept for the analysis: every two-digit code from 01 to 56
# except the ones listed in _skippedFips.
_skippedFips = {2, 3, 7, 14, 15, 43, 52}
fips = [str(code).zfill(2) for code in range(1, 57) if code not in _skippedFips]

Import ACS data (2007-2014)

In [224]:
# Map the column-name suffixes of the ACS race/ethnicity table to short group names.
raceMap = {'E001': 'all', 'E012': 'latino', 'E003': 'white', 'E004': 'black', 
           'E005': 'native', 'E006': 'asian', 'E007': 'hwPac'}
raceKeys = list(raceMap.keys())

dataPath = 'raw/acs'

# One long-format frame per ACS extract. They are concatenated once after the
# loop: DataFrame.append was removed in pandas 2.0 and re-copied the
# accumulated data on every iteration.
acsFrames = []

# Sorted so the processing order does not depend on the filesystem.
for filename in sorted(os.listdir(dataPath)):
    if not filename.endswith('.csv'):
        continue
    print(filename)

    # Every extract has a sibling "<name>_codebook.txt". The (up to) four
    # characters preceding the first 'E001' in it are used as the column
    # prefix for this extract.
    codebookPath = os.path.join(dataPath, os.path.splitext(filename)[0] + '_codebook.txt')
    with open(codebookPath, 'r') as codebook:
        cb = codebook.read().replace('\n', '')
    suffixPos = cb.find('E001')
    if suffixPos == -1:
        # Without this check a negative slice would silently yield a bogus prefix.
        raise ValueError("No 'E001' column found in codebook " + codebookPath)
    nhgisCode = cb[max(suffixPos - 4, 0):suffixPos].strip()

    columns = [nhgisCode + s for s in raceKeys]

    df = pd.read_csv(os.path.join(dataPath, filename), encoding="ISO-8859-1")
    # Only these columns survive the melt below; .copy() gives an independent
    # frame so the assignments do not raise SettingWithCopyWarning.
    df = df[['GISJOIN', 'YEAR', 'STATE'] + columns].copy()
    # YEAR is treated as a string whose first four characters are a year;
    # presumably a range such as '2005-2009', making start + 2 the midpoint
    # of a five-year sample -- TODO confirm against the codebook.
    df['year'] = df.YEAR.str.slice(0, 4).astype(int) + 2
    df['state'] = df.STATE

    # Wide -> long: one row per (block group, year, race column).
    df = pd.melt(df, id_vars=['GISJOIN', 'state', 'year'], value_vars=columns)
    df = df.rename(columns={'value': 'pop', 'variable': 'race'})
    # Strip the extract-specific prefix, then translate the suffix to a group name.
    df['race'] = df.race.str.slice(-4).replace(raceMap)

    acsFrames.append(df)

# An empty frame when no CSV was found, matching the previous behaviour.
popData07_14 = pd.concat(acsFrames) if acsFrames else pd.DataFrame()

nhgis0027_ds195_20095_2009_blck_grp.csv
nhgis0027_ds206_20145_2014_blck_grp.csv
nhgis0027_ds176_20105_2010_blck_grp.csv
nhgis0027_ds225_20165_2016_blck_grp.csv
nhgis0027_ds215_20155_2015_blck_grp.csv
nhgis0027_ds184_20115_2011_blck_grp.csv
nhgis0027_ds201_20135_2013_blck_grp.csv


In [225]:
# Drop rows for states/territories outside the contiguous US, then reshape
# from long to wide: one row per (GISJOIN, year), one column per race group.
nonContiguous = ['Puerto Rico', 'Alaska', 'Hawaii']
isContiguous = ~popData07_14.state.isin(nonContiguous)
popData07_14 = popData07_14.loc[isContiguous]

popData07_14 = popData07_14.pivot_table(
    values='pop',
    index=['GISJOIN', 'year'],
    columns='race',
).reset_index()

Import Decennial Census data (2000/2010)

In [226]:
# 2000
# Column code -> group name for the 2000 table. Keys mapped to 'lat_*' are
# the components summed into 'latino'; all fourteen are summed into 'all'.
raceMap = {'FYF001': 'white', 'FYF002': 'black', 'FYF003': 'native', 'FYF004': 'asian', 
           'FYF005': 'hwPac', 'FYF006': 'otherNonLat', 'FYF007': 'twoNonLat', 
           'FYF008': 'lat_white', 'FYF009': 'lat_black', 'FYF010': 'lat_native', 
           'FYF011': 'lat_asian', 'FYF012': 'lat_hwPac', 'FYF013': 'lat_other', 'FYF014':'lat_two'}
raceKeys = list(raceMap.keys())
latinoKeys = [key for key, group in raceMap.items() if group.startswith('lat_')]
# Components that only feed the totals and are not reported on their own.
componentOnlyKeys = ['FYF006', 'FYF007'] + latinoKeys

df = pd.read_csv('raw/census/nhgis0028_ds147_2000_blck_grp.csv', encoding = "ISO-8859-1")
# .copy() gives an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[['GISJOIN', 'YEAR', 'STATE'] + raceKeys].copy()

# skipna=False matches chained `+`: a missing component yields a missing total
# instead of being counted as zero.
df['latino'] = df[latinoKeys].sum(axis=1, skipna=False)
df['all'] = df[raceKeys].sum(axis=1, skipna=False)
df = df.drop(columns=componentOnlyKeys)
df['year'] = df.YEAR
df['state'] = df.STATE

# Wide -> long: one row per (block group, year, group).
popData2000 = pd.melt(df, id_vars=['GISJOIN', 'state', 'year'],
                      value_vars=['all', 'latino', 'FYF001', 'FYF002',
                                  'FYF003', 'FYF004', 'FYF005'])
popData2000 = popData2000.rename(columns={'value': 'pop', 'variable': 'race'})
# 'all' and 'latino' are not keys of raceMap and pass through unchanged.
popData2000['race'] = popData2000.race.replace(raceMap)
popData2000 = popData2000.loc[~popData2000.state.isin(['Puerto Rico', 'Alaska', 'Hawaii'])]

# Long -> wide: one row per (GISJOIN, year), one column per group.
popData2000 = pd.pivot_table(popData2000, values = 'pop', 
                              index=['GISJOIN', 'year'], 
                              columns = 'race').reset_index()

In [227]:
# 2010
# Column code -> group name for the 2010 table.
raceMap = {'H7Z001':'all', 'H7Z010': 'latino', 'H7Z003': 'white', 'H7Z004': 'black', 
           'H7Z005': 'native', 'H7Z006': 'asian', 'H7Z007': 'hwPac'}
raceKeys = list(raceMap.keys())

df = pd.read_csv('raw/census/nhgis0028_ds172_2010_blck_grp.csv', encoding = "ISO-8859-1")
# .copy() gives an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[['GISJOIN', 'YEAR', 'STATE', 'STATEA', 'COUNTY', 'COUNTYA'] + raceKeys].copy()
df['year'] = df.YEAR
df['state'] = df.STATE

# Wide -> long. value_vars takes the key list rather than the dict itself,
# since melt documents list-like column labels.
popData2010 = pd.melt(df, id_vars=['GISJOIN', 'state', 'year'], value_vars=raceKeys)
popData2010 = popData2010.rename(columns={'value': 'pop', 'variable': 'race'})
popData2010['race'] = popData2010.race.replace(raceMap)
popData2010 = popData2010.loc[~popData2010.state.isin(['Puerto Rico', 'Alaska', 'Hawaii'])]

# Long -> wide: one row per (GISJOIN, year), one column per group.
popData2010 = pd.pivot_table(popData2010, values = 'pop', 
                              index=['GISJOIN', 'year'], 
                              columns = 'race').reset_index()

In [229]:
# Stack the 2000 census, 2010 census and ACS frames into one table.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# row order and the (non-reset) index are the same as with chained appends.
popData = pd.concat([popData2000, popData2010, popData07_14])

In [231]:
# Distinct values of the year column, in order of first appearance.
years = [yr for yr in popData['year'].unique()]

In [251]:
# Schema passed to to_file for every per-year output (loop-invariant).
popSchema = {'geometry': 'Polygon', 'properties': {'GISJOIN': 'str:20',
                                                   'year': 'int:4',
                                                   'all': 'int',
                                                   'asian': 'int',
                                                   'black': 'int',
                                                   'hwPac': 'int',
                                                   'latino': 'int',
                                                   'native': 'int',
                                                   'white': 'int'}}

# For each year: load the matching block-group shapefile, keep the states
# listed in `fips`, attach that year's population counts by GISJOIN, sanity
# check the merge, and write shapefiles/pop_<year>.shp.
for y in years:
    popDataSubset = popData.loc[popData.year == y]

    # Pick the shapefile for this year; the column holding the state FIPS
    # code differs between the files.
    if y < 2008:
        fp = "shapefiles/US_blck_grp_2000.shp"
        data = gpd.read_file(fp)
        # Here the state code is the first two characters of FIPSSTCO.
        data = data.loc[data.FIPSSTCO.str.slice(0,2).isin(fips)]
    elif y in (2008, 2010):
        fp = "shapefiles/US_blck_grp_2010.shp"
        data = gpd.read_file(fp)
        data = data.loc[data.STATEFP10.isin(fips)]
    else:
        # Remaining years use the shapefile dated two years after y.
        fp = "shapefiles/US_blck_grp_" + str(y+2) + ".shp"
        data = gpd.read_file(fp)
        data = data.loc[data.STATEFP.isin(fips)]

    data = data[['GISJOIN', 'geometry']]
    # Outer merge with indicator so rows lacking a geometry ('right_only')
    # can be measured before they are discarded.
    data = data.merge(popDataSubset, how='outer', on='GISJOIN', indicator=True)

    # Series.sum() skips NaN; the builtin sum() would return NaN as soon as
    # one unmatched row has a missing total, and the check would never fire.
    unmatchedPop = data.loc[data._merge == 'right_only', 'all'].sum()
    if unmatchedPop > 300000:
        print("More than 300,000 people are being dropped in merge")
        break
    if len(data) > 250000:
        print("Bad merge: too many blocks")
        break

    # Keep only rows that have a geometry and drop the merge indicator.
    data = data.loc[data._merge != 'right_only'].drop(columns=['_merge'])
    outfp = r"shapefiles/pop_" + str(y) + ".shp"
    data.to_file(outfp, schema=popSchema)
    print(outfp)

shapefiles/pop_2008.shp
shapefiles/pop_2009.shp
shapefiles/pop_2011.shp
shapefiles/pop_2012.shp
shapefiles/pop_2013.shp
shapefiles/pop_2014.shp


END