In [1]:
from functools import reduce
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
#Converted TIGER GDB to .csv files using QGIS.
def _read_acs_table(path, codes):
    """Load one exported ACS table, keeping only ``GEOID`` and the requested columns.

    Parameters
    ----------
    path : str
        CSV file to read.
    codes : list of str
        Column names to keep, in the order they should appear after ``GEOID``.

    Returns
    -------
    pandas.DataFrame
        Frame whose columns are exactly ``['GEOID'] + codes``, in that order.

    Raises
    ------
    ValueError
        Raised by ``pd.read_csv`` when a requested column is not in the file.
    """
    wanted = ['GEOID'] + list(codes)
    # usecols avoids parsing the columns that would be thrown away anyway.
    # It returns columns in file order, so index with [wanted] to restore
    # the requested order.
    return pd.read_csv(path, usecols=wanted)[wanted]


# B15003e2 .. B15003e25 and the labels they receive, position by position.
EDUCATION_CODES = ['B15003e' + str(i) for i in range(2, 26)]
EDUCATION_NAMES = ['no_schooling', 'nursery', 'kindergarten', '1st_grade', '2nd_grade',
                   '3rd_grade', '4th_grade', '5th_grade', '6th_grade', '7th_grade', '8th_grade',
                   '9th_grade', '10th_grade', '11th_grade', '12th_grade', 'high_school', 'GED',
                   'college<1', 'college>=1', "associate's", "bachelor's", "master's",
                   'professional', 'doctorate']
# zip() truncates silently, so make sure the two lists really line up.
assert len(EDUCATION_CODES) == len(EDUCATION_NAMES)

# Explicit raw-code -> readable-name mapping. Using a mapping (instead of a
# positional list assigned to acs.columns) keeps every label attached to its
# source column even if a select list below is edited or reordered.
ACS_COLUMN_NAMES = {
    'B01001e1': 'population',
    'B01001e2': 'male_population',
    'B01002e1': 'median_age',
    'B02001e2': 'white',
    'B02001e3': 'black',
    'B02001e4': 'native',
    'B02001e5': 'asian',
    'B03002e12': 'hispanic',
    'B11001e1': 'households',
    'B19013e1': 'median_income',
    'B23025e3': 'labor_force',
    'B23025e5': 'unemployed',
    'B25001e1': 'housing_units',
    'B25003e2': 'owner',
    'B25003e3': 'renter',
    'B25034e2': 'built_2014+',
    'B25038e3': 'owner_2015+',
    'B25038e10': 'renter_2015+',
}
ACS_COLUMN_NAMES.update(zip(EDUCATION_CODES, EDUCATION_NAMES))

# GEOID prefixes dropped from the analysis. NOTE(review): these look like the
# state FIPS codes for Alaska (02), Hawaii (15) and Puerto Rico (72), i.e. the
# filter presumably restricts the data to the contiguous US -- confirm.
EXCLUDED_GEOID_PREFIXES = ['02', '15', '72']

# One frame per exported table; the raw column codes are kept on these frames.
age_sex = _read_acs_table('age_sex.csv', ['B01001e1', 'B01001e2', 'B01002e1'])
race = _read_acs_table('race.csv', ['B02001e2', 'B02001e3', 'B02001e4', 'B02001e5'])
hispanic = _read_acs_table('hispanic_latino.csv', ['B03002e12'])
household = _read_acs_table('household_family.csv', ['B11001e1'])
edu = _read_acs_table('education.csv', EDUCATION_CODES)
income = _read_acs_table('income.csv', ['B19013e1'])
employ = _read_acs_table('employment_status.csv', ['B23025e3', 'B23025e5'])
housing = _read_acs_table('housing_characteristics.csv',
                          ['B25001e1', 'B25003e2', 'B25003e3', 'B25034e2', 'B25038e3', 'B25038e10'])

# Inner-join every table on GEOID, left to right.
dfs = [age_sex, race, hispanic, household, edu, income, employ, housing]
acs = reduce(lambda left, right: pd.merge(left, right, on='GEOID'), dfs)

# Keep only the last 12 characters of the key so it matches the 12-character,
# zero-padded GEOID used by the other block-group files.
acs['GEOID'] = acs['GEOID'].str[-12:]

# .copy() makes acs an independent frame rather than a slice of the merge result.
acs = acs[~acs['GEOID'].str[:2].isin(EXCLUDED_GEOID_PREFIXES)].copy()

acs = acs.rename(columns=ACS_COLUMN_NAMES)
# Every raw code must have been relabelled.
assert not set(ACS_COLUMN_NAMES) & set(acs.columns)

In [3]:
#Used TIGER shapefiles to get road lengths, elevation data, and temperature data using QGIS.
def _read_block_group_csv(path, columns):
    """Read a block-group CSV keyed by ``GEOID`` and apply the given column names.

    Parameters
    ----------
    path : str
        CSV file to read; it must contain a column literally named ``GEOID``.
    columns : list of str
        New names for *all* columns of the file, in file order.

    Returns
    -------
    pandas.DataFrame
        The file's contents with ``GEOID`` as a 12-character, zero-padded
        string and the columns renamed to ``columns``.

    Raises
    ------
    KeyError
        If the file has no ``GEOID`` column.
    ValueError
        If ``columns`` does not have one name per column in the file.
    """
    # Read the key as text. If GEOID were parsed numerically, a single blank
    # value would turn the whole column into floats, and astype(str) would
    # then yield keys ending in '.0' that never match during the merge.
    frame = pd.read_csv(path, dtype={'GEOID': str})
    # Restore leading zeros that were dropped if the key was exported as a number.
    frame['GEOID'] = frame['GEOID'].astype(str).str.zfill(12)
    # Positional rename: the names must follow the column order of the file.
    frame.columns = columns
    return frame


bg_info = _read_block_group_csv(
    'block_group_info.csv',
    ['state_code', 'county_code', 'tract_code', 'bg_code', 'GEOID', 'land_area', 'water_area',
     'latitude', 'longitude', 'elev_mean', 'elev_stdev', 'elev_range'])

roads = _read_block_group_csv('tl_2018_road_lengths.csv', ['GEOID', 'road_length'])

temp = _read_block_group_csv('temperatures.csv', ['GEOID', 'winter_temp'])

# Inner joins on GEOID: a block group missing from any one of the four inputs
# is dropped from the result.
data = reduce(lambda left, right: pd.merge(left, right, on='GEOID'), [bg_info, acs, roads, temp])

# The key is already a string in every input above; kept as a defensive no-op.
data['GEOID'] = data['GEOID'].astype(str)

In [4]:
# Write the merged block-group table to disk; index=False leaves the row index
# out of the file.
output_csv = 'blockgroup_2016_relevant_group1.csv'
data.to_csv(output_csv, index=False)