In [3]:
import geopandas as gpd
import census as gc
import os
import pandas as pd
from shapely import geometry

In [17]:
# which census dataset
dataset = 'acs/acs5'

# which vintage year
year = 2019

# which census variables to retrieve for each tract
variables = {'DP05_0001E':'total_pop',    #total pop
             'DP05_0018E':'median_age',    #median age
             'DP05_0071PE':'pct_hispanic',   #pct pop hispanic or latino
             'DP05_0077PE':'pct_white',   #pct pop non-hispanic white alone
             'DP05_0078PE':'pct_black',   #pct pop non-hispanic black
             'DP05_0080E':'pct_asian',   #pct pop non-hispanic asian
             'DP05_0002PE':'pct_male',   #pct pop male
             'DP04_0007PE':'pct_single_family_home',   #pct single family detached homes
             'DP04_0089E':'med_home_value',    #median value of owner occupied units (dollars)
             'DP04_0037E':'med_rooms_per_home',    #median number of rooms in house
             'DP04_0026PE':'pct_built_before_1940',   #pct structure built 1939 or earlier
             'DP04_0047PE':'pct_renting',   #pct renter-occupied housing units
             'DP04_0005E':'rental_vacancy_rate',    #rental vacancy rate
             'DP04_0049E':'avg_renter_household_size',    #average household size of renter-occupied housing units
             'DP04_0134E':'med_gross_rent',    #median gross rent (dollars)
             'DP03_0062E':'med_household_income',    #median household income
             'DP03_0025E':'mean_commute_time',    #mean travel time to work
             'DP03_0019PE':'pct_commute_drive_alone',   #pct commute drove alone
             'DP03_0128PE':'pct_below_poverty',   #pct people with income below povery level
             'DP02_0057PE':'pct_college_grad_student',   #pct who are students currently enrolled in college or grad school
             'DP02_0079PE':'pct_same_residence_year_ago',   #pct residence 1 year ago was same house
             'DP02_0067PE':'pct_bachelors_degree',   #pct bachelor's degree or higher
             'DP02_0111PE':'pct_english_only',   #pct with english only language spoken at home
             'DP02_0092PE':'pct_foreign_born'}   #pct of population foreign born

# data directories
# Sourced from https://www2.census.gov/geo/tiger/TIGER2019/TRACT/
tracts_path = 'tl_2019_37_tract'

In [6]:
# download and display census descriptions of each variable
variable_descriptions = gc.get_census_variable_descriptions(dataset=dataset, 
                                                            year=year, 
                                                            variables=variables)
for v, d in variable_descriptions.items():
    print('{}\t{}'.format(variables[v], d['label']))

total_pop	Estimate!!SEX AND AGE!!Total population
median_age	Estimate!!SEX AND AGE!!Total population!!Median age (years)
pct_hispanic	Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)
pct_white	Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
pct_black	Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone
pct_asian	Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Asian alone
pct_male	Percent!!SEX AND AGE!!Total population!!Male
pct_single_family_home	Percent!!UNITS IN STRUCTURE!!Total housing units!!1-unit, detached
med_home_value	Estimate!!VALUE!!Owner-occupied units!!Median (dollars)
med_rooms_per_home	Estimate!!ROOMS!!Total housing units!!Median rooms
pct_built_before_1940	Percent!!YEAR STRUCTURE BUILT!!Total housing units!!Built 1939 or earlier
pct_renting	Percent!!HOUSING TENURE!!Occupied housing units!!Renter-o

In [4]:
acs = pd.read_csv("../data/acs_clean.csv")

In [5]:
acs = acs[acs['NAME'].str.endswith("North Carolina")]

In [6]:
# acs.to_csv("../data/acs_clean.csv")

In [7]:
acs = acs[['GEOID','NAME', 'variable','estimate']]

In [8]:
# which census variables to retrieve for each tract
vars_abridged = {'DP05_0001':'total_pop',    #total pop
             'DP05_0018':'median_age',    #median age
             'DP05_0071P':'pct_hispanic',   #pct pop hispanic or latino
             'DP05_0077P':'pct_white',   #pct pop non-hispanic white alone
             'DP05_0078P':'pct_black',   #pct pop non-hispanic black
             'DP05_0080':'pct_asian',   #pct pop non-hispanic asian
             'DP05_0002P':'pct_male',   #pct pop male
             'DP04_0007P':'pct_single_family_home',   #pct single family detached homes
             'DP04_0089':'med_home_value',    #median value of owner occupied units (dollars)
             'DP04_0037':'med_rooms_per_home',    #median number of rooms in house
             'DP04_0026P':'pct_built_before_1940',   #pct structure built 1939 or earlier
             'DP04_0047P':'pct_renting',   #pct renter-occupied housing units
             'DP04_0005':'rental_vacancy_rate',    #rental vacancy rate
             'DP04_0049':'avg_renter_household_size',    #average household size of renter-occupied housing units
             'DP04_0134':'med_gross_rent',    #median gross rent (dollars)
             'DP03_0062':'med_household_income',    #median household income
             'DP03_0025':'mean_commute_time',    #mean travel time to work
             'DP03_0019P':'pct_commute_drive_alone',   #pct commute drove alone
             'DP03_0128P':'pct_below_poverty',   #pct people with income below povery level
             'DP02_0057P':'pct_college_grad_student',   #pct who are students currently enrolled in college or grad school
             'DP02_0079P':'pct_same_residence_year_ago',   #pct residence 1 year ago was same house
             'DP02_0067P':'pct_bachelors_degree',   #pct bachelor's degree or higher
             'DP02_0111P':'pct_english_only',   #pct with english only language spoken at home
             'DP02_0092P':'pct_foreign_born'}   #pct of population foreign born


acs['variable_names'] = acs['variable'].map(vars_abridged)

In [9]:
# acs.to_csv("../data/acs_clean_final.csv")

In [13]:
acs.head()

Unnamed: 0,GEOID,NAME,variable,estimate,variable_names
0,37001,"Alamance County, North Carolina",DP02_0057P,21.2,pct_college_grad_student
1,37001,"Alamance County, North Carolina",DP02_0067P,87.7,pct_bachelors_degree
2,37001,"Alamance County, North Carolina",DP02_0092P,26.5,pct_foreign_born
3,37001,"Alamance County, North Carolina",DP02_0111P,2.0,pct_english_only
4,37001,"Alamance County, North Carolina",DP03_0128P,14.4,pct_below_poverty


In [34]:
acs = acs.pivot_table(values='estimate', 
                 index=[acs.GEOID, acs.NAME],
                 columns='variable_names', aggfunc='first').reset_index()

In [35]:
acs.to_csv("../data/acs_clean_final_pivoted.csv")