In [None]:
# Import necessary libraries for census data collection

import os

import autocensus
import geopandas as gpd
import numpy as np
import pandas as pd
from autocensus import Query

# Seclect census granularities of interest 
grans = ['county',
         'zip code tabulation area',
         'tract']

# Select States To Gather Data From 

states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

# Initlaize a list to Keep track of DataFrames From each state 

dfs = []

# Select Variables of Interest 

variables=['B01003_001E',
                   'B01002_001E',
                   'B02001_002E',
                   'B02001_003E',
                   'B02001_004E',
                   'B02001_005E',
                   'B03001_004E',
                   'B02001_009E',
                   'B06009_002E',
                   'B06009_003E',
                   'B06009_004E',
                   'B06009_005E',
                   'B06009_006E',
                   'B06008_003E',
                   'B19013_001E',
                   'B08131_001E',
                   'B08128_001E',
                   'B25031_001E',
                   'B25032_001E',
                   'B25032_013E',
                   'B07013_002E',
                   'B07013_003E']

# Gather Data For Each Granularity for each state 

for gran in grans:
    tests = []
    for state in states:
        autocensus.clear_cache()
        # Set Query Parameters (ACS Type, Time Period and Geometry)
        query = Query(
        estimate=5,
        years=[2018,2019],
        variables=variables,
        for_geo=[gran +':*'],
        in_geo=['state:'+state],
        geometry='polygons',
        census_api_key='8fb00fadc62b57209f406b9127acb553e3f2d6a3')
        dfs.append(query.run())
        df = pd.concat(dfs)
        df = df.set_index('name')  
        dfs = []
        # Clean And Format Each Relvant Variable As Well As Growth Rates Then Put them into DataFrame
        dfpop2018 = df[(df["variable_concept"]=="TOTAL POPULATION") & (df["year"]==2018)]
        dfpop2019 = df[(df["variable_concept"]=="TOTAL POPULATION") & (df["year"]==2019)]
        dfmedianAge2019 = df[(df["variable_concept"]=="MEDIAN AGE BY SEX") & (df["year"]==2019)]
        dfmedianAge2018 = df[(df["variable_concept"]=="MEDIAN AGE BY SEX") & (df["year"]==2018)]
        dfmedianIncome2019 = df[(df["variable_concept"]=="MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2019 INFLATION-ADJUSTED DOLLARS)") & (df["year"]==2019)]
        dfmedianIncome2018 = df[(df["variable_concept"]=="MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2018 INFLATION-ADJUSTED DOLLARS)") & (df["year"]==2018)]
        dfpopw2019 = df[(df["variable_label"]=="Estimate!!Total:!!White alone") & (df["year"]==2019)]
        dfpopw2018 = df[(df["variable_label"]=="Estimate!!Total!!White alone") & (df["year"]==2018)]
        dfpopb2019 = df[(df["variable_label"]=="Estimate!!Total:!!Black or African American alone") & (df["year"]==2019)]
        dfpopb2018 = df[(df["variable_label"]=="Estimate!!Total!!Black or African American alone") & (df["year"]==2018)]
        dfpopai2019 = df[(df["variable_label"]=="Estimate!!Total:!!American Indian and Alaska Native alone") & (df["year"]==2019)]
        dfpopai2018 = df[(df["variable_label"]=="Estimate!!Total!!American Indian and Alaska Native alone") & (df["year"]==2018)]
        dfpopa2019 = df[(df["variable_label"]=="Estimate!!Total:!!Asian alone") & (df["year"]==2019)]
        dfpopa2018 = df[(df["variable_label"]=="Estimate!!Total!!Asian alone") & (df["year"]==2018)]
        dfpoptwo2019 = df[(df["variable_label"]=="Estimate!!Total:!!Two or more races:!!Two races including Some other race") & (df["year"]==2019)]
        dfpoptwo2018 = df[(df["variable_label"]=="Estimate!!Total!!Two or more races!!Two races including Some other race") & (df["year"]==2018)]
        dfpoph2019 = df[(df["variable_label"]=="Estimate!!Total:!!Hispanic or Latino:!!Mexican") & (df["year"]==2019)]
        dfpoph2018 = df[(df["variable_label"]=="Estimate!!Total!!Hispanic or Latino!!Mexican") & (df["year"]==2018)]
        dfpopm2019 = df[(df["variable_label"]=="Estimate!!Total:!!Now married, except separated") & (df["year"]==2019)]
        dfpopm2018 = df[(df["variable_label"]=="Estimate!!Total!!Now married, except separated") & (df["year"]==2018)]
        dfpop2019['Population Density'] = dfpop2019['value'] / gpd.GeoDataFrame(dfpop2019).area
        dfpop2019['Population Growth Rate'] = (dfpop2019['value'] - dfpop2018['value']) / dfpop2018['value']*100
        dfpop2019['Population'] = dfpop2019['value']
        dfpop = dfpop2019[['Population','geometry','Population Density','Population Growth Rate']]
        dfs.append(dfpop)
        dfmedianAge2019['Age Growth Rate'] = (dfmedianAge2019['value'] - dfmedianAge2018['value']) / dfmedianAge2018['value']*100
        dfmedianAge2019['Median Age'] = dfmedianAge2019['value']
        dfmedianAge = dfmedianAge2019[['Median Age', 'geometry', 'Age Growth Rate']]
        dfs.append(dfmedianAge)
        dfmedianIncome2019['Income Growth Rate'] = (dfmedianIncome2019['value'] - dfmedianIncome2018['value']) / dfmedianIncome2018['value']*100
        dfmedianIncome2019['Median Household Income'] = dfmedianIncome2019['value']
        dfmedianIncome = dfmedianIncome2019[['Income Growth Rate','geometry','Median Household Income']]
        dfs.append(dfmedianIncome)
        dfpopw2019['White Population Density'] = dfpopw2019['value'] / gpd.GeoDataFrame(dfpopw2019).area
        dfpopw2019['White Population Growth Rate'] = (dfpopw2019['value'] - dfpopw2018['value']) / dfpopw2018['value']*100
        dfpopw2019['White Population'] = dfpopw2019['value']
        dfwpop = dfpopw2019[['White Population','geometry','White Population Density','White Population Growth Rate']]
        dfs.append(dfwpop)
        dfpopb2019['Black Population Density'] = dfpopb2019['value'] / gpd.GeoDataFrame(dfpopb2019).area
        dfpopb2019['Black Population Growth Rate'] = (dfpopb2019['value'] - dfpopb2018['value']) / dfpopb2018['value']*100
        dfpopb2019['Black Population'] = dfpopb2019['value']
        dfbpop = dfpopb2019[['Black Population','geometry','Black Population Density','Black Population Growth Rate']]
        dfs.append(dfbpop)
        dfpopai2019['American Indian Population Density'] = dfpopai2019['value'] / gpd.GeoDataFrame(dfpopai2019).area
        dfpopai2019['American Indian Population Growth Rate'] = (dfpopai2019['value'] - dfpopai2018['value']) / dfpopai2018['value']*100
        dfpopai2019['American Indian Population'] = dfpopai2019['value']
        dfaipop = dfpopai2019[['American Indian Population','geometry','American Indian Population Density','American Indian Population Growth Rate']]
        dfs.append(dfaipop)
        dfpopa2019['Asian Population Density'] = dfpopa2019['value'] / gpd.GeoDataFrame(dfpopa2019).area
        dfpopa2019['Asian Population Growth Rate'] = (dfpopa2019['value'] - dfpopa2018['value']) / dfpopa2018['value']*100
        dfpopa2019['Asian Population'] = dfpopa2019['value']
        dfapop = dfpopa2019[['Asian Population','geometry','Asian Population Density','Asian Population Growth Rate']]
        dfs.append(dfapop)
        dfpoptwo2019['Biracial Population Density'] = dfpoptwo2019['value'] / gpd.GeoDataFrame(dfpoptwo2019).area
        dfpoptwo2019['Biracial Population Growth Rate'] = (dfpoptwo2019['value'] - dfpoptwo2018['value']) / dfpoptwo2018['value']*100
        dfpoptwo2019['Biracial Population'] = dfpoptwo2019['value']
        dftwopop = dfpoptwo2019[['Biracial Population','geometry','Biracial Population Density','Biracial Population Growth Rate']]
        dfs.append(dftwopop)
        dfpoph2019['Hispanic Population Density'] = dfpoph2019['value'] / gpd.GeoDataFrame(dfpoph2019).area
        dfpoph2019['Hispanic Population Growth Rate'] = (dfpoph2019['value'] - dfpoph2018['value']) / dfpoph2018['value']*100
        dfpoph2019['Hispanic Population'] = dfpoph2019['value']
        dfhpop = dfpoph2019[['Hispanic Population','geometry','Hispanic Population Density','Hispanic Population Growth Rate']]
        dfs.append(dfhpop)
        dfpopm2019['Married Population Density'] = dfpopm2019['value'] / gpd.GeoDataFrame(dfpopm2019).area
        dfpopm2019['Married Population Growth Rate'] = (dfpopm2019['value'] - dfpopm2018['value']) / dfpopm2018['value']*100
        dfpopm2019['Married Population'] = dfpopm2019['value']
        dfmpop = dfpopm2019[['Married Population','geometry','Married Population Density','Married Population Growth Rate']]
        dfs.append(dfmpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Less than high school graduate") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Total!!Less than high school graduate") & (df["year"]==2018)]
        dfpopnh2019['No High School Grad Population Density'] = dfpopnh2019['value'] / gpd.GeoDataFrame(dfpopnh2019).area
        dfpopnh2019['No High School Grad Population Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['No High School Grad Population'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['No High School Grad Population','geometry','No High School Grad Population Density','No High School Grad Population Growth Rate']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!High school graduate (includes equivalency)") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Total!!High school graduate (includes equivalency)") & (df["year"]==2018)]
        dfpopnh2019['High School Grad Population Density'] = dfpopnh2019['value'] / gpd.GeoDataFrame(dfpopnh2019).area
        dfpopnh2019['High School Grad Population Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['High School Grad Population'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['High School Grad Population','geometry','High School Grad Population Density','High School Grad Population Growth Rate']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Some college or associate's degree") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Total!!Some college or associate's degree") & (df["year"]==2018)]
        dfpopnh2019['Some college Population Density'] = dfpopnh2019['value'] / gpd.GeoDataFrame(dfpopnh2019).area
        dfpopnh2019['Some college Population Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Some college Population'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Some college Population','geometry','Some college Population Density','Some college Population Growth Rate']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Bachelor's degree") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Total!!Bachelor's degree") & (df["year"]==2018)]
        dfpopnh2019['College Grad Population Density'] = dfpopnh2019['value'] / gpd.GeoDataFrame(dfpopnh2019).area
        dfpopnh2019['College Grad Population Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['College Grad Population'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['College Grad Population','geometry','College Grad Population Density','College Grad Population Growth Rate']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Graduate or professional degree") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Total!!Graduate or professional degree") & (df["year"]==2018)]
        dfpopnh2019['Graduate Grad Population Density'] = dfpopnh2019['value'] / gpd.GeoDataFrame(dfpopnh2019).area
        dfpopnh2019['Graduate Grad Population Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Graduate Grad Population'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Graduate Grad Population','geometry','Graduate Grad Population Density','Graduate Grad Population Growth Rate']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Median gross rent --!!Total:") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]=="Estimate!!Median gross rent --!!Total") & (df["year"]==2018)]
        dfpopnh2019['Census Rent Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Census Rent'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Census Rent Growth Rate','geometry','Census Rent']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Renter-occupied housing units:") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]== "Estimate!!Total!!Renter-occupied housing units") & (df["year"]==2018)]
        dfpopnh2019['Census Units Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Census Total Units'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Census Units Growth Rate','geometry','Census Total Units']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Householder lived in owner-occupied housing units") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]== "Estimate!!Total!!Householder lived in owner-occupied housing units") & (df["year"]==2018)]
        dfpopnh2019['Home Owner Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Census Total Homeowners'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Home Owner Growth Rate','geometry','Census Total Homeowners']]
        dfs.append(dfnhpop)
        dfpopnh2019 = df[(df["variable_label"]=="Estimate!!Total:!!Householder lived in renter-occupied housing units") & (df["year"]==2019)]
        dfpopnh2018 = df[(df["variable_label"]== "Estimate!!Total!!Householder lived in renter-occupied housing units") & (df["year"]==2018)]
        dfpopnh2019['Renter Growth Rate'] = (dfpopnh2019['value'] - dfpopnh2018['value']) / dfpopnh2018['value']*100
        dfpopnh2019['Census Total Renters'] = dfpopnh2019['value']
        dfnhpop = dfpopnh2019[['Renter Growth Rate','geometry','Census Total Renters']]
        dfs.append(dfnhpop)
        test = pd.concat(dfs,axis=1)
        test['Percent White'] = test['White Population'] / test['Population'] * 100
        test['Percent Black'] = test['Black Population'] / test['Population'] * 100
        test['Percent American Indian'] = test['American Indian Population'] / test['Population'] * 100
        test['Percent Asian'] = test['Asian Population'] / test['Population'] * 100
        test['Percent Biracial'] = test['Biracial Population'] / test['Population'] * 100
        test['Percent Hispanic'] = test['Hispanic Population'] / test['Population'] * 100
        test['Percent Married'] = test['Married Population'] / test['Population'] * 100
        test['Percent No High School Grad'] = test['No High School Grad Population'] / test['Population'] * 100
        test['Percent High School Grad'] = test['High School Grad Population'] / test['Population'] * 100
        test['Percent Some college'] = test['Some college Population'] / test['Population'] * 100
        test['Percent College Grad'] = test['College Grad Population'] / test['Population'] * 100
        test['Percent Graduate Grad'] = test['Graduate Grad Population'] / test['Population'] * 100
        test['Rent Affordablity Ratio'] = (test['Census Rent'] * 12) / test['Median Household Income'] * 100
        test['Renter Proportion'] = (test['Census Total Renters']) / (test['Census Total Renters'] + test['Census Total Homeowners'])
        test['Units Per Capita'] = test['Census Total Units'] / (test['Population'] * test['Renter Proportion'])
        test['Renter Proportion'] = test['Renter Proportion'] * 100
        test.replace([np.inf], 1, inplace=True)
        test = test.loc[:,~test.columns.duplicated()]
        tests.append(test)
        autocensus.clear_cache()
    # Create One Sptail DataFrame (GEOJson) For Each Granularity
    test = pd.concat(tests,axis=0)
    test_all =  gpd.GeoDataFrame(test)
    test_all.to_file("/Users/ryanbrown/Downloads/census_" + gran + ".json", driver="GeoJSON")

In [None]:
# Load the desired file to be cleaned (if you want to pare down the variable count)
# NOTE(review): this file name ("censusOrigintract.json") is not one of the
# "census_<granularity>.json" files written by the collection loop above --
# confirm the file exists (e.g. was renamed by hand) before running this cell.
df = gpd.read_file('/Users/ryanbrown/Downloads/censusOrigintract.json')

In [None]:
# Add tract, county and state fields by splitting the comma-separated `name`.
# Each part is stripped so that County and State do not keep the whitespace
# that follows a comma in the source name (a bare "," split would leave
# values with a leading space).
name_parts = df['name'].str.split(",", expand=True)
df[['Tract ID', "County", "State"]] = name_parts.apply(lambda part: part.str.strip())

In [None]:
# Keep only the variables relevant to the final output, in display order
kept_columns = [
    'Population',
    'Population Density',
    'Median Household Income',
    'Income Growth Rate',
    'Population Growth Rate',
    'Census Rent',
    'Renter Proportion',
    'Rent Affordablity Ratio',
    'Units Per Capita',
    'geometry',
    'Tract ID',
    'County',
    'State',
]
df = df[kept_columns]

In [None]:
# Clean the output by rounding to two decimal places
df = df.round(decimals=2)

In [None]:
# Write the result as GeoJSON, ready to be uploaded to kepler
kepler_path = "/Users/ryanbrown/Downloads/census_ST.json"
df.to_file(kepler_path, driver="GeoJSON")