In [34]:
import censusdata
import pandas as pd
import numpy as np

In [35]:
def county_list(state_number, src='acs5', year=2018):
    """Return the county ids of every county in one state.

    Asks ``censusdata.geographies`` for all counties ('*') inside the given state
    and collects the county component of each geography that comes back.

    Args:
        state_number: Census state id, used as the 'state' value of the query
            geography.
        src: Data source name handed to ``censusdata.geographies``. Defaults to
            'acs5', the value that was previously hard-coded.
        year: Data year handed to ``censusdata.geographies``. Defaults to 2018,
            the value that was previously hard-coded.

    Returns:
        list: One county id per returned geography, in the iteration order of
        the mapping returned by ``censusdata.geographies``.
    """
    counties = censusdata.geographies(
        censusdata.censusgeo([('state', state_number), ('county', '*')]), src, year)
    # Each returned geography carries (level, id) pairs in .geo; pick the county
    # by its level name instead of relying on it sitting at position 1.
    return [dict(geography.geo)['county'] for geography in counties.values()]

def block_pull(state_id, variable_list):
    """Download the given ACS variables for every block group in one state.

    The data is requested one county at a time (the original author's note: all
    block-level rows of a state cannot be pulled in a single request) and the
    per-county frames are stacked into one DataFrame.

    Args:
        state_id: Census state id, used as the 'state' value of each request.
        variable_list: List of variable codes passed to ``censusdata.download``.

    Returns:
        pandas.DataFrame: The per-county downloads concatenated in county order.
        If the state has no counties, an empty frame whose columns are
        ``variable_list`` is returned.
    """
    county_frames = []
    for county_id in county_list(state_id):
        geo = censusdata.censusgeo([('state', state_id), ('county', county_id),
                                    ('tract', '*'), ('block group', '*')])
        county_frames.append(censusdata.download('acs5', 2018, geo, variable_list))

    if not county_frames:
        # Nothing was downloaded: hand back an empty, correctly shaped frame
        # instead of failing on a result variable that was never assigned.
        return pd.DataFrame(columns=variable_list)

    # Concatenate once at the end rather than re-copying the growing frame on
    # every loop iteration.
    return pd.concat(county_frames)

In [None]:
# ACS variable codes to download, grouped by source table. The derived-column cell
# further down turns these raw estimates into shares and then drops them, so keep
# the two cells in sync when adding or removing codes.
variables_list = [
    # B02001: race counts (001 is the total; 002, 003 and 005 feed PCT_White,
    # PCT_Black_or_AA and PCT_Asian; the remaining codes are downloaded but unused)
    'B02001_001E', 'B02001_002E', 'B02001_003E', 'B02001_004E',
    'B02001_005E', 'B02001_006E', 'B02001_007E', 'B02001_008E',
    # B28006: education counts behind the PCT_Less_Than_High_School,
    # PCT_High_School_Grad and PCT_Bach_Or_Higher columns (001 is the total)
    'B28006_001E', 'B28006_002E', 'B28006_008E', 'B28006_014E',
    # B19013: median income (becomes Median_Income)
    'B19013_001E',
    # B17010: poverty counts behind PCT_Households_Poverty (001 is the total)
    'B17010_002E', 'B17010_001E',
    # C16002: limited-English household counts (001 is the total)
    'C16002_001E', 'C16002_004E', 'C16002_010E', 'C16002_013E',
    # B03002: Hispanic/Latino count (012) over its total (001)
    'B03002_001E', 'B03002_012E',
]

# Census state id handed to block_pull.
# NOTE(review): '39' should be Ohio - confirm against the Census FIPS state list.
STATE_ID = '39'

block_data = block_pull(STATE_ID, variables_list)

In [None]:
# Make more meaningful variables: turn the raw counts into proportions, give the
# median income a readable name, then drop the raw ACS columns.
# NOTE: this cell rebinds block_data and drops the raw columns, so running it a
# second time without re-pulling the data fails on the missing raw columns.

# Race shares: every group is divided by the B02001 total (B02001_001E).
race_total = block_data['B02001_001E']
# % white
block_data['PCT_White'] = block_data['B02001_002E'] / race_total
# % black and african american
block_data['PCT_Black_or_AA'] = block_data['B02001_003E'] / race_total
# % Asian
block_data['PCT_Asian'] = block_data['B02001_005E'] / race_total
# % Hispanic/Latino
block_data['PCT_Hispanic_Latino'] = block_data['B03002_012E'] / block_data['B03002_001E']

# Education shares, all over the B28006 total (B28006_001E).
# NOTE(review): confirm the exact category labels of B28006_002E/_008E/_014E against
# the ACS table shell; the column names below follow the original author's reading.
education_total = block_data['B28006_001E']
# % less than high school education
block_data['PCT_Less_Than_High_School'] = block_data['B28006_002E'] / education_total
# % high school no college
block_data['PCT_High_School_Grad'] = block_data['B28006_008E'] / education_total
# % bachelors or higher
block_data['PCT_Bach_Or_Higher'] = block_data['B28006_014E'] / education_total

# Median income, copied unchanged under a readable name.
# NOTE(review): ACS medians may carry negative sentinel values for unavailable
# estimates - confirm whether Median_Income needs cleaning before use.
block_data['Median_Income'] = block_data['B19013_001E']

# % households w/ income below poverty line in last 12 months
block_data['PCT_Households_Poverty'] = block_data['B17010_002E'] / block_data['B17010_001E']

# % households with limited English spoken: the three C16002 limited-English
# counts added together, over the C16002 total.
limited_english_households = (block_data['C16002_004E']
                              + block_data['C16002_010E']
                              + block_data['C16002_013E'])
block_data['PCT_Households_Limited_English'] = limited_english_households / block_data['C16002_001E']

# Drop the raw columns now that the derived ones exist.
block_data = block_data.drop(columns=variables_list)
# Get rid of inf values for upload. replace() returns a new frame, so the result
# has to be assigned back for the cleanup to take effect.
block_data = block_data.replace([np.inf, -np.inf], np.nan)

In [None]:
# Swap +/- infinity (e.g. a ratio whose denominator was zero) for NaN and keep
# the cleaned frame under the same name.
block_data = block_data.replace(to_replace=[np.inf, -np.inf], value=np.nan)

In [None]:
# SAVE DATA: build the frame that gets written out. The sequential 'index' column
# is just for testing.
# Work on a copy so the extra column is not also added to block_data through a
# shared reference.
data = block_data.copy()
data['index'] = range(len(data))
data.head()

In [None]:
# Write the frame to disk. index=False leaves the DataFrame's own index out of the
# file, so the 'index' column is the only row identifier that gets saved.
# NOTE(review): the frame's own index presumably holds the censusdata geography
# labels - confirm they are not needed downstream.
data.to_csv(path_or_buf='test.csv', index=False)