In [2]:
import censusdata
import pandas as pd

In [3]:
#function to make list of all county ids in state (given by census state id)
def county_list(state_number):
    counties = censusdata.geographies(censusdata.censusgeo([('state', state_number), ('county','*')]), 'acs5', 2018)
    county_list = []
    for i in counties.keys():
        county_list.append(counties[i].geo[1][1])
    return county_list

#function to pull defined variables for blocks in specified state, looping over counties (can't pull all blocks in a state)
#(input state id and list of variables)
def block_pull(state_id, variable_list):
    c_list = county_list(state_id)
    for i in range(0, len(c_list)):
        geo = censusdata.censusgeo([('state', state_id), ('county', c_list[i]), ('tract','*'),('block group','*')])
        county_df = censusdata.download('acs5', 2018, geo, variable_list)
        if i == 0:
            data = county_df
        else:
            data = pd.concat([data, county_df])
    return data

In [4]:
#just one variable in here for now, need to select and add variables
variables_list = ['B02001_001E','B02001_002E','B02001_003E','B02001_004E','B02001_005E','B02001_006E','B02001_007E',
                  'B02001_008E','B28006_001E','B28006_002E','B28006_008E','B28006_014E','B19013_001E','B17010_002E',
                  'B17010_001E','C16002_001E','C16002_004E','C16002_010E','C16002_013E','B03002_001E','B03002_012E']

block_data = block_pull('39', variables_list)

In [6]:
#make more meaningful variables, ie turn raw counts into proportions, etc.

#make % white column
block_data['PCT_White'] = block_data['B02001_002E']/block_data['B02001_001E']
#make % black and african american
block_data['PCT_Black_or_AA'] = block_data['B02001_003E']/block_data['B02001_002E']
#make % Asian column
block_data['PCT_Asian'] = block_data['B02001_005E']/block_data['B02001_002E']
#make % Hispanic/Latino
block_data['PCT_Hispanic_Latino'] = block_data['B03002_012E']/block_data['B03002_001E']
#make % Less than high school education
block_data['PCT_Less_Than_High_School'] = block_data['B28006_002E']/block_data['B28006_001E']
#make % high school no college
block_data['PCT_High_School_Grad'] = block_data['B28006_008E']/block_data['B28006_001E']
#make % bachelors or higher
block_data['PCT_Bach_Or_Higher'] = block_data['B28006_014E']/block_data['B28006_001E']
#Rename Median Income
block_data['Median_Income'] = block_data['B19013_001E']
#% Households w/ income below poverty line in last 12 months
block_data['PCT_Households_Poverty'] = block_data['B17010_002E']/block_data['B17010_001E']
#% households limited english spoken
block_data['PCT_Households_Limited_English'] = (block_data['C16002_004E']+block_data['C16002_010E']+block_data['C16002_013E'])/block_data['C16002_001E']

#drop old columns
block_data = block_data.drop(variables_list,axis=1)

In [7]:
# SAVE DATA, INDEX IS JUST FOR TESTING
data = block_data
data['index']=range(len(data))
data.head()

Unnamed: 0,PCT_White,PCT_Black_or_AA,PCT_Asian,PCT_Hispanic_Latino,PCT_Less_Than_High_School,PCT_High_School_Grad,PCT_Bach_Or_Higher,Median_Income,PCT_Households_Poverty,PCT_Households_Limited_English,index
"Block Group 3, Census Tract 9540, Gallia County, Ohio: Summary level: 150, state:39> county:053> tract:954000> block group:3",0.849407,0.083843,0.071616,0.02003,0.114222,0.599104,0.286674,40563,0.153846,0.0,0
"Block Group 2, Census Tract 9540, Gallia County, Ohio: Summary level: 150, state:39> county:053> tract:954000> block group:2",0.971014,0.029851,0.0,0.0,0.2019,0.776722,0.021378,32045,0.534161,0.0,1
"Block Group 1, Census Tract 9540, Gallia County, Ohio: Summary level: 150, state:39> county:053> tract:954000> block group:1",0.976,0.0,0.02459,0.0,0.281167,0.527851,0.190981,44167,0.141914,0.0,2
"Block Group 2, Census Tract 9539, Gallia County, Ohio: Summary level: 150, state:39> county:053> tract:953900> block group:2",0.933537,0.010451,0.0,0.0,0.063559,0.731992,0.204449,36759,0.23399,0.0,3
"Block Group 1, Census Tract 9539, Gallia County, Ohio: Summary level: 150, state:39> county:053> tract:953900> block group:1",0.926049,0.079857,0.0,0.0,0.173184,0.702514,0.124302,49100,0.364,0.0,4


In [13]:
data.to_csv('test.csv', index=False)