# Census - Race

#### This notebook contains code for gathering Census data tables and converting them to dataframes. 
#### Please use caution when altering this code.

In [2]:
import pandas as pd
from census import Census #<-- Python wrapper for census API
import requests
import os
import matplotlib.pyplot as plt
import matplotlib

# Census API Key
from config import api_key

# Let each DataFrame column display up to 300 characters before truncating
pd.set_option('display.max_colwidth', 300)

# Establish the Census API session from the API key and the data year
c = Census(api_key, year=2018)

In [3]:
# Retrieve the table catalogue from the ACS 5-year endpoint of the session
tables = c.acs5.tables()

# tables is a list of dicts, so pandas can build the frame from it directly
table_df = pd.DataFrame(data=tables)

##### The cell below gathers the column names from the Census Race table (B02001) that contain estimates and have the int data type.
##### Those column names are joined into a single string and then pulled from the census data.
##### Dictionary key names are replaced with more meaningful names.
##### Data is saved to a dataframe and exported as a csv file.

In [4]:
table_id = 'B02001'

# Look up the URL of this table's variable definitions in table_df
url = table_df.loc[table_df['name'] == table_id, 'variables'].values[0]

# Fetch the variable metadata. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status surfaces HTTP errors here instead of
# as a confusing JSON/KeyError further down.
variables_response = requests.get(url, timeout=30)
variables_response.raise_for_status()
response = variables_response.json()

# Transpose so that each variable ID becomes a row and each metadata field a column
variables = pd.DataFrame(response['variables']).transpose()

print(f"Number of available variables: {len(variables)}")

# Keep only int-typed variables whose label mentions "Estimate".
# na=False makes a missing label count as "no match" rather than putting
# NaN into the boolean mask.
table_info = variables[
    (variables['predicateType'] == 'int')
    & variables['label'].str.contains("Estimate", na=False)
]

# Map variable ID -> label. The label is read by column name instead of by
# position so the result does not depend on the order of the JSON fields.
label_by_variable = table_info['label'].to_dict()

# Comma-separated variable IDs requested alongside NAME for every county
column_list = ",".join(label_by_variable)
census_data = c.acs5.get(("NAME", column_list),
                         {'for': 'county:*'})

# Replace each variable-ID key with its label. pop + re-insert moves the
# renamed keys to the end of each record, in table_info order, which fixes
# the column order of the resulting DataFrame.
for record in census_data:
    for variable_id, label in label_by_variable.items():
        record[label] = record.pop(variable_id)

census_B02001_Race_df = pd.DataFrame(census_data)
census_B02001_Race_df.to_csv("census_B02001_Race.csv", encoding="utf-8", index=False)
census_B02001_Race_df

Number of available variables: 40


Unnamed: 0,NAME,state,county,Estimate!!Total!!Two or more races!!Two races including Some other race,Estimate!!Total!!Some other race alone,Estimate!!Total!!Two or more races,"Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races",Estimate!!Total!!White alone,Estimate!!Total,Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Total!!Asian alone,Estimate!!Total!!American Indian and Alaska Native alone,Estimate!!Total!!Black or African American alone
0,"Washington County, Mississippi",28,151,74.0,391.0,278.0,204.0,12097.0,47086.0,0.0,282.0,56.0,33982.0
1,"Perry County, Mississippi",28,111,5.0,16.0,152.0,147.0,9475.0,12028.0,0.0,17.0,0.0,2368.0
2,"Choctaw County, Mississippi",28,019,0.0,13.0,35.0,35.0,5631.0,8321.0,8.0,15.0,23.0,2596.0
3,"Itawamba County, Mississippi",28,057,12.0,170.0,144.0,132.0,21346.0,23480.0,7.0,57.0,75.0,1681.0
4,"Carroll County, Mississippi",28,015,15.0,0.0,97.0,82.0,6523.0,10129.0,0.0,0.0,6.0,3503.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,"Clayton County, Iowa",19,043,33.0,56.0,105.0,72.0,17215.0,17672.0,0.0,79.0,36.0,181.0
3216,"Buena Vista County, Iowa",19,021,91.0,1531.0,239.0,148.0,15684.0,20260.0,97.0,2155.0,3.0,551.0
3217,"Guthrie County, Iowa",19,077,9.0,67.0,169.0,160.0,10372.0,10674.0,0.0,27.0,36.0,3.0
3218,"Humboldt County, Iowa",19,091,0.0,52.0,93.0,93.0,9315.0,9566.0,0.0,40.0,41.0,25.0


### Census Race Column Changes

In [5]:
# Combine the state and county codes into a single county identifier.
# NOTE(review): this relies on 'state' and 'county' being strings (the
# displayed table shows zero-padded codes such as 019) - confirm.
census_B02001_Race_df['COUNTYFP'] = census_B02001_Race_df['state'] + census_B02001_Race_df['county']

# Sort by the combined code and renumber the rows from 0
census_Race_sorted = (
    census_B02001_Race_df
    .sort_values(by=['COUNTYFP'])
    .reset_index(drop=True)
)

# NAME looks like "<county>, <state>": split it once for the whole column
# instead of looping over rows. A NAME without a comma yields a missing
# state name rather than raising.
name_parts = census_Race_sorted['NAME'].str.split(',')
census_Race_sorted['State Name'] = name_parts.str[1].str.strip()
census_Race_sorted['County Name'] = name_parts.str[0]

# Show the resulting column names. (The previous argument-less
# DataFrame.rename() call raised a TypeError and renamed nothing.)
census_Race_sorted.columns.values

array(['NAME', 'state', 'county',
       'Estimate!!Total!!Two or more races!!Two races including Some other race',
       'Estimate!!Total!!Some other race alone',
       'Estimate!!Total!!Two or more races',
       'Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races',
       'Estimate!!Total!!White alone', 'Estimate!!Total',
       'Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone',
       'Estimate!!Total!!Asian alone',
       'Estimate!!Total!!American Indian and Alaska Native alone',
       'Estimate!!Total!!Black or African American alone', 'COUNTYFP',
       'State Name', 'County Name'], dtype=object)