In [21]:
# Dependencies
import requests
import pandas as pd
# Import the API key
from config import census_key

In [22]:
# Build the state-level census dataframe from the American Community Survey (ACS)
# 2021 supplemental estimates. Adapted from code written for the Census API in Project 1.
#
# One request is made per variable; the responses are then combined into a single
# frame keyed on the state code, so values stay attached to the right state even if
# the API returns rows in a different order from one request to the next.

# Base URL for census dataset
base_url = 'https://api.census.gov/data/2021/acs/acsse?'

# Census variable codes to request
parameter_list = [
    'K200101_001E',  # Total population
    'K200104_002E',  # Population under 18 years
    'K202002_001E',  # Median earnings
]

# Parsed JSON response for each parameter, in parameter_list order.
# Each response is a list of rows whose first row is the header.
census_data = []

for parameter in parameter_list:
    query_url = base_url + "get=NAME," + parameter + "&for=state:*" + "&key=" + census_key
    print("Fetching data for parameter:", parameter)
    # A timeout keeps the cell from hanging forever on a stalled connection
    http_response = requests.get(query_url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON
    http_response.raise_for_status()
    response = http_response.json()
    census_data.append(response)

# The first response seeds the dataframe
df_data = census_data[0]

# The first row of a response holds the column headers
cols = df_data[0]

# One dict per data row, keyed by header name
rows = [dict(zip(cols, row)) for row in df_data[1:]]

# Create the dataframe with readable column names
df = pd.DataFrame(rows).rename(columns={
    'NAME': 'state',
    'state': 'state_code',
    cols[1]: 'population_total',
})

# Add every requested variable as its own column, matched on state code rather than
# on row position. The first variable is added again under its raw code so the
# intermediate CSV keeps the same columns it has always had.
for d in census_data:
    header, *records = d
    variable = header[1]
    code_position = header.index('state')
    values_by_state = {record[code_position]: record[1] for record in records}

    missing_states = set(df['state_code']) - set(values_by_state)
    if missing_states:
        raise ValueError(
            f"Census response for {variable} is missing state codes: {sorted(missing_states)}"
        )

    df[variable] = df['state_code'].map(values_by_state)

# Rename additional columns
df = df.rename(columns={
    'K200104_002E': 'population_under_18',
    'K202002_001E': 'earnings_median',
})

# Save the DataFrame to a CSV file
df.to_csv("../Resources/census_data_INTERMEDIATE.csv", index=False)

# Take an explicit copy so later cells can add or convert columns without writing
# into a slice of `df` (which raises SettingWithCopyWarning).
census_final = df[['state', 'population_total', 'population_under_18', 'earnings_median']].copy()


Fetching data for parameter: K200101_001E
Fetching data for parameter: K200104_002E
Fetching data for parameter: K202002_001E


In [23]:
# Preview the first five rows of the selected census columns
census_final.head(n=5)

Unnamed: 0,state,population_total,population_under_18,earnings_median
0,Alabama,5039877,1119942,34925
1,Puerto Rico,3263584,545788,17843
2,Arizona,7276316,1614284,39023
3,Arkansas,3025891,703793,33869
4,California,39237836,8769779,41891


In [24]:
# Convert population_total, population_under_18 and earnings_median to numeric dtypes.
# .assign() builds a new frame instead of writing column-by-column into a frame that
# was sliced out of `df`, which avoids SettingWithCopyWarning and keeps the cell safe
# to re-run.
numeric_columns = ['population_total', 'population_under_18', 'earnings_median']
census_final = census_final.assign(
    **{column: pd.to_numeric(census_final[column]) for column in numeric_columns}
)


In [25]:
# Add population_over_18 = population_total - population_under_18.
# .assign() returns a new frame rather than inserting a column into a frame that was
# sliced out of `df`, so no SettingWithCopyWarning is raised.
census_final = census_final.assign(
    population_over_18=census_final['population_total'] - census_final['population_under_18']
)
census_final.head()


Unnamed: 0,state,population_total,population_under_18,earnings_median,population_over_18
0,Alabama,5039877,1119942,34925,3919935
1,Puerto Rico,3263584,545788,17843,2717796
2,Arizona,7276316,1614284,39023,5662032
3,Arkansas,3025891,703793,33869,2322098
4,California,39237836,8769779,41891,30468057


In [26]:
# Write the finished population table to disk, leaving out the row index
census_final.to_csv(
    "../Resources/census_data_FINALSET.csv",
    index=False,
)