In [12]:
# Dependencies
import requests
import numpy as np
import pandas as pd
from census import Census
import matplotlib.pyplot as plt

In [13]:
# Import U.S. Census API Key
from config import api_key

# Build the Census client from the imported API key, with the year set to 2021
c = Census(api_key, year=2021)

In [36]:
# Define the zip code groups
north_salt_lake_zipcodes = ['84054', '84087']
salt_lake_city_zipcodes = ['84101', '84102', '84103', '84104', '84105', '84107', '84108', '84109', '84111', '84112', '84113', '84116', '84117', '84118', '84120', '84128', '84132', '84144']
south_salt_lake_county_unified_pd_zipcodes = ['84119', '84123', '84129']
south_salt_lake_zipcodes = ['84106', '84115']

# Every zip code of interest, one string per code. Built from the group lists
# so the combined list and the groups cannot drift apart. (A hand-typed version
# of this list held a single comma-separated string instead of 25 separate
# entries, which made membership checks against it silently fail.)
slc_zipcodes = (
    north_salt_lake_zipcodes
    + salt_lake_city_zipcodes
    + south_salt_lake_county_unified_pd_zipcodes
    + south_salt_lake_zipcodes
)

# Run Census Search to retrieve data for Utah and Salt Lake City (2021 ACS5 Census)
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E"
    ),
    {'for': 'zip code tabulation area:' + ','.join(slc_zipcodes)}
)
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)
# Column renaming
census_pd = census_pd.rename(
    columns={
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "NAME": "Name",
        "zip code tabulation area": "Zipcode"
    }
)
# Add a Poverty Rate column (Poverty Count / Population), as a percentage.
# pd.to_numeric(errors="coerce") turns missing or non-numeric values into NaN
# instead of raising, which a plain .astype(int) would do.
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
population = pd.to_numeric(census_pd["Population"], errors="coerce")
# Mask non-positive populations so the division yields NaN rather than inf.
census_pd["Poverty Rate"] = 100 * poverty_count / population.where(population > 0)

# Map a zip code to the name of the area group that lists it
def group_zipcode(zipcode):
    """Return the group label for ``zipcode``.

    The module-level zip code lists are checked in a fixed order and the
    label of the first list containing ``zipcode`` is returned. If no list
    contains it, the result is 'Other'.
    """
    # (label, members) pairs in lookup order
    labelled_groups = (
        ('North Salt Lake', north_salt_lake_zipcodes),
        ('Salt Lake City', salt_lake_city_zipcodes),
        ('South Salt Lake County Unified PD', south_salt_lake_county_unified_pd_zipcodes),
        ('South Salt Lake', south_salt_lake_zipcodes),
    )
    for label, members in labelled_groups:
        if zipcode in members:
            return label
    return 'Other'
# Apply the group_zipcode function to the Zipcode column to create a new column called Group
census_pd['Group'] = census_pd['Zipcode'].apply(group_zipcode)
# Configure the final DataFrame (column selection and display order)
census_pd = census_pd[
    [
        "Name",
        "Zipcode",
        "Group",
        "Population",
        "Median Age",
        "Household Income",
        "Per Capita Income",
        "Poverty Count",
        "Poverty Rate"
    ]
]
# Display DataFrame length and sample data.
# Only the last expression of a cell is rendered, so the row count is printed
# once and the cell ends with the preview of the first rows.
print(f"Number of rows in the DataFrame: {len(census_pd)}")
census_pd.head()

Number of rows in the DataFrame: 23
Number of rows in the DataFrame: 23


Unnamed: 0,Name,Zipcode,Group,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,ZCTA5 84054,84054,North Salt Lake,21443.0,30.8,85213.0,35917.0,628.0,2.928695
1,ZCTA5 84087,84087,North Salt Lake,16133.0,34.4,102639.0,37678.0,949.0,5.882353
2,ZCTA5 84101,84101,Salt Lake City,6422.0,35.8,50000.0,52194.0,894.0,13.920897
3,ZCTA5 84102,84102,Salt Lake City,18995.0,28.9,46906.0,38383.0,5188.0,27.312451
4,ZCTA5 84103,84103,Salt Lake City,22888.0,36.8,73911.0,68737.0,2248.0,9.821741


In [37]:
# Write the DataFrame to census_data.csv without the index column.
# The encoding is set to UTF-8 explicitly to avoid any issues later.
census_pd.to_csv(
    path_or_buf="census_data.csv",
    index=False,
    encoding="utf-8",
)