In [7]:
# Dependencies
from census import Census
from config import api_key
import hvplot.pandas
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

# Silence every Python warning for the remainder of the session
import warnings
warnings.simplefilter("ignore")

# Census API client bound to a single survey year. The year lives in its
# own variable so it can also be written into the data as a column.
year = 2021
c = Census(api_key, year=year)

In [8]:
# ACS 5-year variable codes to request from the Census API
variables = [
    "B25003_002E",  # Owner-occupied units
    "B25003_001E",  # Total occupied units
    "B25064_001E",  # Median gross rent
    "B25001_001E",  # Total housing units
    "B19013_001E",  # Median household income
    "B25088_002E",  # Median monthly owner costs
    "B01003_001E",  # Total population

    # Tenure by age of householder (B25007)
    # NOTE(review): in the ACS B25007 layout, _002E is the owner-occupied
    # total and _003E.._008E are OWNER-occupied age bands -- not all-tenure
    # age bands. The rendered output supports this: the column fetched from
    # B25007_002E equals the B25003_002E owner-occupied count on every
    # displayed row. The rename mapping later in the notebook still labels
    # these as generic "Householder_<age>" columns; verify against the
    # official variable list before using them as age distributions.
    "B25007_002E",  # Owner occupied (total)
    "B25007_003E",  # Owner occupied: householder 15-24 years
    "B25007_004E",  # Owner occupied: householder 25-34 years
    "B25007_005E",  # Owner occupied: householder 35-44 years
    "B25007_006E",  # Owner occupied: householder 45-54 years
    "B25007_007E",  # Owner occupied: householder 55-59 years
    "B25007_008E",  # Owner occupied: householder 60-64 years

    # Race of householder (B25006)
    "B25006_002E",  # White alone
    "B25006_003E",  # Black or African American alone
    "B25006_004E",  # American Indian and Alaska Native alone
    "B25006_005E",  # Asian alone
    "B25006_006E",  # Native Hawaiian and Other Pacific Islander alone
    "B25006_007E",  # Some other race alone
    "B25006_008E",  # Two or more races

    # Poverty status (B17021)
    "B17021_002E"   # Count of individuals whose income in the past 12 months is below the poverty level
]


# Fetch every variable for all ZIP Code Tabulation Areas (ZCTAs).
# Only the ZCTA geography is requested; no state-level query is made here.
census_data = c.acs5.get(
    variables,
    {'for': 'zip code tabulation area:*'}
)

# Columns to keep: the requested variables plus the ZCTA identifier
columns = variables + ['zip code tabulation area']

# Build the DataFrame and tag every row with the survey year (assigned once)
census_pd = pd.DataFrame(census_data, columns=columns)
census_pd['Year'] = year


In [9]:
# Human-readable names for the raw Census variable codes
columns_mapping = {
    # Housing, income and population
    "B25003_002E": "Owner_Occupied_Units",
    "B25003_001E": "Total_Occupied_Units",
    "B25064_001E": "Median_Gross_Rent",
    "B25001_001E": "Total_Housing_Units",
    "B19013_001E": "Median_Household_Income",
    "B25088_002E": "Median_Monthly_Owner_Costs",
    "B01003_001E": "Total_Population",

    # Householder age (B25007)
    # NOTE(review): in the displayed output, Householder_Under_25 is equal to
    # Owner_Occupied_Units on every row, which suggests B25007_002E is the
    # owner-occupied total rather than an under-25 count (and that the
    # following codes are shifted owner-only age bands). Confirm against the
    # ACS variable list before relying on these labels.
    "B25007_002E": "Householder_Under_25",
    "B25007_003E": "Householder_25_34",
    "B25007_004E": "Householder_35_44",
    "B25007_005E": "Householder_45_54",
    "B25007_006E": "Householder_55_64",
    "B25007_007E": "Householder_65_74",
    "B25007_008E": "Householder_75_Over",

    # Householder race (B25006)
    "B25006_002E": "Householder_White",
    "B25006_003E": "Householder_Black_AfricanAmerican",
    "B25006_004E": "Householder_AmericanIndian_AlaskaNative",
    "B25006_005E": "Householder_Asian",
    "B25006_006E": "Householder_NativeHawaiian_PacificIslander",
    "B25006_007E": "Householder_OtherRace",
    "B25006_008E": "Householder_TwoOrMoreRaces",

    # Poverty status (B17021)
    "B17021_002E": "Below_Poverty_Level",

    # Geography ("state" is mapped for completeness; keys that are not
    # present as columns are simply ignored by rename)
    "zip code tabulation area": "Zip_Code",
    "state": "State"
}

# Apply the friendly names and preview the first rows
census_pd = census_pd.rename(columns=columns_mapping)

census_pd.head()

Unnamed: 0,Owner_Occupied_Units,Total_Occupied_Units,Median_Gross_Rent,Total_Housing_Units,Median_Household_Income,Median_Monthly_Owner_Costs,Total_Population,Householder_Under_25,Householder_25_34,Householder_35_44,...,Householder_White,Householder_Black_AfricanAmerican,Householder_AmericanIndian_AlaskaNative,Householder_Asian,Householder_NativeHawaiian_PacificIslander,Householder_OtherRace,Householder_TwoOrMoreRaces,Below_Poverty_Level,Zip_Code,Year
0,3553.0,5397.0,377.0,7306.0,15292.0,765.0,17126.0,3553.0,24.0,77.0,...,4640.0,28.0,4.0,0.0,0.0,500.0,225.0,11302.0,601,2021
1,9782.0,12858.0,414.0,17311.0,18716.0,907.0,37895.0,9782.0,0.0,280.0,...,7735.0,222.0,12.0,0.0,0.0,501.0,4388.0,17121.0,602,2021
2,11254.0,19295.0,409.0,24771.0,16789.0,814.0,49136.0,11254.0,39.0,339.0,...,14013.0,883.0,26.0,19.0,0.0,2836.0,1518.0,23617.0,603,2021
3,1440.0,1968.0,350.0,2786.0,18835.0,554.0,5751.0,1440.0,0.0,10.0,...,1053.0,0.0,11.0,0.0,0.0,793.0,111.0,3139.0,606,2021
4,6452.0,8934.0,478.0,12494.0,21239.0,776.0,26153.0,6452.0,9.0,366.0,...,4907.0,198.0,0.0,0.0,0.0,571.0,3258.0,11640.0,610,2021


In [10]:
# Save the DataFrame as a CSV file. The filename is derived from `year` so
# the label on disk always matches the survey year that was fetched
# (for year = 2021 this is '2021_census_data_by_zip.csv', as before).
census_pd.to_csv(f'{year}_census_data_by_zip.csv', index=False)