In [3]:
# Dependencies
from census import Census
from api_keys import api_key
import hvplot.pandas
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

# Create an instance of the Census library
year = 2020  # Define the year to include it as a column
c = Census(api_key, year=year)

In [5]:
# List of Census variables to fetch
variables = [
    "B25003_002E",  # Owner-occupied units
    "B25003_001E",  # Total occupied units
    "B25064_001E",  # Median gross rent
    "B25001_001E",  # Total housing units
    "B19013_001E",  # Median household income
    "B25088_002E",  # Median monthly owner costs
    "B01003_001E",  # Total population

    # Additional age ranges (B25007)
    "B25007_002E",  # Householder under 25 years
    "B25007_003E",  # Householder 25-34 years
    "B25007_004E",  # Householder 35-44 years
    "B25007_005E",  # Householder 45-54 years
    "B25007_006E",  # Householder 55-64 years
    "B25007_007E",  # Householder 65-74 years
    "B25007_008E",  # Householder 75 years and over

    # Additional race categories (B25006)
    "B25006_002E",  # White alone
    "B25006_003E",  # Black or African American alone
    "B25006_004E",  # American Indian and Alaska Native alone
    "B25006_005E",  # Asian alone
    "B25006_006E",  # Native Hawaiian and Other Pacific Islander alone
    "B25006_007E",  # Some other race alone
    "B25006_008E",  # Two or more races

    # Additional poverty status (B17021)
    "B17021_002E"   # Count of individuals whose income in the past 12 months is below the poverty level
]


# Fetch data at both the ZIP code and state levels
census_data = c.acs5.get(
    variables,
    {'for': 'zip code tabulation area:*'}
)

# Add "zip code tabulation area" and "state" to the columns list
columns = variables + ['zip code tabulation area']

# Create the DataFrame
census_pd = pd.DataFrame(census_data, columns=columns)
census_pd['Year'] = year

# Add the "year" column
census_pd['Year'] = year

In [6]:
# Rename Columns
columns_mapping = {
    "B25003_002E": "Owner_Occupied_Units",
    "B25003_001E": "Total_Occupied_Units",
    "B25064_001E": "Median_Gross_Rent",
    "B25001_001E": "Total_Housing_Units",
    "B19013_001E": "Median_Household_Income",
    "B25088_002E": "Median_Monthly_Owner_Costs",
    "B01003_001E": "Total_Population",

    # Additional age ranges
    "B25007_002E": "Householder_Under_25",
    "B25007_003E": "Householder_25_34",
    "B25007_004E": "Householder_35_44",
    "B25007_005E": "Householder_45_54",
    "B25007_006E": "Householder_55_64",
    "B25007_007E": "Householder_65_74",
    "B25007_008E": "Householder_75_Over",

    # Additional race categories
    "B25006_002E": "Householder_White",
    "B25006_003E": "Householder_Black_AfricanAmerican",
    "B25006_004E": "Householder_AmericanIndian_AlaskaNative",
    "B25006_005E": "Householder_Asian",
    "B25006_006E": "Householder_NativeHawaiian_PacificIslander",
    "B25006_007E": "Householder_OtherRace",
    "B25006_008E": "Householder_TwoOrMoreRaces",

    # Additional poverty status
    "B17021_002E": "Below_Poverty_Level",

    # Geographic columns
    "zip code tabulation area": "Zip_Code",
    "state": "State"
}

# Rename the columns
census_pd.rename(columns=columns_mapping, inplace=True)

census_pd.head()

Unnamed: 0,Owner_Occupied_Units,Total_Occupied_Units,Median_Gross_Rent,Total_Housing_Units,Median_Household_Income,Median_Monthly_Owner_Costs,Total_Population,Householder_Under_25,Householder_25_34,Householder_35_44,...,Householder_White,Householder_Black_AfricanAmerican,Householder_AmericanIndian_AlaskaNative,Householder_Asian,Householder_NativeHawaiian_PacificIslander,Householder_OtherRace,Householder_TwoOrMoreRaces,Below_Poverty_Level,Zip_Code,Year
0,3562.0,5555.0,385.0,7282.0,14398.0,747.0,16773.0,3562.0,18.0,105.0,...,4175.0,34.0,5.0,0.0,0.0,1187.0,154.0,10770.0,601,2020
1,9867.0,12901.0,393.0,17510.0,16771.0,855.0,37083.0,9867.0,21.0,299.0,...,9850.0,326.0,41.0,0.0,0.0,389.0,2295.0,17965.0,602,2020
2,11098.0,19431.0,418.0,24453.0,15786.0,814.0,45652.0,11098.0,60.0,429.0,...,15012.0,888.0,40.0,20.0,0.0,2635.0,836.0,22895.0,603,2020
3,1490.0,1983.0,353.0,2789.0,14980.0,549.0,6231.0,1490.0,0.0,9.0,...,1016.0,6.0,11.0,0.0,0.0,860.0,90.0,3701.0,606,2020
4,6576.0,8864.0,433.0,12454.0,20167.0,733.0,26502.0,6576.0,20.0,440.0,...,6517.0,180.0,0.0,0.0,0.0,453.0,1714.0,11966.0,610,2020


In [7]:
# Save the DataFrame as a CSV file
census_pd.to_csv('2020_census_data_by_zip.csv', index=False)