In [1]:
# Dependencies
import requests
import pandas as pd
from census import Census
import json
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Import U.S. Census API Key
from config import census_key

# Create an instance of the Census library
c = Census(
    census_key,
    year = 2022
)

In [3]:
# Run Census Search to retrieve data on all states (2022 ACS1 Census)
census_data = c.acs1.get(
    (
        "NAME",
        "B01003_001E",
        "B01001_002E",
        "B01001_026E",
        "B02001_002E",
        "B02001_003E",
        "B02001_004E",
        "B02001_005E",
        "B02001_006E",
        "B03001_003E",
        "B19301_001E",
        "B17001_002E"
    ),
    {'for': 'state:*'}
)

# Convert to DataFrame
population_2022 = pd.DataFrame(census_data)

# Column renaming
population_2022 = population_2022.rename(
    columns = {
        "B01003_001E": "Population",
        "B01001_002E": "Male",
        "B01001_026E": "Female",
        "B02001_002E": "White",
        "B02001_003E": "Black",
        "B02001_004E": "Native",
        "B02001_005E": "Asian",
        "B02001_006E": "Pacific",
        "B03001_003E": "Hispanic",
        "B19301_001E": "Per_Capita_Income",
        "B17001_002E": "Poverty_Count",
        "NAME": "Name"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
population_2022["Poverty_Rate"] = 100 * population_2022["Poverty_Count"].astype(int) / population_2022["Population"].astype(int)


# Configure the final DataFrame
population_2022 = population_2022[
    [
        "Name",
        "Population",
        "Male",
        "Female",
        "White",
        "Black",
        "Native",
        "Asian",
        "Pacific",
        "Hispanic",
        "Per_Capita_Income",
        "Poverty_Count",
        "Poverty_Rate"
    ]
]

# Rename name to state
population_2022 = population_2022.rename(columns={"Name":"State"})

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(population_2022)}")
population_2022

Number of rows in the DataFrame: 52


Unnamed: 0,State,Population,Male,Female,White,Black,Native,Asian,Pacific,Hispanic,Per_Capita_Income,Poverty_Count,Poverty_Rate
0,Alabama,5074296.0,2461248.0,2613048.0,3302528.0,1302035.0,24400.0,78893.0,4977.0,246477.0,33777.0,800395.0,15.773518
1,Alaska,733583.0,385667.0,347916.0,437533.0,22202.0,95268.0,46184.0,14724.0,56491.0,43054.0,78608.0,10.715625
2,Arizona,7359197.0,3678381.0,3680816.0,4254015.0,340760.0,301909.0,266441.0,15099.0,2388520.0,39819.0,897852.0,12.200407
3,Arkansas,3045637.0,1504488.0,1541149.0,2103849.0,437331.0,22250.0,48921.0,14229.0,255416.0,31380.0,496311.0,16.295803
4,California,39029342.0,19536425.0,19492917.0,15175598.0,2121422.0,504589.0,6054038.0,159920.0,15732184.0,46661.0,4670324.0,11.966187
5,Colorado,5839926.0,2960896.0,2879030.0,4106707.0,235519.0,71351.0,190181.0,6916.0,1314962.0,49071.0,540517.0,9.255545
6,Connecticut,3626205.0,1776689.0,1849516.0,2357855.0,380229.0,10269.0,175069.0,1214.0,658978.0,51581.0,345695.0,9.533245
7,Delaware,1018396.0,494657.0,523739.0,610378.0,224761.0,3670.0,41501.0,86.0,105300.0,42571.0,93285.0,9.159993
8,District of Columbia,671803.0,319763.0,352040.0,257963.0,282578.0,3130.0,27961.0,836.0,78911.0,71699.0,85676.0,12.753143
9,Florida,22244823.0,10953468.0,11291355.0,12445576.0,3345420.0,90848.0,640512.0,12446.0,6025039.0,40278.0,2762679.0,12.419425


In [4]:
# Format income and poverty rate 
population_2022["Per_Capita_Income"] = population_2022["Per_Capita_Income"].map("{:.2f}".format)
population_2022["Poverty_Rate"] = population_2022["Poverty_Rate"].map("{:,.2f}".format)

population_2022

Unnamed: 0,State,Population,Male,Female,White,Black,Native,Asian,Pacific,Hispanic,Per_Capita_Income,Poverty_Count,Poverty_Rate
0,Alabama,5074296.0,2461248.0,2613048.0,3302528.0,1302035.0,24400.0,78893.0,4977.0,246477.0,33777.0,800395.0,15.77
1,Alaska,733583.0,385667.0,347916.0,437533.0,22202.0,95268.0,46184.0,14724.0,56491.0,43054.0,78608.0,10.72
2,Arizona,7359197.0,3678381.0,3680816.0,4254015.0,340760.0,301909.0,266441.0,15099.0,2388520.0,39819.0,897852.0,12.2
3,Arkansas,3045637.0,1504488.0,1541149.0,2103849.0,437331.0,22250.0,48921.0,14229.0,255416.0,31380.0,496311.0,16.3
4,California,39029342.0,19536425.0,19492917.0,15175598.0,2121422.0,504589.0,6054038.0,159920.0,15732184.0,46661.0,4670324.0,11.97
5,Colorado,5839926.0,2960896.0,2879030.0,4106707.0,235519.0,71351.0,190181.0,6916.0,1314962.0,49071.0,540517.0,9.26
6,Connecticut,3626205.0,1776689.0,1849516.0,2357855.0,380229.0,10269.0,175069.0,1214.0,658978.0,51581.0,345695.0,9.53
7,Delaware,1018396.0,494657.0,523739.0,610378.0,224761.0,3670.0,41501.0,86.0,105300.0,42571.0,93285.0,9.16
8,District of Columbia,671803.0,319763.0,352040.0,257963.0,282578.0,3130.0,27961.0,836.0,78911.0,71699.0,85676.0,12.75
9,Florida,22244823.0,10953468.0,11291355.0,12445576.0,3345420.0,90848.0,640512.0,12446.0,6025039.0,40278.0,2762679.0,12.42


In [5]:
population_2022.to_csv("Output Data/population2022.csv", encoding="utf-8", index=False)

In [6]:
result = population_2022.to_json(orient="records")
parsed = json.loads(result)
population_2022 = json.dumps(parsed, indent=4)
print(population_2022)

[
    {
        "State": "Alabama",
        "Population": 5074296.0,
        "Male": 2461248.0,
        "Female": 2613048.0,
        "White": 3302528.0,
        "Black": 1302035.0,
        "Native": 24400.0,
        "Asian": 78893.0,
        "Pacific": 4977.0,
        "Hispanic": 246477.0,
        "Per_Capita_Income": "33777.00",
        "Poverty_Count": 800395.0,
        "Poverty_Rate": "15.77"
    },
    {
        "State": "Alaska",
        "Population": 733583.0,
        "Male": 385667.0,
        "Female": 347916.0,
        "White": 437533.0,
        "Black": 22202.0,
        "Native": 95268.0,
        "Asian": 46184.0,
        "Pacific": 14724.0,
        "Hispanic": 56491.0,
        "Per_Capita_Income": "43054.00",
        "Poverty_Count": 78608.0,
        "Poverty_Rate": "10.72"
    },
    {
        "State": "Arizona",
        "Population": 7359197.0,
        "Male": 3678381.0,
        "Female": 3680816.0,
        "White": 4254015.0,
        "Black": 340760.0,
        "Native": 30

In [7]:
# Run Census Search to retrieve data on all states (2022 ACS1 Census)
census_data = c.acs1.get(
    (
        "NAME",
        "B01003_001E",
        "B01001_002E",
        "B01001_026E",
        "B02001_002E",
        "B02001_003E",
        "B02001_004E",
        "B02001_005E",
        "B02001_006E",
        "B03001_003E",
        "B19301_001E",
        "B17001_002E"
    ),
    {'for': 'county:*'}
)

# Convert to DataFrame
county_pop_2022 = pd.DataFrame(census_data)

# Column renaming
county_pop_2022 = county_pop_2022.rename(
    columns = {
        "B01003_001E": "Population",
        "B01001_002E": "Male",
        "B01001_026E": "Female",
        "B02001_002E": "White",
        "B02001_003E": "Black",
        "B02001_004E": "Native",
        "B02001_005E": "Asian",
        "B02001_006E": "Pacific",
        "B03001_003E": "Hispanic",
        "B19301_001E": "Per_Capita_Income",
        "B17001_002E": "Poverty_Count",
        "county": "County",
        "NAME": "Name"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
county_pop_2022["Poverty_Rate"] = 100 * county_pop_2022["Poverty_Count"].astype(int) / county_pop_2022["Population"].astype(int)


# Configure the final DataFrame
county_pop_2022 = county_pop_2022[
    [
        "County",
        "Name",
        "Population",
        "Male",
        "Female",
        "White",
        "Black",
        "Native",
        "Asian",
        "Pacific",
        "Hispanic",
        "Per_Capita_Income",
        "Poverty_Count",
        "Poverty_Rate"
    ]
]

# Rename name to state
county_pop_2022 = county_pop_2022.rename(columns={"Name":"County_State"})

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(county_pop_2022)}")
county_pop_2022

Number of rows in the DataFrame: 848


Unnamed: 0,County,County_State,Population,Male,Female,White,Black,Native,Asian,Pacific,Hispanic,Per_Capita_Income,Poverty_Count,Poverty_Rate
0,003,"Baldwin County, Alabama",246435.0,120664.0,125771.0,204709.0,17251.0,1012.0,2363.0,111.0,,38198.0,32313.0,13.112180
1,015,"Calhoun County, Alabama",115788.0,56554.0,59234.0,80477.0,25960.0,260.0,1372.0,122.0,,28701.0,20450.0,17.661588
2,043,"Cullman County, Alabama",90665.0,45051.0,45614.0,82050.0,1127.0,184.0,264.0,47.0,,32189.0,16019.0,17.668339
3,049,"DeKalb County, Alabama",71998.0,36024.0,35974.0,57615.0,885.0,2726.0,132.0,1877.0,,24633.0,13967.0,19.399150
4,051,"Elmore County, Alabama",89563.0,43448.0,46115.0,64770.0,18477.0,196.0,423.0,0.0,,33833.0,9133.0,10.197291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
843,113,"Ponce Municipio, Puerto Rico",132138.0,62755.0,69383.0,31460.0,5326.0,218.0,137.0,0.0,,12118.0,68118.0,51.550652
844,127,"San Juan Municipio, Puerto Rico",334776.0,152697.0,182079.0,89419.0,41340.0,1225.0,907.0,93.0,,25025.0,126728.0,37.854565
845,135,"Toa Alta Municipio, Puerto Rico",66041.0,30992.0,35049.0,4666.0,1178.0,0.0,412.0,0.0,,20463.0,18491.0,27.999273
846,137,"Toa Baja Municipio, Puerto Rico",72783.0,34120.0,38663.0,5151.0,3086.0,0.0,0.0,0.0,,16903.0,24484.0,33.639724


In [8]:
# Format income and poverty rate 
county_pop_2022["Per_Capita_Income"] = county_pop_2022["Per_Capita_Income"].map("{:.2f}".format)
county_pop_2022["Poverty_Rate"] = county_pop_2022["Poverty_Rate"].map("{:,.2f}".format)

county_pop_2022

Unnamed: 0,County,County_State,Population,Male,Female,White,Black,Native,Asian,Pacific,Hispanic,Per_Capita_Income,Poverty_Count,Poverty_Rate
0,003,"Baldwin County, Alabama",246435.0,120664.0,125771.0,204709.0,17251.0,1012.0,2363.0,111.0,,38198.00,32313.0,13.11
1,015,"Calhoun County, Alabama",115788.0,56554.0,59234.0,80477.0,25960.0,260.0,1372.0,122.0,,28701.00,20450.0,17.66
2,043,"Cullman County, Alabama",90665.0,45051.0,45614.0,82050.0,1127.0,184.0,264.0,47.0,,32189.00,16019.0,17.67
3,049,"DeKalb County, Alabama",71998.0,36024.0,35974.0,57615.0,885.0,2726.0,132.0,1877.0,,24633.00,13967.0,19.40
4,051,"Elmore County, Alabama",89563.0,43448.0,46115.0,64770.0,18477.0,196.0,423.0,0.0,,33833.00,9133.0,10.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
843,113,"Ponce Municipio, Puerto Rico",132138.0,62755.0,69383.0,31460.0,5326.0,218.0,137.0,0.0,,12118.00,68118.0,51.55
844,127,"San Juan Municipio, Puerto Rico",334776.0,152697.0,182079.0,89419.0,41340.0,1225.0,907.0,93.0,,25025.00,126728.0,37.85
845,135,"Toa Alta Municipio, Puerto Rico",66041.0,30992.0,35049.0,4666.0,1178.0,0.0,412.0,0.0,,20463.00,18491.0,28.00
846,137,"Toa Baja Municipio, Puerto Rico",72783.0,34120.0,38663.0,5151.0,3086.0,0.0,0.0,0.0,,16903.00,24484.0,33.64


In [9]:
county_pop_2022.to_csv("Output Data/county_pop2022.csv", encoding="utf-8", index=False)

In [10]:
result = county_pop_2022.to_json(orient="records")
parsed = json.loads(result)
county_pop_2022 = json.dumps(parsed, indent=4)
print(county_pop_2022)

[
    {
        "County": "003",
        "County_State": "Baldwin County, Alabama",
        "Population": 246435.0,
        "Male": 120664.0,
        "Female": 125771.0,
        "White": 204709.0,
        "Black": 17251.0,
        "Native": 1012.0,
        "Asian": 2363.0,
        "Pacific": 111.0,
        "Hispanic": null,
        "Per_Capita_Income": "38198.00",
        "Poverty_Count": 32313.0,
        "Poverty_Rate": "13.11"
    },
    {
        "County": "015",
        "County_State": "Calhoun County, Alabama",
        "Population": 115788.0,
        "Male": 56554.0,
        "Female": 59234.0,
        "White": 80477.0,
        "Black": 25960.0,
        "Native": 260.0,
        "Asian": 1372.0,
        "Pacific": 122.0,
        "Hispanic": null,
        "Per_Capita_Income": "28701.00",
        "Poverty_Count": 20450.0,
        "Poverty_Rate": "17.66"
    },
    {
        "County": "043",
        "County_State": "Cullman County, Alabama",
        "Population": 90665.0,
        "Male