In [1]:
import json
import pandas as pd
import os
import numpy as np
import requests
from dotenv import load_dotenv
import pathlib as path

In [2]:
load_dotenv()
api_key = os.getenv("CENSUS_API_KEY")

In [3]:
#All Census api requests start with https://api.census.gov/data
#Then it's the dataset (one of over 1,200), ex: 2018/pep/charagegroups
# Then "?get=" followed by the list of variables and geographies we want data for, ex:
#POP,GEONAME, DATE_DESC&DATE_CODE=11&RACE=10
#Then, set the geography/ Foe ex: "&for=county:*&in=state:24" The * means all available values

#https://api.census.gov/data/2018/pep/charagegroups?get=POP,GEONAME,DATE_DESC&DATE_CODE=11&RACE=10&for=county:*&in=state:24

In [4]:
query_url = f'https://api.census.gov/data/2021/pep/population?get=DENSITY_2021,POP_2021,NAME,STATE,REGION&for=state&key={api_key}'
query = requests.get(query_url).json()
print(json.dumps(query, indent=4))

[
    [
        "DENSITY_2021",
        "POP_2021",
        "NAME",
        "STATE",
        "REGION",
        "state"
    ],
    [
        "58.1171593930",
        "3986639",
        "Oklahoma",
        "40",
        null,
        "40"
    ],
    [
        "25.5629643700",
        "1963692",
        "Nebraska",
        "31",
        null,
        "31"
    ],
    [
        "224.4561379100",
        "1441553",
        "Hawaii",
        "15",
        null,
        "15"
    ],
    [
        "11.8108489860",
        "895376",
        "South Dakota",
        "46",
        null,
        "46"
    ],
    [
        "169.1679021400",
        "6975218",
        "Tennessee",
        "47",
        null,
        "47"
    ],
    [
        "28.6180620940",
        "3143991",
        "Nevada",
        "32",
        null,
        "32"
    ],
    [
        "17.4415070270",
        "2115877",
        "New Mexico",
        "35",
        null,
        "35"
    ],
    [
        "57.1690084670",
        "3193

In [5]:
query_df = pd.DataFrame(query)
query_df.head()
#query_df.describe()

Unnamed: 0,0,1,2,3,4,5
0,DENSITY_2021,POP_2021,NAME,STATE,REGION,state
1,58.1171593930,3986639,Oklahoma,40,,40
2,25.5629643700,1963692,Nebraska,31,,31
3,224.4561379100,1441553,Hawaii,15,,15
4,11.8108489860,895376,South Dakota,46,,46


In [6]:
#query_df

query_df.columns = query_df.iloc[0]
query_df = query_df[1:]
query_df.reset_index(drop=True, inplace=True)
query_df.head()

Unnamed: 0,DENSITY_2021,POP_2021,NAME,STATE,REGION,state
0,58.117159393,3986639,Oklahoma,40,,40
1,25.56296437,1963692,Nebraska,31,,31
2,224.45613791,1441553,Hawaii,15,,15
3,11.810848986,895376,South Dakota,46,,46
4,169.16790214,6975218,Tennessee,47,,47


In [9]:

renamed_query_df = query_df.rename(columns={
    "DENSITY_2021": "PopDensity/SqMi 2021",
    "POP_2021": "Total Population 2021",
    "NAME": "State",
    "STATE": "State Code",
    "REGION": "Region",
    "state": "state"}
)
del renamed_query_df['state']
renamed_query_df["Pop.Density/SqMi 2021"] = renamed_query_df[
    "PopDensity/SqMi 2021"].astype(float)

renamed_query_df["Total Population 2021"] = renamed_query_df[
    "Total Population 2021"].astype(int)

renamed_query_df["State Code"] = renamed_query_df[
    "State Code"].astype(int)

renamed_query_df["PopDensity/SqMi 2021"] = renamed_query_df[
    "PopDensity/SqMi 2021"].astype(float)

renamed_query_df.head()

Unnamed: 0,PopDensity/SqMi 2021,Total Population 2021,State,State Code,Region,Pop.Density/SqMi 2021
0,58.117159,3986639,Oklahoma,40,,58.117159
1,25.562964,1963692,Nebraska,31,,25.562964
2,224.456138,1441553,Hawaii,15,,224.456138
3,11.810849,895376,South Dakota,46,,11.810849
4,169.167902,6975218,Tennessee,47,,169.167902


In [10]:
#re-sort cols:
# 
# final_query_df = final_query_df.sort_values(by="Pop.Change '20-'21", ascending=False)
final_query_df.head()

NameError: name 'final_query_df' is not defined

In [11]:
#reorder cols:

new_col_order = ["State", "State Code", "Pop.Density 2020", "Pop.Density 2021", "Total Pop.2020", "Total Pop.2021", "Pop.Change '20-'21"]
new_col_query_df = renamed_query_df[new_col_order]
new_col_query_df.head()

KeyError: '[\'Pop.Density 2020\', \'Pop.Density 2021\', \'Total Pop.2020\', \'Total Pop.2021\', "Pop.Change \'20-\'21"] not in index'

In [None]:
#Advanced filtering:

housing_url = f"https://api.census.gov/data/2022/acs/acs1?get=NAME,B25082_001E,B25082_002E,B25082_003E&for=county:*&in=state:*&key={api_key}"
print(housing_url)


# Calculate percentages
df['Pct_30_34.9_Mortgage'] = df['B25082_002E'] / df['B25082_001E'] * 100
df['Pct_35Plus_Mortgage'] = df['B25082_003E'] / df['B25082_001E'] * 100

# Filter counties with high cost burden
high_cost_burden = df[(df['Pct_30_34.9_Mortgage'] + df['Pct_35Plus_Mortgage']) > 30]
print(high_cost_burden[['NAME', 'Pct_30_34.9_Mortgage', 'Pct_35Plus_Mortgage']])