In [22]:
import pandas as pd
import numpy as np
import requests
import json

import matplotlib.pyplot as plt
from scipy.stats import linregress

from pathlib import Path
from census import Census
from api_keys import census_key

In [2]:
CA_cases_per_100_path = Path("Datasets/CA_adults_with_diabetes_per_100.csv")
CA_cases_per_100_data = pd.read_csv(CA_cases_per_100_path)

In [3]:
CA_cases_per_100_data.head()

Unnamed: 0,Geography,Year,Strata,Strata Name,Percent,Lower 95% CL,Upper 95% CL,Standard Error
0,California,2018,Total population,Total population,10.4,8.9,11.9,0.8
1,California,2018,Race-Ethnicity,White,8.4,6.9,9.9,0.8
2,California,2018,Race-Ethnicity,African-American,12.3,6.0,18.6,3.2
3,California,2018,Race-Ethnicity,Asian,8.5,3.2,13.9,2.7
4,California,2018,Race-Ethnicity,Hispanic,12.1,9.0,15.1,1.6


In [4]:
FRED_PPIC_Diabetes_path = Path("Datasets/FRED_PPIC_Diabetes.csv")
FRED_PPIC_Diabetes_data = pd.read_csv(FRED_PPIC_Diabetes_path)

In [5]:
FRED_PPIC_Diabetes_data.head()

Unnamed: 0,DATE,WPU06380103
0,2009-12-01,100.0
1,2010-01-01,99.8
2,2010-02-01,102.4
3,2010-03-01,103.0
4,2010-04-01,103.0


In [6]:
OWID_diabetes_vs_gdp_path = Path("Datasets/OWID_diabetes-prevalence-vs-gdp-per-capita.csv")
OWID_diabetes_vs_gdp_data = pd.read_csv(OWID_diabetes_vs_gdp_path)

In [7]:
OWID_diabetes_vs_gdp_data.head()

Unnamed: 0,Entity,Code,Year,Diabetes prevalence (% of population ages 20 to 79),"GDP per capita, PPP (constant 2017 international $)",Population (historical estimates),Continent
0,Abkhazia,OWID_ABK,2015,,,,Asia
1,Afghanistan,AFG,2011,7.6,1961.0963,29249156.0,
2,Afghanistan,AFG,2021,10.9,1516.3057,40099460.0,
3,Afghanistan,AFG,2002,,1280.4631,21000258.0,
4,Afghanistan,AFG,2003,,1292.3335,22645136.0,


In [8]:
weekly_deaths_countries_path = Path("Datasets/Weekly_Counts_of_Death_by_Diabetes.csv")
weekly_deaths_countries_data = pd.read_csv(weekly_deaths_countries_path)

In [9]:
weekly_deaths_countries_data.head()

Unnamed: 0,Jurisdiction,Week Ending Date,State Abbreviation,Year,Week,Cause Group,Number of Deaths,Cause Subgroup,Time Period,Suppress,Note,Average Number of Deaths in Time Period,Difference from 2015-2019 to 2020,Percent Difference from 2015-2019 to 2020,Type
0,Alabama,01/10/2015,AL,2015,1,Alzheimer disease and dementia,120,Alzheimer disease and dementia,2015-2019,,,103,,,Predicted (weighted)
1,Alabama,01/10/2015,AL,2015,1,Alzheimer disease and dementia,120,Alzheimer disease and dementia,2015-2019,,,103,,,Unweighted
2,Alabama,01/09/2016,AL,2016,1,Alzheimer disease and dementia,76,Alzheimer disease and dementia,2015-2019,,,103,,,Predicted (weighted)
3,Alabama,01/09/2016,AL,2016,1,Alzheimer disease and dementia,76,Alzheimer disease and dementia,2015-2019,,,103,,,Unweighted
4,Alabama,01/07/2017,AL,2017,1,Alzheimer disease and dementia,96,Alzheimer disease and dementia,2015-2019,,,103,,,Predicted (weighted)


In [26]:
income_variable = "B19013_001E"
employment_employed_variable = "B23025_004E"
poverty_variable = "B17001_002E"
population_variable = "B01003_001E"

In [27]:
url = f"https://api.census.gov/data/2019/acs/acs5?get=NAME,{population_variable},{income_variable},{employment_employed_variable},{poverty_variable}&for=state:*&key={census_key}"

census_data = requests.get(url).json()
print(json.dumps(census_data, indent=4, sort_keys=True))

[
    [
        "NAME",
        "B01003_001E",
        "B19013_001E",
        "B23025_004E",
        "B17001_002E",
        "state"
    ],
    [
        "Alabama",
        "4876250",
        "50536",
        "2097384",
        "795989",
        "01"
    ],
    [
        "Alaska",
        "737068",
        "77640",
        "347774",
        "76933",
        "02"
    ],
    [
        "Arizona",
        "7050299",
        "58945",
        "3130658",
        "1043764",
        "04"
    ],
    [
        "Arkansas",
        "2999370",
        "47597",
        "1303490",
        "496260",
        "05"
    ],
    [
        "California",
        "39283497",
        "75235",
        "18591241",
        "5149742",
        "06"
    ],
    [
        "Colorado",
        "5610349",
        "72331",
        "2904589",
        "565873",
        "08"
    ],
    [
        "Delaware",
        "957248",
        "68287",
        "455620",
        "109400",
        "10"
    ],
    [
        "District of Colu

In [25]:
#for loop through years
#convert to dataframe