We retrieve the provisional COVID-19 death counts from the CDC open-data API at data.cdc.gov.


In [1]:
import pandas as pd
import requests
import json

In [2]:
# JSON endpoint on data.cdc.gov queried for the provisional death counts.
# NOTE(review): Socrata-style endpoints usually page their results (default row
# limit per request) — confirm the whole dataset fits in a single response.
url = "https://data.cdc.gov/resource/hc4f-j6nb.json"
# A timeout prevents the cell from hanging indefinitely if the server stalls.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of parsing an error payload as data.
response.raise_for_status()


In [3]:
# Decode the body of the HTTP response as JSON into Python objects.
r_js = response.json()

We load the data into a DataFrame.

In [4]:
# Build the raw frame straight from the decoded JSON payload.
# NOTE(review): the numeric-looking columns may arrive as strings from the JSON
# API — check df_dac.dtypes before doing arithmetic on them.
df_dac = pd.DataFrame(data=r_js)
# Preview the first five rows.
df_dac.head(5)

Unnamed: 0,group,indicator,covid_deaths,total_deaths,percent_expected_deaths,pneumonia_deaths,pneumonia_and_covid_deaths
0,By week,Total Deaths,3307,490389,0.85,28475,1410
1,By week,2/1/2020,0,56266,0.95,3002,0
2,By week,2/8/2020,0,56587,0.95,3002,0
3,By week,2/15/2020,0,55189,0.94,2910,0
4,By week,2/22/2020,0,54761,0.94,2872,0


In [5]:
# Show the column labels of the raw frame to decide which ones to keep.
df_dac.columns

Index(['group', 'indicator', 'covid_deaths', 'total_deaths',
       'percent_expected_deaths', 'pneumonia_deaths',
       'pneumonia_and_covid_deaths'],
      dtype='object')

Now we create a new DataFrame containing only the columns we are interested in.
We'll name it `df_dealca` ("dealca" = deaths, all causes).

In [6]:
# Columns kept for the all-causes deaths analysis.
cols = ['group', 'indicator', 'covid_deaths', 'total_deaths']
# Select the columns in a single step rather than filling an empty frame one
# column at a time; .copy() makes df_dealca independent of df_dac.
df_dealca = df_dac[cols].copy()

# Preview the reduced frame.
df_dealca.head()

Unnamed: 0,group,indicator,covid_deaths,total_deaths
0,By week,Total Deaths,3307,490389
1,By week,2/1/2020,0,56266
2,By week,2/8/2020,0,56587
3,By week,2/15/2020,0,55189
4,By week,2/22/2020,0,54761


The next step is to create two subsets: one with the data grouped by state and another grouped by age group.

In [7]:
# Columns carried into both subsets; 'group' is omitted because each subset
# contains a single group value.
value_cols = ['indicator', 'covid_deaths', 'total_deaths']
state_mask = df_dealca['group'] == "By state"
age_mask = df_dealca['group'] == "By age"

# State subset: the first matching row is dropped by position
# (NOTE(review): presumably an aggregate row preceding the individual states —
# confirm), then the index is renumbered from 0.
df_dealca_state = (
    df_dealca.loc[state_mask, value_cols]
    .iloc[1:]
    .reset_index(drop=True)
)

# Age subset: every matching row is kept and the index is renumbered from 0.
df_dealca_age = df_dealca.loc[age_mask, value_cols].reset_index(drop=True)


We save both DataFrames as pickle files.

In [12]:
# Persist the two subsets as pickle files.
# NOTE(review): the two file names are spelled differently ("all_causes" vs
# "allcauses"); they are kept unchanged because the read cells use these exact paths.
df_dealca_state.to_pickle(path="../../Pickles/deaths_all_causes_bystate.pkl")
df_dealca_age.to_pickle(path="../../Pickles/deaths_allcauses_byage.pkl")

We verify that the pickle files can be read back correctly.

In [13]:
# Read the by-state pickle back from disk and preview it as a round-trip check.
df_states = pd.read_pickle("../../Pickles/deaths_all_causes_bystate.pkl")
df_states.head()

Unnamed: 0,indicator,covid_deaths,total_deaths
0,Alabama,7,8842
1,Alaska,1,613
2,Arizona,26,11862
3,Arkansas,3,5939
4,California,135,50784


In [14]:
# Read the by-age pickle back from disk and preview it as a round-trip check.
df_ages = pd.read_pickle("../../Pickles/deaths_allcauses_byage.pkl")
df_ages.head()

Unnamed: 0,indicator,covid_deaths,total_deaths
0,All ages,3307,490389
1,Under 1 year,0,2605
2,1&ndash;4 years,1,529
3,5&ndash;14 years,0,769
4,15&ndash;24 years,4,4431


We also save the complete reduced DataFrame (`df_dealca`), in case we want to access the data grouped by week later on.

In [15]:
# Persist the reduced frame with all groups (it still contains the "By week" rows).
df_dealca.to_pickle("../../Pickles/cdc_deaths_all_causes.pkl")
# Read it back immediately and preview it as a round-trip check.
df = pd.read_pickle("../../Pickles/cdc_deaths_all_causes.pkl")
df.head()

Unnamed: 0,group,indicator,covid_deaths,total_deaths
0,By week,Total Deaths,3307,490389
1,By week,2/1/2020,0,56266
2,By week,2/8/2020,0,56587
3,By week,2/15/2020,0,55189
4,By week,2/22/2020,0,54761
