We retrieve the provisional death counts for coronavirus (COVID-19) through the public API at data.cdc.gov.


In [2]:
import pandas as pd
import requests
import json

In [3]:
# JSON API endpoint on data.cdc.gov serving the provisional death counts.
url = "https://data.cdc.gov/resource/hc4f-j6nb.json"
# A timeout keeps this cell from hanging indefinitely if the server stalls.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error (4xx/5xx) instead of parsing an error payload
# as if it were data in the following cells.
response.raise_for_status()


In [4]:
# Decode the JSON body of the HTTP response into Python objects.
r_js = response.json()

We load the data into a DataFrame.

In [5]:
# Build a DataFrame from the decoded JSON payload and preview its first rows.
df_dac = pd.DataFrame(r_js)
df_dac.head()

Unnamed: 0,group,indicator,covid_deaths,total_deaths,percent_expected_deaths,pneumonia_deaths,pneumonia_and_covid_deaths
0,By week,Total Deaths,2214,476602,0.83,27131,916
1,By week,2/1/2020,0,56111,0.95,2995,0
2,By week,2/8/2020,0,56295,0.94,2989,0
3,By week,2/15/2020,0,54575,0.93,2874,0
4,By week,2/22/2020,0,54208,0.93,2845,0


In [6]:
# Show the column labels available in the raw frame.
df_dac.columns

Index(['group', 'indicator', 'covid_deaths', 'total_deaths',
       'percent_expected_deaths', 'pneumonia_deaths',
       'pneumonia_and_covid_deaths'],
      dtype='object')

Next, we create a new DataFrame containing only the columns we are interested in.
We'll name it `df_dealca` (deaths, all causes).

In [7]:
# Columns kept for the all-causes analysis.
cols = ['group', 'indicator', 'covid_deaths', 'total_deaths']
# Select the wanted columns in a single step instead of filling an empty frame
# one column at a time. The explicit .copy() makes df_dealca an independent
# frame, so later assignments to it never touch df_dac or trigger
# SettingWithCopyWarning.
df_dealca = df_dac[cols].copy()

df_dealca.head()

Unnamed: 0,group,indicator,covid_deaths,total_deaths
0,By week,Total Deaths,2214,476602
1,By week,2/1/2020,0,56111
2,By week,2/8/2020,0,56295
3,By week,2/15/2020,0,54575
4,By week,2/22/2020,0,54208


The next step is to create two subsets: one with the data grouped by state and another grouped by age group.

In [48]:
# Rows reported per state. The first matching row is skipped by position
# (presumably a nationwide total rather than a state -- TODO confirm), and the
# index is renumbered from 0.
df_dealca_state = (
    df_dealca
    .loc[df_dealca['group'].eq("By state"), ['indicator', 'covid_deaths', 'total_deaths']]
    .iloc[1:]
    .reset_index(drop=True)
)

# Rows reported per age group, all rows kept, index renumbered from 0.
df_dealca_age = (
    df_dealca
    .loc[df_dealca['group'].eq("By age"), ['indicator', 'covid_deaths', 'total_deaths']]
    .reset_index(drop=True)
)


We save both DataFrames as pickle files.

In [49]:
# Persist each subset to its own pickle file in the working directory.
pd.to_pickle(df_dealca_state, "deaths_all_causes_bystate.pkl")
pd.to_pickle(df_dealca_age, "deaths_allcauses_byage.pkl")

We verify that it works

In [50]:
# Reload the by-state pickle written earlier in this notebook.
df_states = pd.read_pickle("deaths_all_causes_bystate.pkl")
# Check the full round trip (values, dtypes and index) rather than relying on
# a visual inspection of the first rows; raises AssertionError on any mismatch.
pd.testing.assert_frame_equal(df_states, df_dealca_state)
df_states.head()

Unnamed: 0,indicator,covid_deaths,total_deaths
0,Alabama,2,8589
1,Alaska,0,600
2,Arizona,0,10540
3,Arkansas,0,5680
4,California,98,49660


In [51]:
# Reload the by-age pickle written earlier in this notebook.
df_ages = pd.read_pickle("deaths_allcauses_byage.pkl")
# Check the full round trip (values, dtypes and index) rather than relying on
# a visual inspection of the first rows; raises AssertionError on any mismatch.
pd.testing.assert_frame_equal(df_ages, df_dealca_age)
df_ages.head()

Unnamed: 0,indicator,covid_deaths,total_deaths
0,All ages,2214,476602
1,Under 1 year,0,2529
2,1-4 years,1,517
3,5-14 years,0,749
4,15-24 years,1,4275


We also save our initial DataFrame, in case we want to access the data grouped by week later on.

In [52]:
# Persist the full (unfiltered) frame, which still contains the "By week" rows.
df_dealca.to_pickle("cdc_deaths_all_causes.pkl")
# Read it straight back and confirm the round trip is lossless before
# previewing; raises AssertionError on any mismatch.
df = pd.read_pickle("cdc_deaths_all_causes.pkl")
pd.testing.assert_frame_equal(df, df_dealca)
df.head()

Unnamed: 0,group,indicator,covid_deaths,total_deaths
0,By week,Total Deaths,2214,476602
1,By week,2/1/2020,0,56111
2,By week,2/8/2020,0,56295
3,By week,2/15/2020,0,54575
4,By week,2/22/2020,0,54208
