In [1]:
import requests
import json

import pandas as pd
import datetime as dt

In [2]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
# Global plot styling: ggplot theme plus a bold, size-16 default font.
plt.style.use('ggplot')
font = dict(weight='bold', size=16)
plt.rc('font', **font)

In [10]:
# Endpoints of the CovidTracking project used to fetch state and national data.
# Daily per-state history (versioned `v1` path with a `.json` suffix).
state_api_url = 'https://covidtracking.com/api/v1/states/daily.json'
# Daily US-wide history.
# NOTE(review): unlike the state URL, this path has no `v1` segment and no
# `.json` suffix -- confirm both endpoints are still served and return the
# fields the later cells select.
national_api_url = 'https://covidtracking.com/api/us/daily'

In [28]:
# Download both feeds.
# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() stops the notebook right here on an
# HTTP error instead of letting an error payload reach the parsing cells.
r_states = requests.get(state_api_url, timeout=30)
r_states.raise_for_status()
r_national = requests.get(national_api_url, timeout=30)
r_national.raise_for_status()

In [29]:
# Show the HTTP status code of each response (200 means the request succeeded).
display(r_states.status_code, r_national.status_code)

200

200

In [65]:
# Decode each response body from JSON into plain Python objects.
covid_list_states = r_states.json()
covid_list_national = r_national.json()

In [66]:
# Shallow copy of the decoded state records: a new list holding the same
# record objects in the same order.
list_of_dicts_states = list(covid_list_states)

In [67]:
# Shallow copy of the decoded national records: a new list holding the same
# record objects in the same order.
list_of_dicts_national = list(covid_list_national)

In [68]:
# Tabulate the state records: one row per record.
# Assumes each record is a dict (as the list's name suggests), in which case
# the dict keys become the column names.
df_states = pd.DataFrame(list_of_dicts_states)

In [69]:
# Tabulate the national records: one row per record.
# Assumes each record is a dict (as the list's name suggests), in which case
# the dict keys become the column names.
df_national = pd.DataFrame(list_of_dicts_national)

In [70]:
# Parse the `date` column (YYYYMMDD, per the format string) into datetimes.
# The default errors='raise' is used rather than errors='ignore': 'ignore'
# hands back the unparsed column when any value fails to parse (and is
# deprecated in recent pandas), which would silently break the date-range
# and merge cells that follow. A bad value now stops the notebook here.
df_states['date'] = pd.to_datetime(df_states['date'], format='%Y%m%d')
df_national['date'] = pd.to_datetime(df_national['date'], format='%Y%m%d')

In [72]:
# Calendar table: one row per day from the earliest to the latest date in
# the state data, held in a single 'Date' column with a default 0..n-1 index.
# Series.min()/max() are vectorised and skip missing dates, unlike the
# builtin min()/max() applied to a Series.
start_date = df_states['date'].min()
end_date = df_states['date'].max()
date_series = pd.date_range(start_date, end_date, freq='D').to_series()
# Build the one-column frame directly from the daily dates instead of
# creating a two-column frame and dropping the duplicate.
df_date = pd.DataFrame({'Date': date_series.index})

In [73]:
def _week_of_following_day(day):
    """Return the week-of-year number of the day after `day`.

    Because the week number is read from `day` + 1, a Sunday is grouped with
    the Monday-Saturday that follow it rather than with the days before it.
    """
    return (day + dt.timedelta(days=1)).weekofyear


df_date['Week'] = df_date['Date'].apply(_week_of_following_day)

In [74]:
# Calendar year of each date.
# NOTE(review): the `Week` column is derived from the week number of
# (date + 1 day), so around New Year a row's Week and Calendar_year may not
# belong to the same year (e.g. a January date carrying week 52/53) --
# confirm whether grouping on both columns is affected.
df_date['Calendar_year'] = df_date['Date'].dt.year

In [75]:
# Left-join the calendar table onto the state rows (date == Date) so each
# state record gains the Date, Week and Calendar_year columns.
df_states_official = df_states.merge(
    df_date,
    how='left',
    left_on='date',
    right_on='Date',
)



In [76]:
# Left-join the calendar table onto the national rows (date == Date) so each
# national record gains the Date, Week and Calendar_year columns.
df_national_official = df_national.merge(
    df_date,
    how='left',
    left_on='date',
    right_on='Date',
)


In [78]:
# Columns kept from the merged state table: the state and date identifiers
# followed by the daily-increase measures.
state_columns = [
    'state',
    'Date',
    'Week',
    'Calendar_year',
    'deathIncrease',
    'hospitalizedIncrease',
    'negativeIncrease',
    'positiveIncrease',
    'totalTestResultsIncrease',
]

In [79]:
# Columns kept from the merged national table: the date identifiers followed
# by the daily-increase measures (same as the state list minus 'state').
national_columns = [
    'Date',
    'Week',
    'Calendar_year',
    'deathIncrease',
    'hospitalizedIncrease',
    'negativeIncrease',
    'positiveIncrease',
    'totalTestResultsIncrease',
]

In [80]:
# Keep only the columns named in state_columns, in that order.
df_states_official = df_states_official[state_columns]

In [81]:
# Keep only the columns named in national_columns, in that order.
df_national_official = df_national_official[national_columns]

In [82]:
# Normalise the state column names. str.capitalize() upper-cases the first
# character and lower-cases the rest (so 'deathIncrease' becomes
# 'Deathincrease'); spaces and ampersands are then turned into underscores.
column_list = df_states_official.columns
new_column_names = [
    name.capitalize().replace(' ', '_').replace('&', '_')
    for name in column_list
]
df_states_official.columns = new_column_names

In [83]:
# Order rows by state, then by date, and renumber the index from 0 without
# keeping the old index as a column.
df_states_official = (
    df_states_official
    .sort_values(['State', 'Date'])
    .reset_index(drop=True)
)

In [85]:
# Normalise the national column names. str.capitalize() upper-cases the first
# character and lower-cases the rest (so 'deathIncrease' becomes
# 'Deathincrease'); spaces and ampersands are then turned into underscores.
column_list = df_national_official.columns
new_column_names = [
    name.capitalize().replace(' ', '_').replace('&', '_')
    for name in column_list
]
df_national_official.columns = new_column_names

In [86]:
# Order rows by date and renumber the index from 0 without keeping the old
# index as a column.
df_national_official = (
    df_national_official
    .sort_values('Date')
    .reset_index(drop=True)
)

In [87]:
# Discard every row that has a missing value in any of the kept columns.
# The surviving rows keep their existing index labels (no renumbering here).
df_states_official = df_states_official.dropna()
df_national_official = df_national_official.dropna()

In [101]:
# Full name -> two-letter postal abbreviation.
# Covers the 50 states plus the District of Columbia, American Samoa, Guam,
# the Northern Mariana Islands, Puerto Rico and the Virgin Islands (56 entries).
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [102]:
# Reverse lookup built from us_state_abbrev: two-letter code -> full name.
state_map = dict(zip(us_state_abbrev.values(), us_state_abbrev.keys()))


In [105]:
# Replace the two-letter codes in 'State' with full names.
# The cell is safe to re-run: values that are already full names are left
# untouched, whereas a bare state_map[x] lookup raises KeyError the second
# time the cell executes. A value that is neither a known code nor a known
# full name still fails loudly with KeyError.
_unknown_states = (
    set(df_states_official['State']) - set(state_map) - set(us_state_abbrev)
)
if _unknown_states:
    raise KeyError(
        f'Unrecognised state values: {sorted(map(str, _unknown_states))}'
    )
df_states_official['State'] = df_states_official['State'].map(
    lambda value: state_map.get(value, value)
)

In [129]:
# Save both cleaned tables as CSV files so other notebooks/scripts can load them.
# The paths are relative to the working directory, and the `data/` folder is
# not created by this cell, so it must already exist.
# No `index=` argument is passed, so pandas' default applies and the row
# index is written as the first (unnamed) column of each file.
df_states_official.to_csv('data/states_covid_pandas_df.csv')
df_national_official.to_csv('data/national_covid_pandas_df.csv')

In [115]:
# df_national_official.groupby('Week').count()

Unnamed: 0_level_0,Date,Calendar_year,Deathincrease,Hospitalizedincrease,Negativeincrease,Positiveincrease,Totaltestresultsincrease
Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10,3,3,3,3,3,3,3
11,7,7,7,7,7,7,7
12,7,7,7,7,7,7,7
13,7,7,7,7,7,7,7
14,7,7,7,7,7,7,7
15,4,4,4,4,4,4,4


In [113]:
# df_national_official.groupby('Week').sum()['Deathincrease']

Week
10       6.0
11      33.0
12     223.0
13    1693.0
14    6353.0
15    6177.0
Name: Deathincrease, dtype: float64

In [114]:
# 6400/54000

0.11851851851851852

In [116]:
# 6177/4

1544.25

In [117]:
# (1544*3) + 6177

10809

In [118]:
# 11000/54000

0.2037037037037037

In [121]:
# df_states_official.groupby('State').sum()['Deathincrease'].sort_values(ascending=False)

State
New York                    6268.0
New Jersey                  1504.0
Michigan                     959.0
Louisiana                    652.0
Illinois                     462.0
California                   442.0
Massachusetts                433.0
Washington                   384.0
Georgia                      362.0
Florida                      309.0
Pennsylvania                 309.0
Connecticut                  277.0
Indiana                      203.0
Ohio                         193.0
Colorado                     179.0
Texas                        177.0
Maryland                     124.0
Wisconsin                     99.0
Arizona                       80.0
Tennessee                     79.0
Oklahoma                      79.0
Virginia                      75.0
Nevada                        71.0
Mississippi                   67.0
Alabama                       66.0
Kentucky                      65.0
South Carolina                63.0
Missouri                      58.0
North Carolina

In [119]:
# mask = df_states_official['State'] == 'New York'
# df_states_official[mask].groupby('Week').sum()['Deathincrease']

Week
10       0.0
11       0.0
12      44.0
13     684.0
14    2837.0
15    2703.0
Name: Deathincrease, dtype: float64

In [123]:
# 1959*52

101868

In [124]:
# 2837/

SyntaxError: invalid syntax (<ipython-input-124-ad5ff1321b20>, line 1)

In [125]:
# 2703/4

675.75

In [128]:
# 2703+(675*3)

4728