In [30]:
import json
import pandas as pd # Pandas
import pathlib # Built-in path manipulation library
import requests
from datetime import datetime

In [31]:
# Daily per-state CSV snapshot hosted in the COVID19Tracking/covid-tracking-data
# GitHub repository; pandas downloads and parses it directly from the URL.
url = (
    "https://raw.githubusercontent.com/COVID19Tracking/covid-tracking-data/"
    "master/data/states_daily_4pm_et.csv"
)
df_raw = pd.read_csv(filepath_or_buffer=url)

In [32]:
# Distinct values of the 'state' column, in order of first appearance.
states = pd.unique(df_raw['state'])

In [33]:
def _build_state_timeseries(source):
    """Build the cumulative and daily time series for a single state.

    Parameters
    ----------
    source : pandas.DataFrame
        Rows of the raw download belonging to one state. The columns read
        are 'date' (parsed as YYYYMMDD), 'positive', 'death' and 'recovered'.

    Returns
    -------
    pandas.DataFrame
        Integer-valued frame indexed by 'dates2' (MM/DD/YY strings, oldest
        first) with columns cum_cases, cum_deaths, cum_recover, new_cases,
        new_deaths, new_recover and new_uninfected.
    """
    # Sort on the real date *before* formatting: MM/DD/YY strings sort
    # lexicographically, which puts e.g. 01/05/21 ahead of 03/04/20 and
    # corrupts every diff() once the data spans more than one calendar year.
    ordered = source.assign(
        _parsed_date=pd.to_datetime(source['date'].astype(str), format='%Y%m%d')
    ).sort_values('_parsed_date', kind='mergesort')

    df = pd.DataFrame(
        {
            'cum_cases': ordered['positive'].values,
            'cum_deaths': ordered['death'].values,
            'cum_recover': ordered['recovered'].values,
        },
        index=pd.Index(
            ordered['_parsed_date'].dt.strftime('%m/%d/%y').values, name='dates2'
        ),
    )

    # Cumulative counts: carry the last known value across reporting gaps so a
    # missing day does not look like the total dropping to zero (which would
    # yield a large negative, then positive, daily change). Only the leading
    # gap, before any value was reported, is filled with 0.
    df = df.ffill().fillna(0).astype(int)

    # Daily changes; the first row has no predecessor and is set to 0.
    for cum_col, new_col in (('cum_cases', 'new_cases'),
                             ('cum_deaths', 'new_deaths'),
                             ('cum_recover', 'new_recover')):
        df[new_col] = df[cum_col].diff().fillna(0).astype(int)
    df['new_uninfected'] = df['new_recover'] + df['new_deaths']
    return df


# Make sure the output folder exists so the cell works on a fresh checkout.
output_dir = pathlib.Path('data')
output_dir.mkdir(parents=True, exist_ok=True)

good = []  # states for which a time-series file was written
bad = []   # states skipped because no recovery figures were reported
for state in states:  # For each state
    source = df_raw[df_raw['state'] == state]  # Only the given state
    if source['recovered'].sum() > 0:  # Recovery data present for that state
        df = _build_state_timeseries(source)
        df.to_csv(output_dir / ('covid_timeseries_US_%s.csv' % state))  # Overwrite old data
        good.append(state)
    else:
        bad.append(state)

print("Recovery data for %s" % ','.join(good))
print("No recovery data for %s" % ','.join(bad))

Recovery data for AK,AR,DC,DE,GU,HI,IA,MD,ME,MI,MN,MT,ND,NH,NM,NY,OK,RI,SD,TN,TX,VI,VT,WY
No recovery data for AL,AS,AZ,CA,CO,CT,FL,GA,ID,IL,IN,KS,KY,LA,MA,MO,MP,MS,NC,NE,NJ,NV,OH,OR,PA,PR,SC,UT,VA,WA,WI,WV
