In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
# Global plot styling. The style sheet is applied first so that the explicit
# figure size set afterwards is the value that ends up in rcParams.
plt.style.use('fivethirtyeight')
plt.rcParams.update({'figure.figsize': (16, 10)})

# Read in data and do basic massaging

In [2]:
# Load the confirmed-cases time-series CSV. The path is relative to the
# working directory of the kernel; the output recorded with this cell is a
# FileNotFoundError, so the file was not present at that location when the
# notebook was last run. Every later cell depends on `df_raw`.
df_raw = pd.read_csv(
    filepath_or_buffer='csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
)
# Peek at the first two rows to check the column layout.
df_raw.head(n=2)

FileNotFoundError: [Errno 2] File csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv does not exist: 'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'

In [None]:
# Build `df`: one row per date (index named 'Date'), one column per
# Province/State, restricted to 'US' rows of the raw table.
df = (
    df_raw
    .loc[df_raw['Country/Region'] == 'US']                    # keep only US rows
    .drop(columns=['Lat', 'Long', 'Country/Region'])          # only Province/State + date columns remain
    .set_index('Province/State')                              # states become the row index
    .stack()                                                  # dates move into the row index ...
    .unstack(level=0)                                         # ... and states move out into the columns
    .rename_axis('Date')
    # drop the county-level columns (names containing a comma), which stopped receiving data as of 3/9
    .loc[:, lambda frame: ~frame.columns.str.contains(',')]
    # drop the two cruise ships
    .drop(columns=['Diamond Princess', 'Grand Princess'])
    # keep rows from 3/10 onward since states didn't get data before then
    .loc['3/10/20':, :]
)
df

In [None]:
threshold = 100

# Re-base every location onto a common "days since exceeding `threshold`"
# axis: keep only the counts strictly above the threshold and renumber them
# 0, 1, 2, ... so row N means "N-th day above the threshold" in every column.
alignedCounts = {}
for location in df.columns:
    counts = df[location]
    aboveThreshold = counts[counts > threshold].reset_index(drop=True)
    # Locations that never exceeded the threshold contribute no column.
    # (The previous extra check `tempList[-1] <= threshold` could never be
    # true because every kept value is already > threshold.)
    if aboveThreshold.empty:
        continue
    alignedCounts[location] = aboveThreshold

# Build the frame in one go. Passing the full-length index makes pandas pad
# the shorter series with NaN, which replaces the manual padding loop and its
# use of `np.NaN` (an alias that no longer exists in NumPy 2.0).
daysSince = pd.DataFrame(alignedCounts, index=range(len(df.index)))

# Trim the trailing rows in which no location has data.
daysSince = daysSince.dropna(axis=0, how='all')
# One line per location on a log-scaled y axis; the legend is replaced by
# text labels placed at the end of each line below.
ax = daysSince.plot(logy=True, legend=False)
ax.set_title('Confirmed cases since hitting {}'.format(threshold), fontsize='large')
ax.set_xlabel('Days since {} confirmed cases'.format(threshold), fontsize='large')
ax.set_ylabel('Number of confirmed cases', fontsize='large')

# Label every line at its last real data point.
# The earlier loop walked each column until the first NaN and annotated the
# point before it; for a column with data in every row it stopped on the final
# row instead and therefore labelled the second-to-last point (and used a
# hard-coded 100 as the fallback y value). Looking up the last valid index
# directly handles both cases the same way.
for location in daysSince.columns:
    series = daysSince[location]
    lastDay = series.last_valid_index()
    if lastDay is None:
        # Column holds no data at all, so there is nothing to label.
        continue
    ax.annotate(location, (lastDay, series.loc[lastDay]),
                xytext=(10, 0),
                textcoords='offset pixels',
                family='sans-serif', fontsize=14, color='darkslategrey')

plt.show()

In [None]:
# Build `df2` in long form: one row per (Province/State, date) pair, with the
# date label in 'variable' and the count in 'value' (melt's default names).
df2 = (
    df_raw
    .loc[df_raw['Country/Region'] == 'US']                    # US rows only
    .drop(columns=['Lat', 'Long', 'Country/Region'])
    # discard entries whose name contains a comma
    .loc[lambda frame: ~frame['Province/State'].str.contains(",")]
    # discard the two cruise ships
    .loc[lambda frame: ~frame['Province/State'].isin(['Diamond Princess', 'Grand Princess'])]
    .melt(id_vars='Province/State')
)
df2.head()

In [None]:
# Small multiples: one panel per Province/State, five panels per row.
# Expects `df2` in its melted (long) form with 'variable' and 'value' columns.
g = sns.FacetGrid(df2, col="Province/State", col_wrap=5, height=3.5)
# Set the log scale through the grid so it is applied to every facet
# explicitly. `plt.yscale('log')` only addresses pyplot's current axes and
# reached the other panels solely through the grid's shared y axis.
g.set(yscale='log')
g = g.map(plt.plot, 'variable', 'value', marker=".")

In [None]:
# Rebuild `df2`, this time in wide form: dates as the row index (named
# 'Date'), one column per Province/State.
# NOTE: this rebinds `df2`, which earlier cells define as a melted frame with
# 'variable'/'value' columns; cells written for that shape will not work on
# this one.
df2 = (
    df_raw
    .loc[df_raw['Country/Region'] == 'US']                    # US rows only
    .drop(columns=['Lat', 'Long', 'Country/Region'])
    # discard entries whose name contains a comma
    .loc[lambda frame: ~frame['Province/State'].str.contains(",")]
    # discard the two cruise ships
    .loc[lambda frame: ~frame['Province/State'].isin(['Diamond Princess', 'Grand Princess'])]
    .set_index('Province/State')      # states as row index, leaving only dates in the columns
    .stack()                          # rotate dates into the row index ...
    .unstack(level=0)                 # ... then move the states back out into the columns
    .rename_axis('Date')
    .loc['3/10/20':, :]               # eliminate dates before 3/10
)
# Display the stacked view: a Series indexed by (Date, Province/State).
df2.stack()

In [None]:
# Facet the wide `df2` (Date index, one column per Province/State) by state.
# FacetGrid looks up `col` and the mapped variables as columns of a DataFrame,
# so the stacked Series is first turned into a long-form frame with the
# columns 'Date', 'Province/State' and 'Confirmed'. Passing the bare stacked
# Series fails because 'Province/State' and 'Date' only exist as index levels,
# and plt.plot also needs a y variable in addition to 'Date'.
# col_wrap/height follow the earlier per-state grid so the panels wrap into
# rows instead of forming a single very wide row.
g = sns.FacetGrid(
    df2.stack().reset_index(name='Confirmed'),
    col='Province/State',
    col_wrap=5,
    height=3.5,
)
g = g.map(plt.plot, 'Date', 'Confirmed', marker=".")

In [None]:
# Load seaborn's "attention" example dataset for a reference FacetGrid plot
# and show its first rows.
att = sns.load_dataset(name="attention")
att.head(n=5)

In [None]:
# One small panel per subject (five per row), plotting score against
# solutions with a dot marker at each point.
g = sns.FacetGrid(data=att, col="subject", col_wrap=5, height=1.5).map(
    plt.plot, "solutions", "score", marker="."
)