# Case study using US States or Counties

Let's start by installing the required packages. Uncomment the cell below the first time you run the notebook; on later runs you can leave it commented out, because the packages are already installed in your notebook environment.

In [None]:
# Uncomment on the first run to install the packages listed in requirements.txt.
#!pip install -r requirements.txt

In this notebook, we will use Delphi's COVIDcast and Epidata APIs (https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html and https://cmu-delphi.github.io/delphi-epidata/api/gft.html, respectively) to compare trends and seasonality for seasonal flu and influenza-like illnesses.

In [None]:
import sys
sys.path.append('src/')
from delphi_epidata import Epidata
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# State code passed as the geo value to the single-state queries below.
state = "ca"

In [None]:
# Query window for the single-state cells, written as YYYYMMDD integers.
start_date, stop_date = 20200401, 20201007

## Survey data for Covid19

In [None]:
# Daily confirmed-incidence counts from the 'jhu-csse' source for the selected state.
# The range already covers start_date, so the date is not passed a second time
# (this also matches how get_covid_data builds the same request).
res = Epidata.covidcast('jhu-csse', 'confirmed_incidence_num', 'day', 'state',
                        Epidata.range(start_date, stop_date), state)
df_jhu = pd.DataFrame(res['epidata'])
df_jhu.head(5)

In [None]:
# 7-day-average version of the confirmed-incidence signal for the same state and window.
# The range already covers start_date, so the date is not passed a second time.
res = Epidata.covidcast('jhu-csse', 'confirmed_7dav_incidence_num', 'day', 'state',
                        Epidata.range(start_date, stop_date), state)
df_jhu_7day = pd.DataFrame(res['epidata'])
df_jhu_7day.head(5)

In [None]:
# TODO(SAHIKA): this plot is not coming out correctly.

# Daily counts drawn against the row position, i.e. days since the first date in the frame.
ax = df_jhu['value'].plot()
ax.set_xlabel('days since ' + str(df_jhu['time_value'][0]))
ax.set_ylabel('num of incidence')


In [None]:
# 7-day-average counts drawn against the row position (days since the first date in the frame).
# No legend is drawn: this axes holds a single series.
ax = df_jhu_7day['value'].plot()
ax.set_xlabel('days since ' + str(df_jhu_7day['time_value'][0]))
ax.set_ylabel('num of incidence')


In [None]:
# 'raw_cli' signal of the 'fb-survey' source for the same state and date window.
res = Epidata.covidcast(
    'fb-survey', 'raw_cli', 'day', 'state',
    Epidata.range(start_date, stop_date), state,
)
df_facebook = pd.DataFrame(res['epidata'])

# Quick look at the series; the x axis is the row position.
df_facebook['value'].plot()

In [None]:
# 'smoothed_search' signal of the 'ght' source for the same state and date window.
# The range already covers start_date, so the date is not passed a second time.
res = Epidata.covidcast('ght', 'smoothed_search', 'day', 'state',
                        Epidata.range(start_date, stop_date), state)
df_google = pd.DataFrame(res['epidata'])

# Quick look at the series; the x axis is the row position.
df_google['value'].plot()

## Let's automate it for multiple states

In [None]:
def _fetch_jhu_signal(signal, state, start_date, stop_date):
    """Return one daily, state-level 'jhu-csse' COVIDcast signal as a DataFrame.

    Raises RuntimeError when the API response carries no 'epidata' payload,
    so a failed request is reported with its context instead of a bare KeyError.
    """
    res = Epidata.covidcast('jhu-csse', signal, 'day', 'state',
                            Epidata.range(start_date, stop_date), state)
    if 'epidata' not in res:
        raise RuntimeError('COVIDcast returned no data for signal %r, state %r: %r'
                           % (signal, state, res))
    return pd.DataFrame(res['epidata'])


def get_covid_data(state, start_date, stop_date):
    """Fetch the JHU CSSE case and death signals for one state.

    Parameters
    ----------
    state : str
        State code passed to COVIDcast as the geo value (e.g. 'ca').
    start_date, stop_date : int
        Bounds of the requested date range, as YYYYMMDD integers.

    Returns
    -------
    tuple
        ``(df_jhu, df_jhu_7dav, df_jhu_deaths_7dav, peakday)`` where the three
        frames hold 'confirmed_incidence_num', 'confirmed_7dav_incidence_num'
        and 'deaths_7dav_incidence_num', and ``peakday`` is the row position
        (a plain int) of the largest 7-day-average case value.

    Raises
    ------
    RuntimeError
        If any request comes back without an 'epidata' payload.
    ValueError
        If the 7-day-average case series is empty, so no peak exists.
    """
    df_jhu = _fetch_jhu_signal('confirmed_incidence_num', state, start_date, stop_date)
    df_jhu_7dav = _fetch_jhu_signal('confirmed_7dav_incidence_num', state, start_date, stop_date)
    # The raw daily deaths signal used to be fetched here as well, but it was
    # never used or returned, so that request has been dropped.
    df_jhu_deaths_7dav = _fetch_jhu_signal('deaths_7dav_incidence_num', state, start_date, stop_date)

    if df_jhu_7dav.empty:
        raise ValueError('No 7-day-average case data for state %r between %s and %s'
                         % (state, start_date, stop_date))

    # Row position of the peak; callers index with .iloc, so a plain int is returned.
    peakday = int(df_jhu_7dav['value'].argmax())

    return df_jhu, df_jhu_7dav, df_jhu_deaths_7dav, peakday
    
    

In [None]:
def get_flu_data(years, datasource, state):
    """Collect weekly influenza-like-illness (ILI) counts for several years.

    Parameters
    ----------
    years : iterable of str or int
        Four-digit years; weeks 01 to 52 of each year are requested.
    datasource : {'fluview', 'googletrends'}
        'fluview' reads the ``num_ili`` field returned by ``Epidata.fluview``;
        'googletrends' reads the ``num`` field returned by ``Epidata.gft``.
    state : str
        Location code passed to the selected endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per (year, week), stacked year after year, with columns
        ``week`` (0-51), ``percent_ili`` (never filled; kept so the returned
        columns match earlier versions of this function) and ``num_ili``.
        Weeks for which the API returned no record are NaN. The index is
        unique (0..n-1) rather than repeating 0-51 for every year.

    Raises
    ------
    ValueError
        If ``datasource`` is not one of the supported names.
    RuntimeError
        If the API returns no records for one of the years.
    """
    # Supported sources: endpoint to call and the response field holding the count.
    endpoints = {
        'fluview': (Epidata.fluview, 'num_ili'),
        'googletrends': (Epidata.gft, 'num'),
    }
    if datasource not in endpoints:
        raise ValueError('Unknown datasource %r; expected one of %s'
                         % (datasource, sorted(endpoints)))
    fetch, value_column = endpoints[datasource]

    weeks_per_year = 52
    annual_frames = []
    for year in years:
        year = str(year)  # accept ints as well as strings
        res = fetch([state], [Epidata.range(year + '01', year + '52')])
        records = res.get('epidata')
        if not records:
            raise RuntimeError('No %s data for location %r in %s: %r'
                               % (datasource, state, year, res))

        counts = pd.DataFrame(records)[value_column].iloc[:weeks_per_year]
        df_ili_annual = pd.DataFrame({'week': list(range(weeks_per_year))})
        # Index-aligned assignment: weeks beyond the returned records stay NaN.
        df_ili_annual['num_ili'] = counts
        annual_frames.append(df_ili_annual)

    # Concatenate once at the end; DataFrame.append no longer exists in pandas 2.x.
    if annual_frames:
        df_ili_all = pd.concat(annual_frames, ignore_index=True)
    else:
        df_ili_all = pd.DataFrame(columns=['week', 'num_ili'])
    df_ili_all.insert(1, 'percent_ili', float('nan'))
    return df_ili_all


In [None]:
def plot_flu_data_w_covid(df_ili_seasonal, df_ili_H1N1, df_ili_H1N5, df_covid_avg, df_covid_deaths_avg, \
                          peakday, start_week_for_plot, state_name=None, covid_data_startweek=14):
    """Plot annual flu trends (mean and 95% confidence interval) with the COVID-19 7-day average.

    The figure is saved as ``'Covid__US_<state>.png'`` in the working directory.

    Parameters
    ----------
    df_ili_seasonal, df_ili_H1N1 : pandas.DataFrame
        Frames with ``week`` and ``num_ili`` columns; each is drawn as one line.
    df_ili_H1N5 : pandas.DataFrame
        Accepted for interface compatibility; not drawn.
    df_covid_avg : pandas.DataFrame
        Daily frame with a ``value`` column (one row per day). It is not modified.
    df_covid_deaths_avg : pandas.DataFrame
        Accepted for interface compatibility; not drawn.
    peakday : int
        Row position in ``df_covid_avg`` of the peak value.
    start_week_for_plot : number
        Lower limit of the x axis, in weeks.
    state_name : str, optional
        State code used in the title and file name. When omitted, the
        notebook-level variable ``state`` is used, as in earlier versions.
    covid_data_startweek : number, optional
        Week of the year at which the first row of ``df_covid_avg`` is placed.
    """
    if state_name is None:
        # Backward-compatible fallback for callers that rely on the global `state`.
        state_name = state

    fig, ax = plt.subplots(figsize=(20, 6))

    # Labels are attached to each line explicitly instead of being matched to
    # the artists on the axes by position afterwards.
    sns.lineplot(data=df_ili_seasonal, x='week', y='num_ili', label='Seasonal Flu', ax=ax)
    sns.lineplot(data=df_ili_H1N1, x='week', y='num_ili', label='H1N1', ax=ax)

    # Place the daily COVID rows on the weekly axis. assign() returns a new
    # frame, so the caller's DataFrame does not gain a 'week' column.
    covid_weeks = [covid_data_startweek + day / 7.0 for day in range(len(df_covid_avg))]
    covid_by_week = df_covid_avg.assign(week=covid_weeks)
    sns.lineplot(data=covid_by_week, x='week', y='value', label='Covid19 7-day avg', ax=ax)

    xlim_lower = start_week_for_plot
    xlim_upper = 52

    # Month markers along y = 0; markers left of the visible range are skipped.
    month_markers = ((0, 'January'), (14, 'April'), (27, 'July'), (40, 'October'), (49, 'December'))
    for marker_week, month in month_markers:
        if marker_week >= xlim_lower:
            ax.text(marker_week, 0.0, month)

    peakweek = peakday / 7.0 + covid_data_startweek
    if peakweek > xlim_lower:
        # The +1000 offset lifts the annotation above the curve.
        ax.text(peakweek, df_covid_avg['value'].iloc[peakday] + 1000, 'PeakWeek=%d' % int(peakweek))

    ax.set_ylim([-100, 20000])
    ax.set_xlim([xlim_lower, xlim_upper])

    ax.set_title('Covid__US_' + state_name)
    ax.legend()
    fig.savefig('Covid__US_' + state_name + '.png')
    
    
   

In [None]:
# States to analyse. Other selections tried earlier:
#   ['tx', 'ca', 'fl', 'ga', 'la', 'oh', 'mi', 'wi', 'nc'] and ['oh', 'mi', 'wa'].
states = ['wi', 'nc', 'il', 'ca']

start_date, stop_date = 20200401, 20201007

# Reference periods for the flu curves. They do not depend on the state,
# so they are built once instead of on every loop iteration.
seasonal_years = [str(year) for year in range(2011, 2015)]
h1n1_years = [str(year) for year in range(2008, 2010)]
h1n5_years = [str(year) for year in range(2004, 2006)]

# NOTE: the loop variable has to stay named `state`; plot_flu_data_w_covid reads
# the notebook-level `state` for the figure title and the output file name.
for state in states:
    _, df_jhu_7dav, df_jhu_deaths_7dav, peakday = get_covid_data(state, start_date, stop_date)

    # seasonal flu and other pandemics BEFORE covid
    df_ili_seasonal = get_flu_data(seasonal_years, 'googletrends', state)
    df_ili_H1N1 = get_flu_data(h1n1_years, 'googletrends', state)
    df_ili_H1N5 = get_flu_data(h1n5_years, 'googletrends', state)

    plot_flu_data_w_covid(df_ili_seasonal, df_ili_H1N1, df_ili_H1N5, df_jhu_7dav, df_jhu_deaths_7dav,
                          peakday, start_week_for_plot=10)
    