In [1]:
from fbprophet import Prophet
import pandas as pd
import numpy as np
from fbprophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go

In [2]:
def get_covid_data():
    """Download the latest OxCGRT policy-tracker CSV and return it as a DataFrame.

    The ``Date`` column is parsed as datetimes and the region/country name and
    code columns are read as ``str``.  ``error_bad_lines=False`` asks pandas to
    drop malformed lines instead of raising.  No derived columns are added.

    Returns:
        pandas.DataFrame: the table exactly as read from the remote CSV.
    """
    # Latest snapshot published in the OxCGRT GitHub repository.
    DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'

    # Identifier columns that must be kept as plain strings.
    text_columns = ("RegionName", "RegionCode", "CountryName", "CountryCode")
    read_options = dict(
        parse_dates=['Date'],
        encoding="ISO-8859-1",
        dtype={column: str for column in text_columns},
        error_bad_lines=False,
    )
    return pd.read_csv(DATA_URL, **read_options)

def mean_percent_error(y_test, y_hat):
    """Return the mean absolute percentage error as a fraction (not x100).

    Args:
        y_test: observed values; any array-like (list, tuple, ndarray, Series).
        y_hat: predicted values, same length as ``y_test``.

    Returns:
        The mean of ``|y_test - y_hat| / |y_test|``.  A zero in ``y_test``
        produces ``inf``/``nan`` following NumPy's division rules.
    """
    # Convert up front so plain Python sequences work and so pandas Series are
    # compared positionally instead of being re-aligned on their index.
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_hat, dtype=float)

    # Normalise by the magnitude of the observation so that negative actual
    # values cannot yield a negative "error".
    percent_error = np.abs(actual - predicted) / np.abs(actual)
    return percent_error.sum() / len(actual)

def find_best_regressors(df, train_df, test_df):
    """Greedy forward selection of Prophet extra regressors by hold-out MAPE.

    Starting from an empty set, each round tries adding every remaining
    candidate column to the current keepers, fits a Prophet model on
    ``train_df`` and scores it on ``test_df`` with ``mean_percent_error``.
    The candidate whose score is at least as good as every score seen so far
    (across all rounds) is kept; selection stops after the first round in
    which no candidate achieves that.

    Args:
        df: full frame holding ``ds`` and every candidate regressor column for
            both the training and the test dates.
        train_df: rows of ``df`` used to fit each model (needs ``ds`` and ``y``).
        test_df: hold-out rows; only ``y`` and the row count are read.

    Returns:
        list: names of the selected regressor columns, in selection order.
    """
    # The date, the target and the raw case/death counts are never candidates.
    excluded = {'ds', 'y', 'NewCases', 'NewDeaths', 'ConfirmedCases', 'ConfirmedDeaths'}
    todrop = [c for c in df.columns if c in excluded]
    regressors = df.columns.drop(todrop)

    keepers = []
    # Lowest MAPE seen over every trial so far; a running scalar replaces the
    # per-trial DataFrame that was only ever queried for its minimum.
    best_mape = float('inf')
    improving = True
    while improving:
        best = None
        improving = False

        print(f'current keepers are {keepers}')
        for regressor in regressors:
            # Evaluate the current keepers plus this one candidate without
            # mutating ``keepers`` itself.
            candidates = keepers + [regressor]
            m = Prophet(seasonality_mode='multiplicative',
                        yearly_seasonality=False,
                        daily_seasonality=False,
                        weekly_seasonality=True)
            m.add_country_holidays(country_name='US')
            for candidate in candidates:
                m.add_regressor(candidate)
            m.fit(train_df)

            # Prophet needs the regressor values for every date it predicts.
            future = m.make_future_dataframe(periods=len(test_df))
            future = pd.merge(future,
                              df[['ds'] + candidates].reset_index(drop=True),
                              how='outer', on='ds')
            forecast = m.predict(future)
            prophet_mape = mean_percent_error(test_df['y'].values,
                                              forecast['yhat'][-len(test_df):].values)

            # Ties count as an improvement, so the last of several equally
            # good candidates in a round is the one that is kept.
            if prophet_mape <= best_mape:
                best_mape = prophet_mape
                improving = True
                best = regressor

        if best is not None:
            keepers.append(best)
            regressors = regressors.drop(best)
    return keepers

In [3]:
# --- Scenario selection -----------------------------------------------------
division = 'state'            # 'country' or 'state'
region = 'Washington'
prediction = 'ConfirmedCases'

full_df = get_covid_data()

# Keep only the total rows for the requested region; the trailing [:-1]
# discards the final row of the selection.
# NOTE(review): presumably the most recent day is incomplete - confirm.
if division == 'country':
    df = full_df[(full_df['Jurisdiction'] == 'NAT_TOTAL') & (full_df['CountryName'] == region)][:-1]
elif division == 'state':
    df = full_df[(full_df['Jurisdiction'] == 'STATE_TOTAL') & (full_df['RegionName'] == region)][:-1]
else:
    # Fail here rather than with a NameError on `df` further down.
    raise ValueError(f"division must be 'country' or 'state', got {division!r}")

if df.empty:
    # A misspelled region would otherwise surface as an obscure model error.
    raise ValueError(f'no OxCGRT rows found for {division} {region!r}')

# Skip the first five columns.
# NOTE(review): presumably the identifier columns that precede 'Date' - confirm
# against the CSV layout.
df = df.iloc[:,5:]

# Drop the '...ForDisplay' and '...Flag' companion columns (case-insensitive).
cols = [c for c in df.columns if not c.lower().endswith(('fordisplay', 'flag'))]
df = df[cols]

# Remove columns with no data at all, carry the last known value forward, and
# treat anything still missing (leading gaps) as 0.
df = df.dropna(how='all', axis=1)
df = df.ffill()
df = df.fillna(0)

# Prophet expects the date in 'ds' and the target in 'y'.
df = df.rename(columns = {'Date':'ds',
                          prediction:'y'
                          })
# Whichever cumulative count was not chosen as the target is removed.
df = df.drop(columns = [col for col in df.columns if col in ['ConfirmedDeaths','ConfirmedCases']])

# December 2020 onwards is held out for evaluation.
train_df = df[df['ds'] < '2020-12-01']
test_df = df[df['ds'] >= '2020-12-01']

# Regressor frame: 'ds' plus every remaining column except the target.
keepers = df[test_df.columns.drop(['y'])]



## Prediction with no exogenous variables

In [34]:
# Baseline: Prophet fitted on the target series alone, with no extra regressors.
baseline_options = dict(
    seasonality_mode='multiplicative',
    yearly_seasonality=False,
    daily_seasonality=False,
    weekly_seasonality=True,
)
m = Prophet(**baseline_options)
m.add_country_holidays(country_name='US')
m.fit(train_df)

# Extend the training dates by one period per hold-out row and predict.
horizon = len(test_df)
future = m.make_future_dataframe(periods=horizon)
forecast = m.predict(future)

# Score only the trailing `horizon` predictions against the held-out target.
predicted_tail = forecast['yhat'][-horizon:].values
prophet_mape = mean_percent_error(test_df['y'].values, predicted_tail)
print(f'MAPE without exogenous variables is {prophet_mape}')

# Observed hold-out values, overlaid on the standard Prophet forecast figure.
observed_trace = go.Scatter(
    x=test_df['ds'],
    y=test_df['y'],
    mode='markers',
    marker=go.scatter.Marker(color='green', size=4),
    name='True',
)
fig = plot_plotly(
    m,
    forecast,
    changepoints=False,
    xlabel="Date",
    uncertainty=True,
    ylabel=prediction,
    plot_cap=True,
)
fig.add_trace(observed_trace)
fig.show()

MAPE without exogenous variables is 0.2067572970336526


## Comparing Intervention Strategies:
The graph below compares the model's December predictions under three scenarios: government interventions frozen at their 2020-12-01 values, interventions at their actual recorded values, and the GovernmentResponseIndex and ContainmentHealthIndex raised 10 points above their 2020-12-01 values.

In [26]:
# Show the regressor frame from the data-preparation cell: 'ds' plus every
# remaining column of df except the target 'y'.
keepers

Unnamed: 0,ds,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,...,H3_Contact tracing,H4_Emergency investment in healthcare,H5_Investment in vaccines,H6_Facial Coverings,H7_Vaccination policy,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex
97722,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0
97723,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0
97724,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0
97725,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0
97726,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98082,2020-12-26,3.0,2.0,2.0,4.0,1.0,1.0,1.0,3.0,1.0,...,1.0,0.0,0.0,3.0,2.0,65.74,70.24,64.89,63.33,75.0
98083,2020-12-27,3.0,2.0,2.0,4.0,1.0,1.0,1.0,3.0,1.0,...,1.0,0.0,0.0,3.0,2.0,65.74,70.24,64.89,63.33,75.0
98084,2020-12-28,3.0,2.0,2.0,4.0,1.0,1.0,1.0,3.0,1.0,...,1.0,0.0,0.0,3.0,2.0,65.74,70.24,64.89,63.33,75.0
98085,2020-12-29,3.0,2.0,2.0,4.0,1.0,1.0,1.0,3.0,1.0,...,1.0,0.0,0.0,3.0,2.0,65.74,70.24,64.89,63.33,75.0


In [36]:
# First date of the hold-out window; scenarios only alter rows from here on.
SCENARIO_START = '2020-12-01'


def _forecast_scenario(model, regressor_frame):
    """Predict with `model` using the regressor values in `regressor_frame`.

    Returns (future, forecast, mape), where mape is scored on the trailing
    len(test_df) predictions against test_df['y'].
    """
    scenario_future = model.make_future_dataframe(periods=len(test_df))
    # Prophet needs every regressor value for every date it predicts.
    scenario_future = pd.merge(scenario_future, regressor_frame, how = 'outer', on = 'ds')
    scenario_forecast = model.predict(scenario_future)
    scenario_mape = mean_percent_error(test_df['y'].values,
                                       scenario_forecast['yhat'][-len(test_df):].values)
    return scenario_future, scenario_forecast, scenario_mape


# Fit once. Every scenario trains on the same train_df with the same regressor
# names, so only the future regressor values passed to predict() differ.
m = Prophet(seasonality_mode = 'multiplicative',
                yearly_seasonality = False,
                daily_seasonality = False,
                weekly_seasonality = True)
m.add_country_holidays(country_name='US')
for keeper in keepers.columns.drop('ds'):
    m.add_regressor(keeper)
m.fit(train_df)
# Kept as aliases so the names m2 / m3 remain defined; separate fits would
# reproduce the same model from the same training data.
m2 = m
m3 = m

# Rows altered by the scenarios, and the values they are pinned to.
scenario_rows = keepers['ds'] >= SCENARIO_START
baseline_rows = df.loc[df['ds'] == SCENARIO_START]
if baseline_rows.empty:
    raise ValueError(f'no row for {SCENARIO_START} in df; cannot build scenarios')

#Actual government response indices
future, forecast, prophet_mape = _forecast_scenario(m, keepers)
print(f'MAPE is {prophet_mape}')

#Freeze Government Interventions at 2020-12-01
keepers_frozen = keepers.copy()
for col in keepers.columns.drop('ds'):
    keepers_frozen.loc[scenario_rows, col] = baseline_rows[col].values[0]
future_frozen, forecast_frozen, prophet_mape_frozen = _forecast_scenario(m2, keepers_frozen)
print(f'frozen MAPE is {prophet_mape_frozen}')

#Increase Government Response Indices by 10
# Only these two indices are raised; every other regressor keeps its actual value.
keepers_increased = keepers.copy()
for col in ['GovernmentResponseIndex', 'ContainmentHealthIndex']:
    keepers_increased.loc[scenario_rows, col] = baseline_rows[col].values[0] + 10
# Stored under its own name so the actual-scenario prophet_mape is not overwritten.
future_increased, forecast_increased, prophet_mape_increased = _forecast_scenario(m3, keepers_increased)
print(f'increase MAPE is {prophet_mape_increased}')


# Actual-scenario forecast as the base figure, with the observed hold-out
# values and the two counterfactual forecasts overlaid.
fig = plot_plotly(m, forecast, changepoints = False, xlabel="Date",
                  uncertainty = True,
                  ylabel=prediction, plot_cap=True)
fig.add_trace(go.Scatter(x=test_df['ds'], y=test_df['y'],
                         mode = 'markers',
                         marker=go.scatter.Marker(color='green', size = 4),
                         name = f'True'
                         ))
fig.add_trace(go.Scatter(x=forecast_frozen['ds'], y =forecast_frozen['yhat'],
                         marker=go.scatter.Marker(color='yellow', size = 4),
                         name = f'prediction if frozen'
                         ))
fig.add_trace(go.Scatter(x=forecast_increased['ds'], y=forecast_increased['yhat'],
                         mode = 'lines',
                         marker=go.scatter.Marker(color='red', size = 4),
                         name = f'prediction if increased'
                         ))
fig.layout.title = {'text': f'True and Predicted {prediction} in {region}'}
fig.update_layout(showlegend=True)
fig.show()

MAPE is 0.0654282653283666
frozen MAPE is 0.17062841614845656
increase MAPE is 0.5639664027664775


## The model is reversing the causal relationship between interventions and cases. If interventions increase, the model predicts that cases increase. In fact, it is more likely that governments increase interventions in response to rising cases. My conclusion is that exogenous variables cannot be used to predict future case levels.