In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly

In [None]:
def get_covid_data(data_url='https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'):
    """Load the OxCGRT tracker CSV and add daily ``NewCases`` / ``NewDeaths`` columns.

    Parameters
    ----------
    data_url : str, optional
        URL or local path handed to ``pandas.read_csv``. Defaults to the
        "latest" OxCGRT CSV on GitHub, so existing zero-argument calls keep
        working.

    Returns
    -------
    pandas.DataFrame
        The parsed file plus two columns, ``NewCases`` and ``NewDeaths``,
        holding the row-to-row difference of ``ConfirmedCases`` /
        ``ConfirmedDeaths`` within each region. The first row of every region
        (and any other row whose difference is NaN) is set to 0. Rows whose
        ``Jurisdiction`` is neither ``STATE_TOTAL`` nor ``NAT_TOTAL`` are left
        as NaN in the new columns.

    Notes
    -----
    Differences are taken in file order, so the rows of each region are
    assumed to already be sorted by date -- TODO confirm against the source.
    State rows are grouped by ``RegionName`` only; two countries sharing a
    region name would be differenced as one series.
    """
    read_options = dict(
        parse_dates=['Date'],
        encoding="ISO-8859-1",
        dtype={"RegionName": str},
    )
    try:
        # pandas >= 1.3 spelling for "silently skip malformed lines".
        full_df = pd.read_csv(data_url, on_bad_lines='skip', **read_options)
    except TypeError:
        # Older pandas does not know `on_bad_lines`; use the legacy flag.
        full_df = pd.read_csv(data_url, error_bad_lines=False, **read_options)

    # (jurisdiction value, column that identifies one region within it)
    region_keys = (('STATE_TOTAL', 'RegionName'), ('NAT_TOTAL', 'CountryName'))
    for jurisdiction, key in region_keys:
        rows = (full_df['Jurisdiction'] == jurisdiction) & full_df[key].notna()
        if not rows.any():
            continue
        # One grouped diff replaces a full-frame scan per state/country.
        grouped = full_df.loc[rows].groupby(key, sort=False)
        full_df.loc[rows, 'NewCases'] = grouped['ConfirmedCases'].diff().fillna(0)
        full_df.loc[rows, 'NewDeaths'] = grouped['ConfirmedDeaths'].diff().fillna(0)

    return full_df

def mean_percent_error(y_test, y_hat):
    """Return the mean absolute percentage error, as a fraction (0.1 == 10 %).

    Parameters
    ----------
    y_test : array-like
        Observed values. Lists, tuples, NumPy arrays and pandas Series are
        accepted; values are compared positionally.
    y_hat : array-like
        Predicted values, same length as ``y_test``.

    Returns
    -------
    float
        ``sum(|y_test - y_hat| / |y_test|) / len(y_test)``.

    Notes
    -----
    A zero in ``y_test`` makes the corresponding term (and therefore the
    result) ``inf`` or ``nan``; no special handling is applied.
    """
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_hat, dtype=float)
    # Divide by |actual| so negative observations cannot produce negative
    # "percent errors" that cancel out positive ones.
    percent_error = np.abs(actual - predicted) / np.abs(actual)
    return percent_error.sum() / len(actual)

def _score_regressors(df, train_df, test_df, candidate_regressors):
    """Fit one Prophet model with ``candidate_regressors`` and return its test MAPE."""
    m = Prophet(seasonality_mode='multiplicative',
                yearly_seasonality=False,
                daily_seasonality=False,
                weekly_seasonality=True)
    m.add_country_holidays(country_name='US')
    for name in candidate_regressors:
        m.add_regressor(name)
    m.fit(train_df)

    # Extend the training dates by the test horizon, then attach the regressor
    # values from the full frame so the model can predict over that horizon.
    future = m.make_future_dataframe(periods=len(test_df))
    future = pd.merge(future,
                      df[['ds'] + candidate_regressors].reset_index(drop=True),
                      how='outer', on='ds')
    forecast = m.predict(future)
    return mean_percent_error(test_df['y'].values,
                              forecast['yhat'][-len(test_df):].values)


def find_best_regressors(df, train_df, test_df):
    """Greedy forward selection of extra Prophet regressors by test-set MAPE.

    Each round tries every remaining candidate column on top of the regressors
    already kept, fits a Prophet model on ``train_df`` and scores it on
    ``test_df``. The candidate giving the lowest MAPE is kept only if it is
    strictly better than the best MAPE seen so far. Selection stops after the
    first round in which nothing improves.

    Parameters
    ----------
    df : pandas.DataFrame
        Full frame (train + test rows) with ``ds``, ``y`` and the candidate
        regressor columns.
    train_df : pandas.DataFrame
        Rows used to fit each model.
    test_df : pandas.DataFrame
        Rows used to score each model; must not be empty.

    Returns
    -------
    list
        Names of the selected regressor columns, in the order they were added.

    Raises
    ------
    ValueError
        If ``test_df`` is empty, since no model could be scored.

    Notes
    -----
    No regressor-free baseline is fitted, so the best candidate of the first
    round is kept whenever its MAPE is finite.
    """
    if len(test_df) == 0:
        raise ValueError('test_df must contain at least one row to score candidates')

    # Target and case/death columns must never be offered as regressors.
    excluded = {'ds', 'y', 'NewCases', 'NewDeaths', 'ConfirmedCases', 'ConfirmedDeaths'}
    remaining = [c for c in df.columns if c not in excluded]

    keepers = []
    best_mape = float('inf')
    improving = True
    while improving:
        improving = False
        best = None

        print(f'current keepers are {keepers}')
        for regressor in remaining:
            prophet_mape = _score_regressors(df, train_df, test_df, keepers + [regressor])
            # Strict comparison: an exact tie with the best score is not an
            # improvement and must not add a regressor.
            if prophet_mape < best_mape:
                best_mape = prophet_mape
                best = regressor

        if best is not None:
            keepers.append(best)
            remaining.remove(best)
            improving = True
    return keepers

In [None]:
# --- Configuration ---------------------------------------------------------
division = 'state'               # 'country' or 'state'
region = 'Washington'            # CountryName or RegionName, depending on `division`
prediction = 'ConfirmedCases'    # column renamed to `y` and forecast by Prophet
TEST_START = '2020-12-01'        # first date of the hold-out (test) period

full_df = get_covid_data()
if division == 'country':
    region_rows = (full_df['Jurisdiction'] == 'NAT_TOTAL') & (full_df['CountryName'] == region)
elif division == 'state':
    region_rows = (full_df['Jurisdiction'] == 'STATE_TOTAL') & (full_df['RegionName'] == region)
else:
    raise ValueError(f"division must be 'country' or 'state', got {division!r}")

# The final row of the selection is dropped
# (presumably the newest, still-incomplete day -- TODO confirm).
df = full_df[region_rows][:-1]
if df.empty:
    raise ValueError(f'no usable rows found for {division} {region!r}')

# Drop the first five columns by position; this depends on the column order
# of the upstream file (assumed to be identifier columns ahead of `Date`).
df = df.iloc[:, 5:]
# Discard the "...ForDisplay" and "...Flag" columns.
cols = [c for c in df.columns
        if not c.lower().endswith('fordisplay') and not c.lower().endswith('flag')]
df = df[cols]
df = df.dropna(how='all')
# Carry the last known value forward, then zero-fill whatever precedes the
# first observation.
df = df.ffill()
df = df.fillna(0)
df = df.rename(columns={'Date': 'ds', prediction: 'y'})

train_df = df[df['ds'] < TEST_START].reset_index(drop=True)
test_df = df[df['ds'] >= TEST_START].reset_index(drop=True)
df.head()

In [None]:
# Choose the extra regressors, then fit a single final model that uses them.
keepers = find_best_regressors(df, train_df, test_df)

m = Prophet(
    weekly_seasonality=True,
    daily_seasonality=False,
    yearly_seasonality=False,
    seasonality_mode='multiplicative',
)
m.add_country_holidays(country_name='US')
for keeper in keepers:
    m.add_regressor(keeper)
m.fit(train_df)

# Forecast over the training dates plus one period per test row, with the
# regressor values merged in from the full frame.
future = pd.merge(
    m.make_future_dataframe(periods=len(test_df)),
    df[['ds'] + keepers].reset_index(drop=True),
    how='outer',
    on='ds',
)
forecast = m.predict(future)

# Score only the trailing len(test_df) predictions against the held-out values.
prophet_mape = mean_percent_error(
    test_df['y'].values,
    forecast['yhat'].values[-len(test_df):],
)
print(f'MAPE is {prophet_mape}')


# Forecast figure with the held-out observations overlaid as green markers.
fig = plot_plotly(m, forecast, changepoints=True, xlabel="Date", ylabel=prediction)
fig.add_trace(
    go.Scatter(
        x=test_df['ds'],
        y=test_df['y'],
        mode='markers',
        marker=go.scatter.Marker(color='green', size=4),
        name='True',
    )
)
fig.layout.title = dict(text=f'True and Predicted {prediction}')
fig.show()

In [None]:
# Rebuild the forecast figure from the fitted model `m` and its `forecast`,
# overlaying the held-out observations as green markers.
fig = plot_plotly(m, forecast, changepoints=True, xlabel="Date", ylabel=prediction)
fig.add_trace(go.Scatter(x=test_df['ds'], y=test_df['y'],
                         mode='markers',
                         marker=go.scatter.Marker(color='green', size=4),
                         name='True'
                         ))
fig.layout.title = {'text': f'True and Predicted {prediction}'}
fig.show()

In [None]:
# Component plot for the fitted model `m` and its `forecast`; as the last
# expression in the cell, the returned object is what the notebook displays.
plot_components_plotly(m, forecast)