In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARMA,ARMAResults,ARIMA,ARIMAResults
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from pmdarima import auto_arima
from datetime import datetime
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.stattools import adfuller
from matplotlib import dates
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [None]:
df = pd.read_csv ('OxCGRT_Download_280420_204729_Full1.csv',
                  index_col=['CountryName','Date'],parse_dates=True)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df.dtypes

In [None]:
df.drop(['CountryCode','C1_Notes','C2_Notes','C3_Notes','C4_Notes','C5_Notes','C6_Notes','C7_Notes','C8_Notes','E1_Notes','E2_Notes',
        'E3_Notes','E4_Notes','H1_Notes','H2_Notes','H3_Notes','H4_Notes','H5_Notes','M1_Wildcard',
        'M1_Notes'],axis=1,inplace=True)

In [None]:
df = df.loc [['Vietnam', 'United States','South Korea', 'Italy']] 
df

In [None]:
df.index

In [None]:
df_Italy = df.loc['Italy']
df_Italy[['ConfirmedCases', 'ConfirmedDeaths']].plot(figsize=(5, 5));

In [None]:
df_SKorea = df.loc['South Korea']
df_SKorea[['ConfirmedCases', 'ConfirmedDeaths']].plot(figsize=(5, 5));

In [None]:
df_US = df.loc['United States']
df_US[['ConfirmedCases', 'ConfirmedDeaths']].plot(figsize=(5, 5));

In [None]:
df_Vietnam = df.loc['Vietnam']
df_Vietnam[['ConfirmedCases', 'ConfirmedDeaths']].plot(figsize=(5, 5));

In [None]:
f, a = plt.subplots(nrows=2, ncols=2, figsize=(11, 13), dpi= 90)
df.reset_index().pivot('Date','CountryName','ConfirmedDeaths').plot(ax=a[0,0], title='Confirmed Deaths', grid=True)
df.reset_index().pivot('Date','CountryName', 'ConfirmedCases').plot(ax=a[0,1],title='Confirmed Cases', grid=True)
df.reset_index().pivot('Date','CountryName', 'StringencyIndex').plot(ax=a[1,0],title='Stringency Index', grid=True)
df.reset_index().pivot('Date','CountryName','LegacyStringencyIndex').plot(ax=a[1,1],
                                                                          title='Legacy Stringency Index', grid=True)

In [None]:
df2=df.loc['South Korea']
df2.index

In [None]:
def adf_test(series,title=''):
    """
    Print an Augmented Dickey-Fuller report for a time series.

    series : pandas Series to test; missing values are dropped first.
    title  : optional label shown in the report header.

    The report lists the test statistic, p-value, number of lags used,
    number of observations and the critical values, followed by a three-line
    verdict based on a 0.05 p-value threshold. Everything is printed;
    nothing is returned.

    Note: this notebook defines the same helper in more than one cell; the
    most recently executed definition is the one in effect.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')

    adf_result = adfuller(series.dropna(), autolag='AIC')
    p_value = adf_result[1]

    # The first four entries of the result are reported under these labels.
    report = pd.Series(
        adf_result[0:4],
        index=['ADF test statistic', 'p-value', '# lags used', '# observations'],
    )
    # The fifth entry maps each significance level to its critical value.
    for level, threshold in adf_result[4].items():
        report[f'critical value ({level})'] = threshold
    print(report.to_string())

    verdict = (
        ("Strong evidence against the null hypothesis",
         "Reject the null hypothesis",
         "Data has no unit root and is stationary")
        if p_value <= 0.05 else
        ("Weak evidence against the null hypothesis",
         "Fail to reject the null hypothesis",
         "Data has a unit root and is non-stationary")
    )
    print(*verdict, sep='\n')

In [None]:
df2['StringencyIndexForDisplay'].plot(figsize=(12,5));

In [None]:
adf_test(df2['StringencyIndexForDisplay'])

In [None]:
formatter = ticker.StrMethodFormatter('{x:,.0f}')
title = 'Strictness of Policies - South Korea'
ylabel='Stringency Index'
xlabel=''
ax = df2['StringencyIndexForDisplay'].plot(figsize=(12,5),title=title)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
auto_arima(df2['StringencyIndexForDisplay'],seasonal=False).summary()

In [None]:
df2['d1'] = diff(df2['StringencyIndexForDisplay'],k_diff=1)
adf_test(df2['d1'],'Strictness of Policies- South Korea')

In [None]:
title = 'Strictness of Policies- South Korea'
lags = 40
plot_acf(df2['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
title = 'Partial Autocorrelation: Strictness of Policies- South Korea'
lags = 40
plot_pacf(df2['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
stepwise_fit = auto_arima(df2['StringencyIndexForDisplay'], start_p=0, start_q=0,
                          max_p=2, max_q=2, m=12,
                          seasonal=False,
                          d=None, trace=True,
                          error_action='ignore', 
                          suppress_warnings=True,
                          stepwise=True)         

stepwise_fit.summary()

In [None]:
len(df2)

In [None]:
train = df2.iloc[:90]
test = df2.iloc[90:]

In [None]:
model = ARIMA(train['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
results.summary()

In [None]:
start=len(train)
end=len(train)+len(test)-1
predictions = results.predict(start=start, end=end, dynamic=False, typ='levels').rename('ARIMA(1,1,1) Predictions')

In [None]:
for i in range(len(predictions)):
    print(f"predicted={predictions[i]:<11.10}, expected={test['StringencyIndexForDisplay'][i]}")

In [None]:
title = 'Strictness of Policies- South Korea'
ylabel='Stringency Index'
xlabel=''
ax = test['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
predictions.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
error = mean_squared_error(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) MSE Error: {error:11.10}')

In [None]:
error = rmse(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) RMSE Error: {error:11.10}')

In [None]:
model = ARIMA(df2['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
fcast = results.predict(len(df2),len(df2)+11,typ='levels').rename('ARIMA(1,1,1) Forecast')

In [None]:
title = 'Strictness of Policies- South Korea'
ylabel='Stringency Index'
xlabel=''
ax = df2['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
fcast.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
df3=df.loc['Italy']
df3.index

In [None]:
def adf_test(series,title=''):
    """
    Print an Augmented Dickey-Fuller report for a time series.

    series : pandas Series to test; missing values are dropped first.
    title  : optional label shown in the report header.

    The report lists the test statistic, p-value, number of lags used,
    number of observations and the critical values, followed by a three-line
    verdict based on a 0.05 p-value threshold. Everything is printed;
    nothing is returned.

    Note: this notebook defines the same helper in more than one cell; the
    most recently executed definition is the one in effect.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')

    adf_result = adfuller(series.dropna(), autolag='AIC')
    p_value = adf_result[1]

    # The first four entries of the result are reported under these labels.
    report = pd.Series(
        adf_result[0:4],
        index=['ADF test statistic', 'p-value', '# lags used', '# observations'],
    )
    # The fifth entry maps each significance level to its critical value.
    for level, threshold in adf_result[4].items():
        report[f'critical value ({level})'] = threshold
    print(report.to_string())

    verdict = (
        ("Strong evidence against the null hypothesis",
         "Reject the null hypothesis",
         "Data has no unit root and is stationary")
        if p_value <= 0.05 else
        ("Weak evidence against the null hypothesis",
         "Fail to reject the null hypothesis",
         "Data has a unit root and is non-stationary")
    )
    print(*verdict, sep='\n')

In [None]:
df3['StringencyIndexForDisplay'].plot(figsize=(12,5));

In [None]:
adf_test(df3['StringencyIndexForDisplay'])

In [None]:
formatter = ticker.StrMethodFormatter('{x:,.0f}')
title = 'Strictness of Policies - Italy'
ylabel='Stringency Index'
xlabel=''
ax = df3['StringencyIndexForDisplay'].plot(figsize=(12,5),title=title)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
auto_arima(df3['StringencyIndexForDisplay'],seasonal=False).summary()

In [None]:
df3['d1'] = diff(df3['StringencyIndexForDisplay'],k_diff=1)
adf_test(df3['d1'],'Strictness of Policies- Italy')

In [None]:
title = 'Strictness of Policies- Italy'
lags = 40
plot_acf(df3['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
title = 'Partial Autocorrelation: Strictness of Policies- Italy'
lags = 40
plot_pacf(df3['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
stepwise_fit = auto_arima(df3['StringencyIndexForDisplay'], start_p=0, start_q=0,
                          max_p=2, max_q=2, m=12,
                          seasonal=False,
                          d=None, trace=True,
                          error_action='ignore', 
                          suppress_warnings=True,
                          stepwise=True)         

stepwise_fit.summary()

In [None]:
len(df3)

In [None]:
train = df3.iloc[:90]
test = df3.iloc[90:]

In [None]:
model = ARIMA(train['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
results.summary()

In [None]:
start=len(train)
end=len(train)+len(test)-1
predictions = results.predict(start=start, end=end, dynamic=False, typ='levels').rename('ARIMA(1,1,1) Predictions')

In [None]:
for i in range(len(predictions)):
    print(f"predicted={predictions[i]:<11.10}, expected={test['StringencyIndexForDisplay'][i]}")

In [None]:
title = 'Strictness of Policies- Italy'
ylabel='Stringency Index'
xlabel=''
ax = test['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
predictions.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
error = mean_squared_error(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) MSE Error: {error:11.10}')

In [None]:
error = rmse(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) RMSE Error: {error:11.10}')

In [None]:
model = ARIMA(df3['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
fcast = results.predict(len(df3),len(df3)+11,typ='levels').rename('ARIMA(1,1,1) Forecast')

In [None]:
title = 'Strictness of Policies- Italy'
ylabel='Stringency Index'
xlabel=''
ax = df3['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
fcast.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
df4=df.loc['Vietnam']
df4.index

In [None]:
def adf_test(series,title=''):
    """
    Print an Augmented Dickey-Fuller report for a time series.

    series : pandas Series to test; missing values are dropped first.
    title  : optional label shown in the report header.

    The report lists the test statistic, p-value, number of lags used,
    number of observations and the critical values, followed by a three-line
    verdict based on a 0.05 p-value threshold. Everything is printed;
    nothing is returned.

    Note: this notebook defines the same helper in more than one cell; the
    most recently executed definition is the one in effect.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')

    adf_result = adfuller(series.dropna(), autolag='AIC')
    p_value = adf_result[1]

    # The first four entries of the result are reported under these labels.
    report = pd.Series(
        adf_result[0:4],
        index=['ADF test statistic', 'p-value', '# lags used', '# observations'],
    )
    # The fifth entry maps each significance level to its critical value.
    for level, threshold in adf_result[4].items():
        report[f'critical value ({level})'] = threshold
    print(report.to_string())

    verdict = (
        ("Strong evidence against the null hypothesis",
         "Reject the null hypothesis",
         "Data has no unit root and is stationary")
        if p_value <= 0.05 else
        ("Weak evidence against the null hypothesis",
         "Fail to reject the null hypothesis",
         "Data has a unit root and is non-stationary")
    )
    print(*verdict, sep='\n')

In [None]:
df4['StringencyIndexForDisplay'].plot(figsize=(12,5));

In [None]:
adf_test(df4['StringencyIndexForDisplay'])

In [None]:
formatter = ticker.StrMethodFormatter('{x:,.0f}')
title = 'Strictness of Policies - Vietnam'
ylabel='Stringency Index'
xlabel=''
ax = df4['StringencyIndexForDisplay'].plot(figsize=(12,5),title=title)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
auto_arima(df4['StringencyIndexForDisplay'],seasonal=False).summary()

In [None]:
df4['d1'] = diff(df4['StringencyIndexForDisplay'],k_diff=1)
adf_test(df4['d1'],'Strictness of Policies- Vietnam')

In [None]:
title = 'Strictness of Policies- Vietnam'
lags = 40
plot_acf(df4['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
title = 'Partial Autocorrelation: Strictness of Policies- Vietnam'
lags = 40
plot_pacf(df4['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
stepwise_fit = auto_arima(df4['StringencyIndexForDisplay'], start_p=0, start_q=0,
                          max_p=2, max_q=2, m=12,
                          seasonal=False,
                          d=None, trace=True,
                          error_action='ignore', 
                          suppress_warnings=True,
                          stepwise=True)         

stepwise_fit.summary()

In [None]:
len(df4)

In [None]:
train = df4.iloc[:90]
test = df4.iloc[90:]

In [None]:
model = ARIMA(train['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
results.summary()

In [None]:
start=len(train)
end=len(train)+len(test)-1
predictions = results.predict(start=start, end=end, dynamic=False, typ='levels').rename('ARIMA(1,1,1) Predictions')

In [None]:
for i in range(len(predictions)):
    print(f"predicted={predictions[i]:<11.10}, expected={test['StringencyIndexForDisplay'][i]}")

In [None]:
title = 'Strictness of Policies- Vietnam'
ylabel='Stringency Index'
xlabel=''
ax = test['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
predictions.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
error = mean_squared_error(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) MSE Error: {error:11.10}')

In [None]:
error = rmse(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) RMSE Error: {error:11.10}')

In [None]:
model = ARIMA(df4['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
fcast = results.predict(len(df4),len(df4)+11,typ='levels').rename('ARIMA(1,1,1) Forecast')

In [None]:
title = 'Strictness of Policies- Vietnam'
ylabel='Stringency Index'
xlabel=''
ax = df4['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
fcast.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
df5=df.loc['United States']
df5.index

In [None]:
def adf_test(series,title=''):
    """
    Print an Augmented Dickey-Fuller report for a time series.

    series : pandas Series to test; missing values are dropped first.
    title  : optional label shown in the report header.

    The report lists the test statistic, p-value, number of lags used,
    number of observations and the critical values, followed by a three-line
    verdict based on a 0.05 p-value threshold. Everything is printed;
    nothing is returned.

    Note: this notebook defines the same helper in more than one cell; the
    most recently executed definition is the one in effect.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')

    adf_result = adfuller(series.dropna(), autolag='AIC')
    p_value = adf_result[1]

    # The first four entries of the result are reported under these labels.
    report = pd.Series(
        adf_result[0:4],
        index=['ADF test statistic', 'p-value', '# lags used', '# observations'],
    )
    # The fifth entry maps each significance level to its critical value.
    for level, threshold in adf_result[4].items():
        report[f'critical value ({level})'] = threshold
    print(report.to_string())

    verdict = (
        ("Strong evidence against the null hypothesis",
         "Reject the null hypothesis",
         "Data has no unit root and is stationary")
        if p_value <= 0.05 else
        ("Weak evidence against the null hypothesis",
         "Fail to reject the null hypothesis",
         "Data has a unit root and is non-stationary")
    )
    print(*verdict, sep='\n')

In [None]:
df5['StringencyIndexForDisplay'].plot(figsize=(12,5));

In [None]:
adf_test(df5['StringencyIndexForDisplay'])

In [None]:
formatter = ticker.StrMethodFormatter('{x:,.0f}')
title = 'Strictness of Policies - United States'
ylabel='Stringency Index'
xlabel=''
ax = df5['StringencyIndexForDisplay'].plot(figsize=(12,5),title=title)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
auto_arima(df5['StringencyIndexForDisplay'],seasonal=False).summary()

In [None]:
df5['d1'] = diff(df5['StringencyIndexForDisplay'],k_diff=1)
adf_test(df5['d1'],'Strictness of Policies- United States')

In [None]:
title = 'Strictness of Policies- United States'
lags = 40
plot_acf(df5['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
title = 'Partial Autocorrelation: Strictness of Policies- United States'
lags = 40
plot_pacf(df5['StringencyIndexForDisplay'],title=title,lags=lags);

In [None]:
stepwise_fit = auto_arima(df5['StringencyIndexForDisplay'], start_p=0, start_q=0,
                          max_p=2, max_q=2, m=12,
                          seasonal=False,
                          d=None, trace=True,
                          error_action='ignore', 
                          suppress_warnings=True,
                          stepwise=True)         

stepwise_fit.summary()

In [None]:
len(df5)

In [None]:
train = df5.iloc[:90]
test = df5.iloc[90:]

In [None]:
model = ARIMA(train['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
results.summary()

In [None]:
start=len(train)
end=len(train)+len(test)-1
predictions = results.predict(start=start, end=end, dynamic=False, typ='levels').rename('ARIMA(1,1,1) Predictions')

In [None]:
for i in range(len(predictions)):
    print(f"predicted={predictions[i]:<11.10}, expected={test['StringencyIndexForDisplay'][i]}")

In [None]:
title = 'Strictness of Policies- United States'
ylabel='Stringency Index'
xlabel=''
ax = test['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
predictions.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);

In [None]:
error = mean_squared_error(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) MSE Error: {error:11.10}')

In [None]:
error = rmse(test['StringencyIndexForDisplay'], predictions)
print(f'ARIMA(1,1,1) RMSE Error: {error:11.10}')

In [None]:
model = ARIMA(df5['StringencyIndexForDisplay'],order=(1,1,1))
results = model.fit()
fcast = results.predict(len(df5),len(df5)+11,typ='levels').rename('ARIMA(1,1,1) Forecast')

In [None]:
title = 'Strictness of Policies- United States'
ylabel='Stringency Index'
xlabel=''
ax = df5['StringencyIndexForDisplay'].plot(legend=True,figsize=(12,6),title=title)
fcast.plot(legend=True)
ax.autoscale(axis='x',tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)
ax.yaxis.set_major_formatter(formatter);