In [46]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import datetime, date

In [31]:
def parser(x):
	return datetime.strptime(x+'20', '%m/%d/%Y')

In [32]:
def avg_relative_error(y, p):
    error = 0
    er = []
    for i in range(len(y)):
        if y[i] != 0 and p[i] != 0:
            e = (((p[i] - y[i]) / max(y[i], p[i])))
            er.append(e[0])
            error += e[0]
    error = error / len(y)
    return abs(error)

In [37]:
countries = ['Austria', 'Brazil', 'China', 'Czechia', 'Denmark', 'France', 'Germany', 'India', 'Italy', 'Korea_South', 'South_Africa', 'Spain', 'UK', 'US', 'Australia']
# countries = ['Austria']
results = []
for country in countries:
    result = {'country':'', 'conf': 0, 'dead': 0, 'recov': 0}
    result['country'] = country
    file = '../Data_Preprocessing/Clean/' + country + '.csv'
    df = pd.read_csv(file, index_col = ['Date'], parse_dates = ['Date'], date_parser = parser, squeeze = True)
    for index, row in df.iterrows():
        df.at[index, 'Confirmed_Cases'] = float(row['Confirmed_Cases'])
        df.at[index, 'Recoveries'] = float(row['Recoveries'])
        df.at[index, 'Deaths'] = float(row['Deaths'])
        
    confirmed = df[['Confirmed_Cases']]
    recoveries = df[['Recoveries']]
    deaths = df[['Deaths']]
    
    new_header = confirmed.iloc[0]
    confirmed = confirmed[1:]
    confirmed.columns = new_header
    
    new_header = recoveries.iloc[0]
    recoveries = recoveries[1:]
    recoveries.columns = new_header
    
    new_header = deaths.iloc[0]
    deaths = deaths[1:]
    deaths.columns = new_header
    
    confirmed = confirmed.values
    deaths = deaths.values
    recoveries = recoveries.values
    
    sp = int(len(confirmed)*0.8)
    
    X, y = train_test_split(confirmed, test_size=0.2, shuffle=True)
    X_r, y_r = train_test_split(recoveries, test_size=0.2, shuffle=True)
    X_d, y_d = train_test_split(deaths, test_size=0.2, shuffle=True)
    

    history = [a for a in X]
    predictions = []
    
    for t in range(len(y)):
        model = ARIMA(history, order=(1, 0, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y[t])
#         print('predicted=%f, expected=%f' % (yhat, y[t]))
    error = avg_relative_error(y, predictions)
    result['conf'] = error
    
    history = [a for a in X_d]
    predictions = []
    
    for t in range(len(y_d)):
        model = ARIMA(history, order=(1, 1, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y_d[t])
#         print('predicted=%f, expected=%f' % (yhat, y_d[t]))
    error = avg_relative_error(y_d, predictions)
    result['dead'] = error
    
    history = [a for a in X_r]
    predictions = []
    
    for t in range(len(y_r)):
        model = ARIMA(history, order=(1, 0, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y_r[t])
#         print('predicted=%f, expected=%f' % (yhat, y_r[t]))
    error = avg_relative_error(y_r, predictions)
    result['recov'] = error
    
    results.append(result)
    
    print(result)
    
# results

{'country': 'Austria', 'conf': 0.007936499820049254, 'dead': 0.06944444444444445, 'recov': 0.032741763958574185}
{'country': 'Brazil', 'conf': 0.0123430847488199, 'dead': 0.060546875, 'recov': 0.0}
{'country': 'China', 'conf': 0.7576428770293087, 'dead': 0.08823449947831663, 'recov': 0.5087873378059298}
{'country': 'Czechia', 'conf': 0.127626708503617, 'dead': 0.0, 'recov': 0.12123397435897436}
{'country': 'Denmark', 'conf': 0.10118112582111051, 'dead': 0.031210976523476527, 'recov': 0.0385098594053818}
{'country': 'France', 'conf': 0.14043832032457773, 'dead': 0.08018115015686393, 'recov': 0.1767075494905658}
{'country': 'Germany', 'conf': 0.1092811451902496, 'dead': 0.06139380530973451, 'recov': 0.11982288561956403}
{'country': 'India', 'conf': 0.1854662766109196, 'dead': 0.02180451127819549, 'recov': 0.05855922044377053}
{'country': 'Italy', 'conf': 0.15404340377368891, 'dead': 0.05882075921110191, 'recov': 0.014240999692747107}
{'country': 'Korea_South', 'conf': 0.22240384252039855

In [35]:
fit.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,79.0
Model:,"ARMA(1, 0)",Log Likelihood,-446.625
Method:,css-mle,S.D. of innovations,69.029
Date:,"Mon, 11 May 2020",AIC,899.249
Time:,05:48:46,BIC,906.358
Sample:,0,HQIC,902.097
,,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,22.8823,7.482,3.058,0.002,8.217,37.548
ar.L1.y,-0.0385,0.112,-0.344,0.731,-0.258,0.181

0,1,2,3,4
,Real,Imaginary,Modulus,Frequency
AR.1,-25.9920,+0.0000j,25.9920,0.5000


In [54]:
result = {'country':'', 'conf': 0, 'dead': 0, 'recov': 0}
country = 'Austria'
result['country'] = country
file = '../Data_Preprocessing/Clean/' + country + '.csv'
df = pd.read_csv(file, index_col = ['Date'], parse_dates = ['Date'], date_parser = parser, squeeze = True)
for index, row in df.iterrows():
    df.at[index, 'Confirmed_Cases'] = float(row['Confirmed_Cases'])
    df.at[index, 'Recoveries'] = float(row['Recoveries'])

confirmed = df[['Confirmed_Cases']]
recoveries = df[['Recoveries']]

new_header = confirmed.iloc[0]
confirmed = confirmed[1:]
confirmed.columns = new_header

new_header = recoveries.iloc[0]
recoveries = recoveries[1:]
recoveries.columns = new_header

confirmed = confirmed.values
recoveries = recoveries.values

X, y = train_test_split(confirmed, test_size=0.2, shuffle=True)
X_r, y_r = train_test_split(recoveries, test_size=0.2, shuffle=True)


historyC = [a for a in confirmed]
predictionsC = []

historyR = [a for a in recoveries]
predictionsR = []

c = 1

consec = 1

while True:
    model = ARIMA(historyC, order=(2, 0, 0))
    fit = model.fit(disp = 0)
    output = fit.forecast()
#     print(c, "C", output[0][0] + output[1][0])
    yhat = int(output[0][0] + output[1][0])
    historyC.append(yhat)

    modelR = ARIMA(historyR, order=(2, 0, 0))
    fitR = modelR.fit(disp = 0)
    outputR = fitR.forecast()
#     print(c, "R", outputR[0][0] + outputR[1][0])
    yhatR = int(outputR[0][0] + outputR[1][0])
    historyR.append(yhatR)
    
    if(yhatR >= yhat):
        if(consec > 1):
            print (c)
            break
        consec += 1
    else:
        consec = 1
    
    c += 1

result

2


{'country': 'Austria', 'conf': 0, 'dead': 0, 'recov': 0}