In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import datetime

In [2]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [3]:
def parser(x):
	return datetime.strptime(x+'20', '%m/%d/%Y')

In [4]:
def avg_relative_error(y, p):
    error = 0
    er = []
    for i in range(len(y)):
        if y[i] != 0 and p[i] != 0:
            e = (((p[i] - y[i]) / max(y[i], p[i])))
            er.append(e[0])
            error += e[0]
    error = error / len(y)
    return abs(error)

In [29]:
countries = ['Austria', 'Brazil', 'China', 'Czechia', 'Denmark', 'France', 'Germany', 'India', 'Italy', 'Korea_South', 'South_Africa', 'Spain', 'UK', 'US', 'Australia']
# countries = ['Austria']
results = []
for country in countries:
    result = {'country':'', 'conf': 0, 'dead': 0, 'recov': 0}
    result['country'] = country
    file = '../Data_Preprocessing/Clean/' + country + '.csv'
    df = pd.read_csv(file, index_col = ['Date'], parse_dates = ['Date'], date_parser = parser, squeeze = True)
    for index, row in df.iterrows():
        df.at[index, 'Confirmed_Cases'] = float(row['Confirmed_Cases'])
        df.at[index, 'Recoveries'] = float(row['Recoveries'])
        df.at[index, 'Deaths'] = float(row['Deaths'])
        
    confirmed = df[['Confirmed_Cases']]
    recoveries = df[['Recoveries']]
    deaths = df[['Deaths']]
    
    new_header = confirmed.iloc[0]
    confirmed = confirmed[1:]
    confirmed.columns = new_header
    
    new_header = recoveries.iloc[0]
    recoveries = recoveries[1:]
    recoveries.columns = new_header
    
    new_header = deaths.iloc[0]
    deaths = deaths[1:]
    deaths.columns = new_header
    
    confirmed = confirmed.values
    deaths = deaths.values
    recoveries = recoveries.values
    
    sp = int(len(confirmed)*0.8)
    
    X, y = train_test_split(confirmed, test_size=0.2, shuffle=True)
    X_r, y_r = train_test_split(recoveries, test_size=0.2, shuffle=True)
    X_d, y_d = train_test_split(deaths, test_size=0.2, shuffle=True)
    
#     X, y = confirmed[0:sp], confirmed[sp:]
#     X_r, y_r = recoveries[0:sp], recoveries[sp:]
#     X_d, y_d = deaths[0:sp], deaths[sp:]
    
    history = [a for a in X]
    predictions = []
    
    for t in range(len(y)):
        model = ARIMA(history, order=(1, 0, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y[t])
#         print('predicted=%f, expected=%f' % (yhat, y[t]))
    error = avg_relative_error(y, predictions)
    result['conf'] = error
    
    history = [a for a in X_d]
    predictions = []
    
    for t in range(len(y_d)):
        model = ARIMA(history, order=(1, 1, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y_d[t])
#         print('predicted=%f, expected=%f' % (yhat, y_d[t]))
    error = avg_relative_error(y_d, predictions)
    result['dead'] = error
    
    history = [a for a in X_r]
    predictions = []
    
    for t in range(len(y_r)):
        model = ARIMA(history, order=(1, 0, 0))
        fit = model.fit(disp = 0)
        output = fit.forecast()
        yhat = int(output[0])
        predictions.append(yhat)
        history.append(y_r[t])
#         print('predicted=%f, expected=%f' % (yhat, y_r[t]))
    error = avg_relative_error(y_r, predictions)
    result['recov'] = error
    
    results.append(result)
    
results



[{'country': 'Austria',
  'conf': 0.23011857881272257,
  'dead': 0.09444444444444444,
  'recov': 0.15737767629499422},
 {'country': 'Brazil',
  'conf': 0.12573375041859053,
  'dead': 0.025186567164179104,
  'recov': 0.0},
 {'country': 'China',
  'conf': 0.8120682521403699,
  'dead': 0.07136755334633423,
  'recov': 0.2928299203302256},
 {'country': 'Czechia',
  'conf': 0.15392233995667648,
  'dead': 0.03977272727272727,
  'recov': 0.04513888888888889},
 {'country': 'Denmark',
  'conf': 0.1038304406182012,
  'dead': 0.03535353535353536,
  'recov': 0.00533655972510462},
 {'country': 'France',
  'conf': 0.029359332067071656,
  'dead': 0.03361306479770799,
  'recov': 0.06668138235422696},
 {'country': 'Germany',
  'conf': 0.046506410195436276,
  'dead': 0.07521136054364738,
  'recov': 0.17671086081139883},
 {'country': 'India',
  'conf': 0.041546982134809375,
  'dead': 0.022797766749379653,
  'recov': 0.17702838827838827},
 {'country': 'Italy',
  'conf': 0.0444962088537683,
  'dead': 0.5658

In [28]:
result = {'country':'', 'conf': 0, 'dead': 0, 'recov': 0}
country = 'Germany'
result['country'] = country
file = '../Data_Preprocessing/Clean/' + country + '.csv'
df = pd.read_csv(file, index_col = ['Date'], parse_dates = ['Date'], date_parser = parser, squeeze = True)
for index, row in df.iterrows():
    df.at[index, 'Confirmed_Cases'] = float(row['Confirmed_Cases'])
    df.at[index, 'Recoveries'] = float(row['Recoveries'])

confirmed = df[['Confirmed_Cases']]
recoveries = df[['Recoveries']]

new_header = confirmed.iloc[0]
confirmed = confirmed[1:]
confirmed.columns = new_header

new_header = recoveries.iloc[0]
recoveries = recoveries[1:]
recoveries.columns = new_header

confirmed = confirmed.values
recoveries = recoveries.values

X, y = train_test_split(confirmed, test_size=0.2, shuffle=True)
X_r, y_r = train_test_split(recoveries, test_size=0.2, shuffle=True)


historyC = [a for a in confirmed]
predictionsC = []

historyR = [a for a in recoveries]
predictionsR = []

c = 1

consec = 1

while True:
    model = ARIMA(historyC, order=(1, 0, 0))
    fit = model.fit(disp = 0)
    output = fit.forecast()
    print(c, "C", output[0][0] + output[1][0])
    yhat = int(output[0][0] + output[1][0])
#     historyC.append(output[0])
#     historyC.append(output[1])
    historyC.append(yhat)

    modelR = ARIMA(historyR, order=(1, 0, 0))
    fitR = modelR.fit(disp = 0)
    outputR = fitR.forecast()
    print(c, "R", outputR[0][0] + outputR[1][0])
    yhatR = int(outputR[0][0] + outputR[1][0])
#     historyR.append(outputR[0])
#     historyR.append(outputR[1])
    historyR.append(yhatR)
    
    if(yhatR >= yhat):
        if(consec > 3):
            print (c)
            break
        consec += 1
    else:
        consec = 1
    
    c += 1

result

1 C 3345.4977449359817
1 R 3921.0312624665175
2 C 3930.197440047909
2 R 4278.4171758425155
3 C 4496.044440368818
3 R 4596.894821155906
4 C 5048.261472966069
4 R 4898.088222430313
5 C 5590.733681680591
5 R 5198.4535986858355
6 C 6128.92302748757
6 R 5505.6996049357585
7 C 6668.468094178913
7 R 5827.221180565021
8 C 7211.235795891288
8 R 6168.122391762954
9 C 7761.138836507047
9 R 6533.3607466215335
10 C 8319.333741971956
10 R 6926.130694314565
11 C 8887.452797103051
11 R 7348.519136608038
12 C 9462.88871843631
12 R 7807.701896556466
13 C 10046.516109877255
13 R 8303.61480051556
14 C 10637.889979868956
14 R 8842.280078227093
15 C 11234.203907144254
15 R 9427.530789582808
16 C 11838.14915049436
16 R 10060.794315510662
17 C 12447.085712853534
17 R 10743.931737344947
18 C 13061.174909893487
18 R 11485.593868064685
19 C 13677.969000348148
19 R 12280.310457040905
20 C 14338.046824742609




20 R 13128.779067461375
21 C 14960.560393020853
21 R 14028.812800217103
22 C 15584.567573021834
22 R 14979.166082458862




23 C 16209.859336372607
23 R 15970.983398174398
24 C 16868.9876353588
24 R 16995.288374968735
25 C 17498.574493913824
25 R 18050.816170098286
26 C 18129.85463692509
26 R 19136.954838405447
27 C 18763.712952489484
27 R 20229.949945661207
27


{'country': 'Germany', 'conf': 0, 'dead': 0, 'recov': 0}