In [0]:
# Importing libraries
import sys
import pandas as pd
import numpy as np
import datetime
import warnings
warnings.filterwarnings("ignore")
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

In [0]:
# Forecast window, both ends inclusive. The values are hard-coded in this cell
# (they stand in for what would otherwise be command-line arguments).
startDate, endDate = (
    datetime.datetime.strptime(text, '%Y-%m-%d')
    for text in ('2020-05-10', '2020-05-10')
)

In [0]:
# Load the tab-separated data set; column 0 is parsed as dates and used as the index
dataSet = pd.read_csv(
    "/COVID19/Data/dataSet.csv",
    sep="\t",
    index_col=0,
    parse_dates=[0],
)
# One series per tracked quantity
confirmed, recovered, deaths = (
    dataSet[column] for column in ('Confirmed', 'Recovered', 'Deaths')
)
# Plain list copy of the index values so later cells can append to it
dates = list(dataSet.index)

In [0]:
# Series to forecast, in the order: confirmed, recovered, deaths
data = [confirmed, recovered, deaths]

In [0]:
# Number of days from startDate to endDate, counting both ends
# (0 when startDate lies after endDate)
totalDays = max((endDate - startDate).days + 1, 0)
# startDate, startDate + 1 day, ... : totalDays + 1 entries in total, i.e. every
# day of the window followed by one extra trailing day
tmpDate = [
    startDate + datetime.timedelta(days=offset)
    for offset in range(totalDays + 1)
]

In [0]:
# Number of days from the last date in dataSet to endDate, i.e. how many
# forecast steps are needed to reach endDate.
#
# The value is derived from dataSet.index rather than by growing `dates` in a
# `!=` loop, for two reasons:
#   * a `!=` loop never terminates when the data already extends past endDate
#     (or when the two values never compare exactly equal);
#   * appending to `dates` in place makes the cell non-idempotent - running it
#     a second time would report days == 0.
lastObserved = dataSet.index[-1]
# Whole days only; clamped at 0 when endDate is not after the last observation
days = max((endDate - lastObserved).days, 0)
# Observed dates followed by one entry per forecast day, rebuilt from scratch
dates = list(dataSet.index) + [
    lastObserved + datetime.timedelta(days=step)
    for step in range(1, days + 1)
]

In [0]:
# Score a single ARIMA order (p,d,q) against a hold-out made of the last observation
from math import sqrt
def evaluateArimaModel(X, arimaOrder):
    """Return the RMSE of an ARIMA(arimaOrder) forecast on the tail of X.

    Every element of X except the last one forms the training history; the
    last element is the test set. For each test point a model is fitted on
    the current history, a forecast is taken, and that forecast (not the
    observed value) is appended to the history before the next step.

    Parameters:
        X: sliceable sequence of observations.
        arimaOrder: (p, d, q) tuple passed to ARIMA as ``order``.

    Returns:
        Square root of the mean squared error between the held-out values
        and the forecasts.
    """
    split = len(X) - 1
    # Copy the training part so appending forecasts never touches X
    history = list(X[0:split])
    actual = X[split:]

    predictions = []
    for _ in range(len(actual)):
        fitted = ARIMA(history, order=arimaOrder).fit(disp=0)
        nextValue = fitted.forecast()[0]
        predictions.append(nextValue)
        history.append(nextValue)

    # Out-of-sample error
    return sqrt(mean_squared_error(actual, predictions))

In [0]:
# Evaluate combinations of p, d and q values for an ARIMA model
def evaluateModels(dataset, pValues, dValues, qValues):
    """Grid-search ARIMA orders and return the one with the lowest RMSE.

    Every (p, d, q) combination is scored with evaluateArimaModel, in the
    order p-outermost / q-innermost. Combinations whose evaluation raises an
    ordinary exception are skipped. On ties the first combination
    encountered wins.

    Parameters:
        dataset: object exposing ``astype`` (converted to 'float32' before
            scoring).
        pValues, dValues, qValues: iterables of candidate values.

    Returns:
        The best (p, d, q) tuple, or None when no combination could be
        evaluated. The best order and its RMSE are also printed.
    """
    # Local import so this cell does not depend on an import made elsewhere
    from itertools import product

    dataset = dataset.astype('float32')
    bestScore, bestCFG = float("inf"), None
    for order in product(pValues, dValues, qValues):
        try:
            rmse = evaluateArimaModel(dataset, order)
        except Exception:
            # Skip orders that fail to evaluate. Catching Exception rather
            # than using a bare except lets KeyboardInterrupt / SystemExit
            # through, so a long search can still be stopped.
            continue
        if rmse < bestScore:
            bestScore, bestCFG = rmse, order
    print('Best ARIMA%s RMSE=%.3f' % (bestCFG, bestScore))
    return bestCFG

In [37]:
# Candidate (p, d, q) values. They are the same for every series, so they are
# defined once here instead of on each loop iteration.
pValues = [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
dValues = range(0, 10)
qValues = range(0, 10)
warnings.filterwarnings("ignore")

# forecast[k] receives `days` successive one-step predictions for data[k]
forecast = []
for row in data:
    # Pick the order with the lowest hold-out RMSE for this series
    order = evaluateModels(row, pValues, dValues, qValues)
    if order is None and days > 0:
        # evaluateModels returns None when no candidate could be evaluated;
        # fail here with a clear message instead of inside ARIMA(order=None).
        raise ValueError(
            'No ARIMA order could be fitted for series %r' % (getattr(row, 'name', None),)
        )

    history = list(row.values.astype('float32'))
    pred = []
    for _ in range(days):
        modelFit = ARIMA(history, order=order).fit(disp=0)
        output = modelFit.forecast()[0]
        pred.append(output)
        # Feed the prediction back in so the next step forecasts one day further
        history.append(output)

    forecast.append(pred)

Best ARIMA(1, 2, 1) RMSE=1.863
Best ARIMA(15, 2, 0) RMSE=9.772
Best ARIMA(2, 1, 0) RMSE=3.489


In [0]:
# Write one tab-separated row per day of the window starting at startDate
# (totalDays rows): date, then the rounded Confirmed / Recovered / Deaths values.
#
# Each forecast[k] holds `days` predictions, so the requested window is read
# from its last `totalDays` entries, starting at index days - totalDays.
offset = days - totalDays
if offset < 0:
    # A negative start index would silently wrap around to the end of the
    # forecast lists and write the wrong values. Check before opening the
    # file so an existing output is not truncated.
    raise ValueError(
        'Requested %d day(s) of output but only %d forecast day(s) are available'
        % (totalDays, days)
    )

with open('/COVID19/Output/Forecast.txt', 'w') as f:
    f.write('Date\tConfirmed\tRecovered\tDeaths\n')
    for i in range(totalDays):
        fields = [str(tmpDate[i].date())]
        for series in (forecast[0], forecast[1], forecast[2]):
            fields.append(str(round(float(series[offset + i]))))
        f.write('\t'.join(fields) + '\n')