# Predictive Analysis With Auto ARIMA 

## Data Collection and Preprocessing

In [1]:
# Imports
import sys
import os
from datetime import datetime
from datetime import date
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from statsmodels.tsa.seasonal import seasonal_decompose
from pmdarima import auto_arima

# To import the main.py file
sys.path.append('../')
from python_files import main

# Getting all the data
confirmed_global, deaths_global, recovered_global, country_cases = main.collect_data()


def _country_time_series(frame):
    """Sum `frame` per country and return it transposed with a datetime index.

    After the transpose the original column labels become the row index
    (presumably date strings - confirm against main.collect_data), which is
    then parsed with pd.to_datetime.
    """
    per_country = frame.groupby("country").sum().T
    per_country.index = pd.to_datetime(per_country.index, infer_datetime_format=True)
    return per_country


# One frame per case type: rows are dates, columns are countries.
rec = _country_time_series(recovered_global)
death = _country_time_series(deaths_global)
conf = _country_time_series(confirmed_global)

In [2]:
# Setting up plotly to work offline and in jupyter notebooks
pyo.init_notebook_mode(connected = True)
# NOTE(review): matplotlib is not imported in the visible cells (all figures are
# plotly) - confirm this magic is still needed.
%matplotlib inline

In [3]:
def create_data_frame(Type, country="India"):
    """Build the single-column daily series used for modelling.

    Parameters
    ----------
    Type : str
        One of 'Death', 'Recovery' or 'Confirmed'; selects which module-level
        frame (`death`, `rec` or `conf`) is read.
    country : str, default "India"
        Column of the selected frame to extract.

    Returns
    -------
    pandas.DataFrame
        One column, "Total", indexed by the source frame's index (named
        "Date"), with every row whose total equals 0 removed.

    Raises
    ------
    ValueError
        If `Type` is not one of the three supported names. (Previously this
        surfaced as an UnboundLocalError on `data`.)
    """
    # Validate before touching any data so a typo fails fast and clearly.
    if Type == 'Death':
        source = death
    elif Type == 'Recovery':
        source = rec
    elif Type == 'Confirmed':
        source = conf
    else:
        raise ValueError(
            f"Unknown Type {Type!r}; expected 'Death', 'Recovery' or 'Confirmed'"
        )

    # Creating a dataframe with Total number of cases everyday in a column
    data = pd.DataFrame(index=source.index, data=source[country].values, columns=["Total"])
    # rename_axis returns a new frame, so the shared source index is not renamed.
    data = data.rename_axis('Date')
    # Making sure no zeroes exist in dataframe
    data = data[data["Total"] != 0]
    return data


In [4]:
# Plotting the Data
def plot_data_frame(frame=None):
    """Return a plotly line figure of a series frame.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to plot. When omitted, the notebook-level `data` variable is
        used, which keeps existing zero-argument calls working.

    Returns
    -------
    The figure produced by `px.line` with the 'plotly_dark' template.
    """
    if frame is None:
        # Backward-compatible fallback to the notebook's current series.
        frame = data
    return px.line(frame, template='plotly_dark')

In [5]:
def find_params(series=None, m=12):
    """Find the best ARIMA orders for a series with a stepwise auto_arima search.

    The search is seasonal, starts from p=1, q=1 and P=0, caps p and q at 3,
    and fixes both differencing orders (d=1, D=1). Candidate fits are printed
    as they are tried (trace=True); fit errors and warnings are suppressed.

    Parameters
    ----------
    series : array-like or pandas.DataFrame, optional
        Data to search on. When omitted, the notebook-level `data` variable is
        used, which keeps existing zero-argument calls working.
    m : int, default 12
        Seasonal period passed to auto_arima.
        NOTE(review): the series built in this notebook is daily; confirm that
        a 12-step season is intended rather than, e.g., a weekly one.

    Returns
    -------
    The model object returned by `auto_arima`.
    """
    if series is None:
        # Backward-compatible fallback to the notebook's current series.
        series = data
    return auto_arima(
        series,
        start_p=1, start_q=1,
        max_p=3, max_q=3,
        m=m,
        start_P=0, seasonal=True,
        d=1, D=1,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )

In [6]:
def split_dataframe(split_date, frame=None):
    """Split a date-indexed frame into non-overlapping train and test parts.

    Parameters
    ----------
    split_date : pandas.Timestamp
        Last date that belongs to the training set.
    frame : pandas.DataFrame, optional
        Frame to split. When omitted, the notebook-level `data` variable is
        used, which keeps existing one-argument calls working.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
        `train` holds rows up to and including `split_date`; `test` holds rows
        strictly after it.

    Raises
    ------
    ValueError
        If the split leaves either side empty.
    """
    if frame is None:
        frame = data

    # Label slicing with .loc includes BOTH endpoints, so `.loc[:d]` together
    # with `.loc[d:]` would put the split-date row in train and in test.
    train = frame.loc[:split_date]
    test = frame.loc[frame.index > split_date]

    if train.empty or test.empty:
        raise ValueError(
            f"split_date {split_date} leaves an empty train or test set"
        )

    print('Test shape:',test.shape)
    print('Train shape:',train.shape)
    print(f'Min date from train set: {train.index.min().date()}')
    print(f'Max date from train set:{train.index.max().date()}')
    print(f'Min date from test set: {test.index.min().date()}')
    print(f'Max date from test set: {test.index.max().date()}')
    return train,test

In [7]:
def forecast():
    """Fit the current model on `train` and forecast across the `test` span.

    Reads the notebook-level variables `stepwise_model`, `train`, `test` and
    `data`. Calling this refits `stepwise_model` via its `fit` method.

    Returns
    -------
    (figure, future_forecast)
        `figure` is a plotly line chart of `data` with the predictions
        alongside; `future_forecast` is a DataFrame with one column,
        'Prediction', indexed like `test`.
    """
    # Fitting the model with train dataset
    stepwise_model.fit(train)
    # Forecasting one step per held-out row
    predicted = stepwise_model.predict(n_periods=len(test))
    print(predicted)
    # Take the values positionally. If the model hands back a pandas object
    # with its own index (some pmdarima versions may - confirm), passing it
    # together with `index=` would reindex instead of relabel and yield NaNs.
    future_forecast = pd.DataFrame(
        np.asarray(predicted), index=test.index, columns=['Prediction']
    )
    # Plotting the predicted values and actual values
    figure = px.line(pd.concat([data, future_forecast], axis=1), template='plotly_dark')
    return figure, future_forecast

In [8]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values. Each is flattened to 1-D, so a Series,
        a single-column DataFrame, a list or an ndarray are all accepted.

    Returns
    -------
    float
        mean(|y_true - y_pred| / |y_true|) * 100. A zero in `y_true` divides
        by zero and yields inf/nan, as before.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of values.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    # Without flattening, shapes such as (n,) vs (n, 1) broadcast to (n, n)
    # and silently produce a meaningless average.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true has {actual.size} values but y_pred has {predicted.size}"
        )
    return np.mean(np.abs((actual - predicted) / actual)) * 100

## Recovery

In [9]:
# Build the recovery series (rows with a zero total are dropped) and plot it.
data=create_data_frame('Recovery')
plot_data_frame()

In [None]:
# Stepwise auto-ARIMA search on `data`; find_params passes trace=True, so every
# candidate fit is printed below.
stepwise_model=find_params()
# AIC of the model the search selected.
print(stepwise_model.aic())


Performing stepwise search to minimize aic
Fit ARIMA(1,1,1)x(0,1,1,12) [intercept=True]; AIC=2632.421, BIC=2647.339, Time=5.466 seconds
Fit ARIMA(0,1,0)x(0,1,0,12) [intercept=True]; AIC=2759.600, BIC=2765.568, Time=0.047 seconds
Fit ARIMA(1,1,0)x(1,1,0,12) [intercept=True]; AIC=2689.742, BIC=2701.677, Time=3.335 seconds
Fit ARIMA(0,1,1)x(0,1,1,12) [intercept=True]; AIC=2746.598, BIC=2758.532, Time=2.530 seconds
Fit ARIMA(0,1,0)x(0,1,0,12) [intercept=False]; AIC=2807.421, BIC=2810.405, Time=0.066 seconds
Fit ARIMA(1,1,1)x(0,1,0,12) [intercept=True]; AIC=2662.896, BIC=2674.830, Time=0.669 seconds
Fit ARIMA(1,1,1)x(1,1,1,12) [intercept=True]; AIC=2627.291, BIC=2645.192, Time=6.681 seconds
Fit ARIMA(1,1,1)x(1,1,0,12) [intercept=True]; AIC=2631.161, BIC=2646.079, Time=5.106 seconds
Fit ARIMA(1,1,1)x(2,1,1,12) [intercept=True]; AIC=2635.853, BIC=2656.738, Time=62.520 seconds
Near non-invertible roots for order (1, 1, 1)(2, 1, 1, 12); setting score to inf (at least one inverse root too close 

In [None]:
# Split the recovery series at 2020-05-24, refit on the training part and
# forecast across the test span; the figure is shown as the cell output.
split_date = pd.Timestamp('2020-05-24')
train,test=split_dataframe(split_date)
fig,future_forecast=forecast()
fig

In [None]:
# Show the held-out test frame.
test

In [None]:
# Seven consecutive calendar days starting 2020-07-24, kept both as the 'Date'
# column and as the index.
rng = pd.date_range(start='2020-07-24', periods=7, freq='D')
df = pd.DataFrame({'Date': rng}).set_index('Date', drop=False)
df

In [None]:
# Forecast the 7 days that follow the observed series.
# The model is refit on the full series: a model fit on `train` predicts the
# days right after the split date, so labelling those values with later
# calendar dates would misplace them in time.
stepwise_model.fit(data)
next_week = stepwise_model.predict(n_periods=7)
# This returns an array of predictions:
print(next_week)
# Dates are derived from the data rather than hard-coded, so they always start
# the day after the last observation.
next_week_index = pd.date_range(
    data.index.max() + pd.Timedelta(days=1), periods=7, freq='D', name='Date'
)
# `future_forecast` is deliberately left untouched: it still holds the hold-out
# predictions that the evaluation cells below compare against `test`.
f = pd.DataFrame(index=next_week_index, data=np.asarray(next_week), columns=["Total"])
# Plotting the forecast
px.line(f, template = 'plotly_dark')

In [None]:
# Show the 7-day forecast table.
f

In [None]:
# MAPE (in percent) between the held-out actuals and whatever forecast is
# currently bound to `future_forecast`.
mean_absolute_percentage_error(test,future_forecast)

In [None]:
# Add the actual totals next to the predictions (the assignment aligns on the
# index) and show the comparison. This adds a column to `future_forecast` in place.
future_forecast['Actual']=test.Total
future_forecast

## Deaths

In [None]:
# Build the deaths series (rows with a zero total are dropped) and plot it.
# Rebinding `data` switches every helper that reads it to this series.
data=create_data_frame('Death')
plot_data_frame()

In [None]:
# Stepwise auto-ARIMA search on the deaths series, then the selected model's AIC.
stepwise_model=find_params()
print(stepwise_model.aic())

In [None]:
# Split the deaths series at 2020-06-13, refit on the training part and
# forecast across the test span; the figure is shown as the cell output.
split_date = pd.Timestamp('2020-06-13')
train,test=split_dataframe(split_date)
fig,future_forecast=forecast()
fig

In [None]:
# Simple moving average over a sliding window. This is a stand-alone example:
# none of the names below are used by the other visible cells.
numbers = [1, 2, 3, 7, 9]
window_size = 3

# One average per full window; window starts run from 0 to
# len(numbers) - window_size inclusive.
moving_averages = [
    sum(numbers[start : start + window_size]) / window_size
    for start in range(len(numbers) - window_size + 1)
]

print(moving_averages)

In [None]:
# MAPE (in percent) of the deaths forecast against the held-out actuals.
mean_absolute_percentage_error(test,future_forecast)

In [None]:
# Add the actual totals next to the predictions (the assignment aligns on the
# index) and show the comparison. This adds a column to `future_forecast` in place.
future_forecast['Actual']=test.Total
future_forecast

## Confirmed

In [None]:
# Build the confirmed-cases series (rows with a zero total are dropped) and plot it.
# Rebinding `data` switches every helper that reads it to this series.
data=create_data_frame('Confirmed')
plot_data_frame()

In [None]:
# Stepwise auto-ARIMA search on the confirmed-cases series, then the selected model's AIC.
stepwise_model=find_params()
print(stepwise_model.aic())

In [None]:
# Split the confirmed-cases series at 2020-06-07, refit on the training part
# and forecast across the test span; the figure is shown as the cell output.
split_date = pd.Timestamp('2020-06-07')
train,test=split_dataframe(split_date)
fig,future_forecast=forecast()
fig

In [None]:
# MAPE (in percent) of the confirmed-cases forecast against the held-out actuals.
mean_absolute_percentage_error(test,future_forecast)

In [None]:
# Add the actual totals next to the predictions (the assignment aligns on the
# index) and show the comparison. This adds a column to `future_forecast` in place.
future_forecast['Actual']=test.Total
future_forecast