# Importing Libraries

In [97]:
import sys
import pandas as pd
import numpy as np
import math
import plotly.graph_objects as go
import plotly
from pmdarima import auto_arima
from sklearn.model_selection import train_test_split
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Ignoring warnings
import warnings
warnings.filterwarnings("ignore")

# To import the main.py file
sys.path.append('../')
from python_files import main

from fbprophet import Prophet

In [98]:
%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

# Data Preprocessing Functions

In [99]:
def get_data():
    confirmed_global, deaths_global, recovered_global, country_cases = main.collect_data()
    
    recovered = recovered_global.groupby("country").sum().T
    deaths = deaths_global.groupby("country").sum().T
    confirmed = confirmed_global.groupby("country").sum().T
    
    deaths.index = pd.to_datetime(deaths.index, infer_datetime_format = True)
    recovered.index = pd.to_datetime(recovered.index, infer_datetime_format = True)
    confirmed.index = pd.to_datetime(confirmed.index, infer_datetime_format = True)
    
    return deaths, recovered, confirmed

In [100]:
def create_data_frame(dataframe,country):
    deaths, recovered, confirmed = get_data()
    if dataframe == 'deaths':
        data = pd.DataFrame(index = deaths.index, data = deaths[country].values, columns = ["Total"])

    elif dataframe == 'recovered':
        data = pd.DataFrame(index = recovered.index, data = recovered[country].values, columns = ["Total"])

    elif dataframe == 'confirmed':
        data = pd.DataFrame(index = confirmed.index, data = confirmed[country].values, columns = ["Total"])

    data = data[(data != 0).all(1)]
    
    data['Date'] = data.index
    cols = [data.columns[-1]] + [col for col in data if col != data.columns[-1]]
    data = data[cols]   

    return data

# Graphing Functions

In [101]:
def plot_forecast(data,forecast):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=data["Date"], y=data["Total"],   
                            mode='lines',
                            name='Up till now '))
        
    fig.add_trace(go.Scatter(x=forecast.index, y=forecast.values,   
                            mode='lines',
                            name='Prediction*'))

    fig.update_layout(title={
                'text': "Forecasted results",
                'y':0.9,
                'x':0.5,
                'xanchor': 'center',
                'yanchor': 'top'},
                        template = "plotly_dark",
                        xaxis_title="Date",
                        yaxis_title="Cases",
                        legend_title="Legend ",
                        font=dict(
                                family="Arial",
                                size=15,
                                color="white"
                                )
                        )
    return fig

# Functions to Train and Test the Model

In [102]:
def find_params(train_set):
    stepwise_model = auto_arima(train_set, method='nm', start_p = 0, start_q = 0,
                               max_p = 2, max_q = 2, m = 7,
                               start_P = 0, max_P=0, start_Q=1, max_Q=1, seasonal = True,
                               d = None, D = 1, n_jobs=-1, trace = True,
                               error_action = 'ignore',  
                               suppress_warnings = True, 
                               stepwise = True)
    return stepwise_model

In [103]:
def Predict(stepwise_model,train,test):
    
    stepwise_model.fit(train)
    
    pred = stepwise_model.predict(n_periods=len(test))
    
    pred = pd.DataFrame(pred,index = test.index,columns=['Prediction'])
   
    return pred

# Error Function

In [104]:
def mape(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Function for Forecasting

In [105]:
def Future(order,seasonal_order,train,test,data):
    
    model = SARIMAX(data['Total'],  
                        order = order,  
                        seasonal_order = seasonal_order) 
    result = model.fit() 
  
    forecast = result.predict(start = len(data),  
                          end = (len(data)-1) + 14).rename('Forecast') 
    
 
    error_check = result.predict(start = len(train), end = len(train) - 1 +len(test))
    error = mape(error_check,test)
    error = error
    graph = plot_forecast(data,forecast)

    return forecast,graph,error

# Calling Function

In [106]:
def arima_predict(df_name,country):
    data = create_data_frame(df_name,country)
    
    train = data["Total"][:len(data)*4//5]
    test = data["Total"][len(data)*4//5:]
    
    model = find_params(train)
    pred = Predict(model,train,test)
    mape_error = mape(test, pred["Prediction"])
    order=model.get_params()['order']
    seasonal_order=model.get_params()['seasonal_order']
    print("MAPE IS ",mape_error)
    forecast,graph,error = Future(order, seasonal_order, train, test,data)
    return forecast,graph,(error + np.std([error, mape_error]))

# Example

In [107]:
forecast,graph,error = arima_predict("confirmed","India")

Performing stepwise search to minimize aic
Fit ARIMA(0,2,0)x(0,1,1,7) [intercept=True]; AIC=2912.764, BIC=2922.224, Time=0.573 seconds
Fit ARIMA(0,2,0)x(0,1,0,7) [intercept=True]; AIC=2955.799, BIC=2962.106, Time=0.168 seconds
Fit ARIMA(1,2,0)x(0,1,0,7) [intercept=True]; AIC=2922.126, BIC=2931.586, Time=0.133 seconds
Fit ARIMA(0,2,1)x(0,1,1,7) [intercept=True]; AIC=2886.505, BIC=2899.118, Time=0.544 seconds
Fit ARIMA(0,2,0)x(0,1,0,7) [intercept=False]; AIC=2953.879, BIC=2957.033, Time=0.131 seconds
Fit ARIMA(0,2,1)x(0,1,0,7) [intercept=True]; AIC=2915.683, BIC=2925.143, Time=0.193 seconds
Fit ARIMA(1,2,1)x(0,1,1,7) [intercept=True]; AIC=2888.490, BIC=2904.256, Time=0.551 seconds
Fit ARIMA(0,2,2)x(0,1,1,7) [intercept=True]; AIC=2888.254, BIC=2904.021, Time=0.577 seconds
Fit ARIMA(1,2,0)x(0,1,1,7) [intercept=True]; AIC=2889.409, BIC=2902.022, Time=0.513 seconds
Fit ARIMA(1,2,2)x(0,1,1,7) [intercept=True]; AIC=2887.320, BIC=2906.240, Time=0.618 seconds
Total fit time: 4.016 seconds
MAPE I

In [108]:
print("PREDICTIONS FOR THE NEXT 14 DAYS : \n")
for i in range(len(forecast)):
    print(forecast.index[i] , "---->", int(forecast.values[i]))

PREDICTIONS FOR THE NEXT 14 DAYS : 

2020-09-14 00:00:00 ----> 4779235
2020-09-15 00:00:00 ----> 4811607
2020-09-16 00:00:00 ----> 4852642
2020-09-17 00:00:00 ----> 4892843
2020-09-18 00:00:00 ----> 4934282
2020-09-19 00:00:00 ----> 4975848
2020-09-20 00:00:00 ----> 4985817
2020-09-21 00:00:00 ----> 4998386
2020-09-22 00:00:00 ----> 5018446
2020-09-23 00:00:00 ----> 5047171
2020-09-24 00:00:00 ----> 5075060
2020-09-25 00:00:00 ----> 5104187
2020-09-26 00:00:00 ----> 5133443
2020-09-27 00:00:00 ----> 5131100


In [109]:
graph.show()

In [110]:
print("ALLOW AN ERROR OF UPTO (Based on the data at hand) :",error,"%")

ALLOW AN ERROR OF UPTO (Based on the data at hand) : 2.709404717772571 %


In [111]:
data = create_data_frame("confirmed","India")
data.columns=['ds','y']
data['Day']=data['ds'].dt.dayofweek
data['isWeekend'] = (data['Day'] // 5 == 1).astype(int)
data

Unnamed: 0,ds,y,Day,isWeekend
2020-01-30,2020-01-30,1,3,0
2020-01-31,2020-01-31,1,4,0
2020-02-01,2020-02-01,1,5,1
2020-02-02,2020-02-02,2,6,1
2020-02-03,2020-02-03,3,0,0
...,...,...,...,...
2020-09-09,2020-09-09,4465863,2,0
2020-09-10,2020-09-10,4562414,3,0
2020-09-11,2020-09-11,4659984,4,0
2020-09-12,2020-09-12,4754356,5,1


In [112]:
prophet= Prophet (growth="linear",
                
                  seasonality_mode="additive",
                  changepoint_prior_scale=30,
                  seasonality_prior_scale=35,
                  daily_seasonality=False,
                  weekly_seasonality=False,
                  yearly_seasonality=False,).add_seasonality(name="monthly",period=30.5,fourier_order=55).add_seasonality(name="weekly",period=7,fourier_order=15).add_seasonality(name="daily",period=1,fourier_order=15)

In [113]:
# prophet.add_regressor('isWeekend')
# prophet.add_regressor('Day')
# prophet.add_regressor('New')

In [114]:
prophet.fit(data)

<fbprophet.forecaster.Prophet at 0x7f09e50c9790>

In [121]:
future=prophet.make_future_dataframe(freq='D',periods=14)
future['Day']=future['ds'].dt.dayofweek
future['isWeekend'] = (future['Day'] // 5 == 1).astype(int)
forecast=prophet.predict(future)
# forecast.columns

In [122]:
forecast['yhat']
# m=Prophet(seasonality_mode='multiplicative')
# m.fit(data)
# future=m.make_future_dataframe(freq='D',periods=14)
# forecast=m.predict(future)
# forecast['yhat']

0      1.628208e+03
1      8.486365e+02
2      2.326042e+03
3     -1.746952e+03
4     -2.852839e+03
           ...     
237    5.565862e+06
238    5.634964e+06
239    5.701154e+06
240    5.767442e+06
241    5.830067e+06
Name: yhat, Length: 242, dtype: float64

In [123]:
# prophet.plot(forecast)

In [124]:
from fbprophet.diagnostics import cross_validation
df_cv = cross_validation(prophet,initial=f'{0.8*len(data)} days', horizon = '7 days')
# from fbprophet.diagnostics import performance_metrics
# df_p = performance_metrics(df_cv)
# df_p.head()

INFO:fbprophet:Making 11 forecasts with cutoffs between 2020-08-02 00:00:00 and 2020-09-06 00:00:00


In [125]:
for i in range(len(df_cv)):
    df_cv.yhat.values[i]=int(df_cv.yhat.values[i])
mpe=mape(df_cv.y,df_cv.yhat)
mpe

1.5466394160936934

In [126]:
ftr = (data.index + pd.Timedelta(14, unit='days')).to_frame()
ftr=ftr[len(ftr)-14:]
ftr['Total']=forecast[len(data):]['yhat'].values
ftr=ftr['Total']
ftr
# len(ftr)

2020-09-14    4.885758e+06
2020-09-15    4.961854e+06
2020-09-16    5.042726e+06
2020-09-17    5.121433e+06
2020-09-18    5.198874e+06
2020-09-19    5.276449e+06
2020-09-20    5.348429e+06
2020-09-21    5.425330e+06
2020-09-22    5.493284e+06
2020-09-23    5.565862e+06
2020-09-24    5.634964e+06
2020-09-25    5.701154e+06
2020-09-26    5.767442e+06
2020-09-27    5.830067e+06
Freq: D, Name: Total, dtype: float64

In [127]:
def plot_forecast(data,forecast):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=data["ds"], y=data["y"],   
                            mode='lines',
                            name='Up till now '))
        
    fig.add_trace(go.Scatter(x=forecast.index, y=forecast.values,   
                            mode='lines',
                            name='Prediction*'))

    fig.update_layout(title={
                'text': "Forecasted results",
                'y':0.9,
                'x':0.5,
                'xanchor': 'center',
                'yanchor': 'top'},
                        template = "plotly_dark",
                        xaxis_title="Date",
                        yaxis_title="Cases",
                        legend_title="Legend ",
                        font=dict(
                                family="Arial",
                                size=15,
                                color="white"
                                )
                        )
    return fig

In [128]:
plot_forecast(data,ftr)