# Let's begin by installing sktime

In [None]:
pip install sktime

# Importing the necessary libraries

In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as pp
%matplotlib inline
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sktime.forecasting.compose import ReducedRegressionForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.plotting.forecasting import plot_ys


import os
# Print the full path of every file found anywhere under /kaggle/input.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# Data cleaning and exploration

In [None]:
# Read the daily-cases CSV from the Kaggle input directory into a DataFrame.
csv_path = '/kaggle/input/statistic_id1110871_coronavirus--covid-19--daily-cases-in-nigeria-as-of-july-20-2020.csv'
covid_ng = pd.read_csv(csv_path)
# Preview the first rows to see how the file is laid out.
covid_ng.head()

In [None]:
# Remove every row containing a missing value, then renumber the remaining
# rows from 0 (the old index is discarded rather than kept as a column).
covid_ng = covid_ng.dropna().reset_index(drop=True)
covid_ng

In [None]:
# Show the current column labels; they are replaced with readable names in the
# rename step that follows.
covid_ng.columns

In [None]:
# Replace the two original header labels with short, descriptive names.
readable_names = {
    'Coronavirus (COVID-19) daily cases in Nigeria as of July 20, 2020': 'Date',
    'Unnamed: 1': 'Daily Cases',
}
covid_ng = covid_ng.rename(columns=readable_names)
covid_ng.head()

In [None]:
# Bar chart of daily cases for the last ten rows of the table.
last_ten = covid_ng.iloc[-10:]
pp.figure(figsize=(12, 10.5))
pp.bar(last_ten['Date'], last_ten['Daily Cases'])
pp.title('Covid-19 Daily Cases over the last 10 days')
pp.xlabel('Date')
pp.ylabel('Daily Cases')

# Setting the forecasting horizon

In [None]:
# The series to forecast is the daily case count column.
y = covid_ng['Daily Cases']
# Split it with sktime's temporal_train_test_split, leaving the split size at
# the library default (no size argument is passed).
split_parts = temporal_train_test_split(y)
y_train, y_test = split_parts
# Display the held-out part.
y_test

In [None]:
# Look up the date stored at index label 108.
# NOTE(review): 108 is presumably the first index of y_test (i.e. the date where
# the test period starts) — confirm; deriving it from y_test.index[0] would
# avoid the hard-coded number.
covid_ng['Date'][108]

In [None]:
# Forecasting horizon: the integers 1, 2, ..., len(y_test), i.e. one step per
# observation in the held-out series.
n_steps = len(y_test)
fh = np.arange(n_steps) + 1
fh

# Using Reduced Regression for time-series forecasting and evaluating different models

In [None]:
# Candidate regressors to compare; each gets a fixed random_state so repeated
# runs produce the same scores.
model = RandomForestRegressor(random_state = 0)
d_tree = DecisionTreeRegressor(random_state = 0)
xgbr = XGBRegressor(random_state = 0)

listy = [model, d_tree, xgbr]

# Wrap each regressor in the reduction forecaster (12 lagged values per window),
# fit on the training series, forecast the whole horizon and score against y_test.
# NOTE(review): models are compared on y_test itself, so the selected model's
# score on y_test is likely optimistic — consider a separate validation split.
for regressor in listy:
    forecaster = ReducedRegressionForecaster(regressor, window_length = 12)
    forecaster.fit(y_train)
    y_preds = forecaster.predict(fh)
    error = smape_loss(y_test, y_preds)
    # Report only the class name: formatting the estimator object itself prints
    # its full constructor repr, which buries the score in the output.
    print('For the {} model, the smape loss is {}'.format(type(regressor).__name__, error))

# Evaluating the chosen model's parameters

In [None]:
# Sweep the window length from 6 to 11 with a default random forest and report
# the sMAPE on y_test for each setting.
model = RandomForestRegressor(random_state=0)
window_report = 'For the {} window length, the smape loss is {}'
for window in range(6, 12):
    forecaster = ReducedRegressionForecaster(model, window_length=window)
    forecaster.fit(y_train)
    y_preds = forecaster.predict(fh)
    error = smape_loss(y_test, y_preds)
    print(window_report.format(window, error))

In [None]:
# Sweep the number of trees (50, 100, ..., 500) at a fixed window length of 10
# and report the sMAPE on y_test for each setting.
estimators_report = 'For the {} estimators, the smape loss is {}'
for n_trees in range(50, 501, 50):
    model = RandomForestRegressor(n_estimators=n_trees, random_state=0)
    forecaster = ReducedRegressionForecaster(model, window_length=10)
    forecaster.fit(y_train)
    y_preds = forecaster.predict(fh)
    error = smape_loss(y_test, y_preds)
    print(estimators_report.format(n_trees, error))

In [None]:
# Sweep the maximum tree depth (1 through 20) with 50 trees and a window length
# of 10, and report the sMAPE on y_test for each depth.
for depth in range(1, 20 + 1):
    model = RandomForestRegressor(n_estimators = 50, max_depth = depth, random_state = 0)
    forecaster = ReducedRegressionForecaster(model, window_length = 10)
    forecaster.fit(y_train)
    y_preds = forecaster.predict(fh)
    error = smape_loss(y_test, y_preds)
    # The value being varied here is max_depth, so the message names it as such
    # (the earlier wording said "estimators", copied from the previous sweep).
    print('For a max depth of {}, the smape loss is {}'.format(depth, error))

In [None]:
# Refit with the chosen settings (50 trees, depth 13, window length 10),
# forecast the full horizon and show the resulting sMAPE on y_test.
chosen_params = dict(n_estimators=50, max_depth=13, random_state=0)
model = RandomForestRegressor(**chosen_params)
forecaster = ReducedRegressionForecaster(model, window_length=10)
forecaster.fit(y_train)
y_preds = forecaster.predict(fh)
smape_loss(y_test, y_preds)

In [None]:
# Display the forecasts produced by the most recently fitted forecaster.
y_preds

# Visualizing predictions alongside actual values

In [None]:
# Plot the training series, the held-out series and the forecasts together,
# labelled in that order.
series_to_plot = (y_train, y_test, y_preds)
plot_ys(*series_to_plot, labels=["y_train", "y_test", "y_pred"])

# Extra Analysis

In [None]:
# Running total of the daily counts, stored as a new column on the table.
running_total = covid_ng['Daily Cases'].cumsum()
covid_ng['Total Covid-19 Cases'] = running_total
covid_ng.head()

In [None]:
# Line chart of the cumulative case count over the last ten rows of the table.
last_ten = covid_ng.iloc[-10:]
pp.figure(figsize = (12, 10.5))
pp.plot(last_ten['Date'], last_ten['Total Covid-19 Cases'])
# Build the title from the first and last plotted dates so it always matches the
# data shown (the earlier hard-coded title named a 20-day range while only ten
# rows are plotted).
pp.title('Covid-19 Curve from {} to {}'.format(last_ten['Date'].iloc[0], last_ten['Date'].iloc[-1]))
pp.xlabel('Date')
pp.ylabel('Total Covid-19 Cases')