<a href="https://colab.research.google.com/github/dchatterjee/time-series-forcasting/blob/main/analysis/time_series/ts_1_curves_pycaret_edition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook converts the following original notebook into a PyCaret workflow: https://www.kaggle.com/konradb/ts-1-curves**

In this notebook I will present the simplest approach to time series: reducing the problem to curve fitting. The content was presented live on Abhishek Thakur's YouTube channel, so it is probably best to watch the video while following along in the notebook: https://www.youtube.com/watch?v=kAI67Sz92-s

In [None]:
def what_is_installed():
    import pycaret
    from pycaret import show_versions
    show_versions()

try:
    what_is_installed()
except:
    !pip install prophet
    !pip install pycaret-ts-alpha
    what_is_installed()


System:
    python: 3.7.12 (default, Jan 15 2022, 18:48:18)  [GCC 7.5.0]
executable: /usr/bin/python3
   machine: Linux-5.4.144+-x86_64-with-Ubuntu-18.04-bionic

Python dependencies:
          pip: 21.1.3
   setuptools: 57.4.0
      pycaret: 3.0.0
      sklearn: 1.0.2
       sktime: 0.10.1
  statsmodels: 0.12.2
        numpy: 1.21.5
        scipy: 1.7.3
       pandas: 1.3.5
   matplotlib: 3.2.2
       plotly: 5.5.0
       joblib: 1.0.1
        numba: 0.55.1
       mlflow: 1.24.0
     lightgbm: 3.3.2
      xgboost: 0.90
     pmdarima: 1.8.5
        tbats: Installed but version unavailable
      prophet: 1.0
      tsfresh: Not installed


In [None]:
import numpy as np 
import pandas as pd

#### Not needed for PyCaret
# import os
# import matplotlib.pyplot as plt
# import seaborn as sns
# import statsmodels.api as sm
# plt.style.use('fivethirtyeight')

In [None]:
#### Not needed for PyCaret
# class CFG:
#     img_dim1 = 20
#     img_dim2 = 10
    
# plt.rcParams.update({'figure.figsize': (CFG.img_dim1,CFG.img_dim2)})    

# Data and EDA

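\begin{equation}
X_t = T_t + S_t + C_t + \epsilon_t
\end{equation}

Here $T_t$ is the trend, $S_t$ the seasonal component, $C_t$ the cyclical component and $\epsilon_t$ the irregular (noise) term.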

Dedicated notebook: https://www.kaggle.com/konradb/ts-0-the-basics


In [None]:
# US energy series hosted in the PyCaret public examples repository.
US_ENERGY_CSV = "https://raw.githubusercontent.com/pycaret/public_examples/main/data/us_energy.csv"
df = pd.read_csv(US_ENERGY_CSV)
df.head(3)

Unnamed: 0,date,value
0,1973-01-01,160.218
1,1973-01-31,143.539
2,1973-03-02,148.158


In [None]:
#### Index the frame by calendar month.
# Some measurements are stamped on the 1st of a month and others on the last day,
# which would give duplicate index values for some months.
# Shifting every date by a few days before converting to monthly periods
# leaves exactly one measurement per month.
df = df.assign(
    date=pd.PeriodIndex(pd.to_datetime(df["date"]) + pd.DateOffset(5), freq="M")
).set_index("date")
# df.plot()

In [None]:
# Sanity check after the date shift: the first element is True when no two rows
# share a month; the second element shows the resulting index.
df.index.duplicated().sum() == 0, df.index

(True,
 PeriodIndex(['1973-01', '1973-02', '1973-03', '1973-04', '1973-05', '1973-06',
              '1973-07', '1973-08', '1973-09', '1973-10',
              ...
              '2012-09', '2012-10', '2012-11', '2012-12', '2013-01', '2013-02',
              '2013-03', '2013-04', '2013-05', '2013-06'],
             dtype='period[M]', name='date', length=486))

In [None]:
from pycaret.time_series import TSForecastingExperiment

# Plot options shared by every experiment in this notebook.
# Pick the renderer that matches where the notebook is being run
# (see the plotly documentation for the available renderers).
global_plot_settings = dict(renderer="colab", hoverinfo="text")

# fh=72 keeps the last 72 observations as the test set (see the setup summary).
exp = TSForecastingExperiment()
exp.setup(
    data=df,
    seasonal_period=12,
    fh=72,
    fig_kwargs=global_plot_settings,
    session_id=42,
)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,value
2,Original Data,"(486, 1)"
3,Missing Values,False
4,Approach,Univariate
5,Exogenous Variables,Not Present
6,Transformed Train Target,"(414,)"
7,Transformed Test Target,"(72,)"
8,Transformed Train Exogenous,"(414, 0)"
9,Transformed Test Exogenous,"(72, 0)"


<pycaret.internal.pycaret_experiment.time_series_experiment.TSForecastingExperiment at 0x7f5f86dd9c10>

In [None]:
# #### Using Manual Approach ----
# from statsmodels.tsa.seasonal import seasonal_decompose

# plt.rcParams.update({'figure.figsize': (CFG.img_dim1,CFG.img_dim2)})

# decomposition = seasonal_decompose(df, period = 12)
# figure = decomposition.plot()
# plt.show()

################################
#### Using PyCaret Approach ####
################################
# PyCaret counterpart of the seasonal_decompose call above. No model is passed,
# so the plot is built from the data registered in `exp.setup`.
exp.plot_model(plot="decomp")

In [None]:
# #### Using Manual Approach ----
# decomposition = seasonal_decompose(df['value'], period = 11, model = 'multiplicative')
# figure = decomposition.plot()
# plt.show()

################################
#### Using PyCaret Approach ####
################################
# Currently only works with detected frequency. Will add option to specify period manually here in the future.

In [None]:
# #### Using Manual Approach ----
# decomposition = seasonal_decompose(df['value'], period = 12, model = 'multiplicative')
# figure = decomposition.plot()
# plt.show()

################################
#### Using PyCaret Approach ####
################################
# Same decomposition plot, switched to a multiplicative model through data_kwargs
# (counterpart of model = 'multiplicative' in the manual call above).
exp.plot_model(plot="decomp", data_kwargs={"type": "multiplicative"})

In [None]:
# #### Using Manual Approach ----
# from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# plot_acf(df['value'])
# print()

################################
#### Using PyCaret Approach ####
################################
# Autocorrelation plot of the data registered in `exp.setup` (no model is passed).
exp.plot_model(plot="acf")

In [None]:
# #### Using Manual Approach ----
# plot_pacf(df['value'])
# print()

################################
#### Using PyCaret Approach ####
################################
# Partial autocorrelation plot of the data registered in `exp.setup` (no model is passed).
exp.plot_model(plot="pacf")

# Smoothing methods

Dedicated notebook: https://www.kaggle.com/konradb/ts-1a-smoothing-methods

In [None]:
# #### Using Manual Approach ----
# # training / validation split
# cutoff_date = '2005-12-31'
# df['value'] /= 100
# xtrain, xvalid  = df.loc[df.index <= cutoff_date], df.loc[df.index > cutoff_date]
# print(xtrain.shape, xvalid.shape)

################################
#### Using PyCaret Approach ####
################################
# NOTE: The train/test split is derived from the fh value (see the split sizes in the setup display).
# NOTE(review): the manual reference above rescales the series by 100, whereas 200 is
# used here -- confirm which scale is intended.
scaled_target = df["value"] / 200

exp = TSForecastingExperiment()
exp.setup(
    data=scaled_target,
    seasonal_period=12,
    fh=72,
    fig_kwargs=global_plot_settings,
    session_id=42,
)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,value
2,Original Data,"(486, 1)"
3,Missing Values,False
4,Approach,Univariate
5,Exogenous Variables,Not Present
6,Transformed Train Target,"(414,)"
7,Transformed Test Target,"(72,)"
8,Transformed Train Exogenous,"(414, 0)"
9,Transformed Test Exogenous,"(72, 0)"


<pycaret.internal.pycaret_experiment.time_series_experiment.TSForecastingExperiment at 0x7f5f7ee4f3d0>

In [None]:
# No model and no plot type are given, so PyCaret falls back to its default plot
# (presumably the target series itself -- confirm against the rendered figure).
exp.plot_model()

In [None]:
################################
#### Using PyCaret Approach ####
################################
# Extra PyCaret plots that make the evaluation scheme visible:
# first the train/test split, then the cross-validation folds.
for plot_kind in ("train_test_split", "cv"):
    exp.plot_model(plot=plot_kind)

In [None]:
# #### Using Manual Approach ----
# from statsmodels.tsa.holtwinters import ExponentialSmoothing

# fit1 = ExponentialSmoothing(xtrain['value'].values, seasonal_periods = 12,
#                            trend = 'mul', seasonal = 'mul')
# fit1 = fit1.fit()

################################
#### Using PyCaret Approach ####
################################
# No need to pass the seasonal period to the model explicitly:
# seasonal_period=12 was given to `exp.setup` and the fitted model reports sp=12.
# An added advantage is that PyCaret performs temporal cross-validation when the
# model is created (three folds are reported for this call).
fit1 = exp.create_model("exp_smooth", trend = 'mul', seasonal = 'mul')

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,1989-06,0.0594,0.0713,0.0458,0.0445,1.5898,1.4953,0.5548
1,1995-06,0.0454,0.0595,0.0295,0.0297,1.1667,1.1726,0.8411
2,2001-06,0.046,0.0589,0.0283,0.0278,1.0817,1.0685,0.8621
Mean,NaT,0.0503,0.0632,0.0345,0.034,1.2794,1.2455,0.7527
SD,NaT,0.0064,0.0057,0.008,0.0075,0.2222,0.1817,0.1402


In [None]:
# #### Using Manual Approach ----
# prediction = fit1.forecast(len(xvalid)).copy()

# xresiduals = xvalid['value'] - prediction

################################
#### Using PyCaret Approach ####
################################
# Forecast over the hold-out horizon; the call also displays the test metrics.
prediction = exp.predict_model(fit1)

# NOTE: The residual diagnostics below use the in-sample residuals that PyCaret
# provides automatically. Out-of-sample residuals, if needed, are the difference
# between the held-out target and the forecast.
# NOTE(review): assumes `prediction` aligns with `y_test` on subtraction -- confirm
# the return type of predict_model for the installed PyCaret version.
y_test = exp.get_config("y_test")
xresiduals = y_test - prediction

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Exponential Smoothing,0.1466,0.1678,0.0879,0.0831,3.3756,2.9888,0.0756


In [None]:
# #### Using Manual Approach ----
# fit1.params_formatted

################################
#### Using PyCaret Approach ####
################################
# Displaying the estimator shows its hyperparameters (the PyCaret-side counterpart
# of inspecting params_formatted in the manual approach).
fit1

ExponentialSmoothing(damped_trend=False, damping_trend=None, initial_level=None,
                     initial_seasonal=None, initial_trend=None,
                     initialization_method='estimated', method=None,
                     minimize_kwargs=None, optimized=True, remove_bias=False,
                     seasonal='mul', smoothing_level=None,
                     smoothing_seasonal=None, smoothing_trend=None, sp=12,
                     start_params=None, trend='mul', use_boxcox=None,
                     use_brute=True)

In [None]:
# #### Using Manual Approach ----
# plot_acf(xresiduals, lags = 25)
# print()

################################
#### Using PyCaret Approach ####
################################
# Passing the fitted model makes PyCaret plot the ACF of the model residuals
# instead of the ACF of the raw series.
exp.plot_model(fit1, plot="acf")

In [None]:
# #### Using Manual Approach ----
# plot_pacf(xresiduals, lags = 25)
# print()

################################
#### Using PyCaret Approach ####
################################
# Passing the fitted model makes PyCaret plot the PACF of the model residuals
# instead of the PACF of the raw series.
exp.plot_model(fit1, plot="pacf")

# Prophet

Dedicated notebook:  https://www.kaggle.com/konradb/ts-1b-prophet

Dataset: https://www.kaggle.com/piantic/autonomous-greenhouse-challengeagc-2nd-2019

In [None]:
# from fbprophet import Prophet

In [None]:
# Greenhouse climate measurements hosted in the PyCaret public examples repository.
GREENHOUSE_CSV = "https://raw.githubusercontent.com/pycaret/public_examples/main/data/GreenhouseClimate1.csv"
xdat = pd.read_csv(GREENHOUSE_CSV)
xdat.head(3)

Unnamed: 0,time,AssimLight,BlackScr,CO2air,Cum_irr,EC_drain_PC,EnScr,HumDef,PipeGrow,PipeLow,...,t_rail_min_sp,t_rail_min_vip,t_vent_sp,t_ventlee_vip,t_ventwind_vip,water_sup,water_sup_intervals_sp_min,water_sup_intervals_vip_min,window_pos_lee_sp,window_pos_lee_vip
0,43815.0,0.0,35.0,439.0,0.48,3.61,96.0,6.96,0.0,50.9,...,,0.0,,25.0,26.0,4.0,,1000.0,,1.2
1,43815.00347,0.0,85.000001,459.0,0.72,3.61,96.0,7.45,0.0,49.3,...,,0.0,,25.0,26.0,6.0,,1000.0,,1.2
2,43815.00694,0.0,95.999999,461.0,0.72,3.61,94.6,5.99,0.0,47.6,...,,0.0,,25.0,26.0,6.0,,1000.0,,1.2


In [None]:
# `time` holds fractional day counts measured from 1899-12-30 (spreadsheet-style serial dates).
# The serials carry limited decimal precision, so the raw conversion lands a fraction of a
# second off the intended timestamp (e.g. 00:04:59.808 instead of 00:05:00).
# Round to the nearest second so that samples sitting on an hour boundary are not pushed
# into the neighbouring bin when the data is resampled to hourly means.
xdat['time'] = pd.to_datetime(xdat['time'], unit = 'D', origin = "1899-12-30").dt.round("s")
xdat.head(3)

Unnamed: 0,time,AssimLight,BlackScr,CO2air,Cum_irr,EC_drain_PC,EnScr,HumDef,PipeGrow,PipeLow,...,t_rail_min_sp,t_rail_min_vip,t_vent_sp,t_ventlee_vip,t_ventwind_vip,water_sup,water_sup_intervals_sp_min,water_sup_intervals_vip_min,window_pos_lee_sp,window_pos_lee_vip
0,2019-12-16 00:00:00.000000000,0.0,35.0,439.0,0.48,3.61,96.0,6.96,0.0,50.9,...,,0.0,,25.0,26.0,4.0,,1000.0,,1.2
1,2019-12-16 00:04:59.808000256,0.0,85.000001,459.0,0.72,3.61,96.0,7.45,0.0,49.3,...,,0.0,,25.0,26.0,6.0,,1000.0,,1.2
2,2019-12-16 00:09:59.616000000,0.0,95.999999,461.0,0.72,3.61,94.6,5.99,0.0,47.6,...,,0.0,,25.0,26.0,6.0,,1000.0,,1.2


In [None]:
# Timestamp plus the forecasting target (`Tair`).
list1 = ["time", "Tair"]

# Setpoint columns, considered later as candidate exogenous regressors.
list2 = [
    "co2_sp",
    "dx_sp",
    "t_rail_min_sp",
    "t_grow_min_sp",
    "assim_sp",
    "scr_enrg_sp",
    "scr_blck_sp",
    "t_heat_sp",
    "t_vent_sp",
    "window_pos_lee_sp",
    "water_sup_intervals_sp_min",
    "int_blue_sp",
    "int_red_sp",
    "int_farred_sp",
    "int_white_sp",
]

# Keep only the columns used in the rest of the notebook.
xdat = xdat.loc[:, list1 + list2]

In [None]:
# Aggregate the raw readings into hourly means, then restore `time` as a regular
# column and fix the column order.
hourly_means = xdat.resample("H", on="time").mean()
xdat2 = hourly_means.reset_index()[list1 + list2]

In [None]:
# Number of missing hourly values per column, inspected before imputing them.
xdat2.isna().sum()

time                            0
Tair                            2
co2_sp                         40
dx_sp                          41
t_rail_min_sp                  41
t_grow_min_sp                 107
assim_sp                       41
scr_enrg_sp                    41
scr_blck_sp                    41
t_heat_sp                      40
t_vent_sp                      40
window_pos_lee_sp              41
water_sup_intervals_sp_min     84
int_blue_sp                     0
int_red_sp                      0
int_farred_sp                   0
int_white_sp                    0
dtype: int64

In [None]:
#### Using Manual Approach ----
# np.isnan(xdat2[['Tair'] + list2]).describe()
# np.isnan(xdat2[['Tair'] + list2]).describe()
# xdat2.dropna(subset = ['Tair'], inplace = True)
# xdat2['Tair'][0:1000].plot()

################################
#### Using PyCaret Approach ####
################################
# Simple Imputation ----
# NOTE: Imputation will be added to PyCaret in a future release so this step will be taken care of in the future.
# For now, we will do it manually.
# Forward-fill first; the back-fill afterwards only covers gaps at the very start
# of the frame, which a forward fill cannot reach.
# `ffill()` / `bfill()` are used instead of `fillna(method=...)`, which is
# deprecated in recent pandas releases; the result is the same.
xdat2 = xdat2.ffill().bfill()
xdat2.isna().sum()

time                          0
Tair                          0
co2_sp                        0
dx_sp                         0
t_rail_min_sp                 0
t_grow_min_sp                 0
assim_sp                      0
scr_enrg_sp                   0
scr_blck_sp                   0
t_heat_sp                     0
t_vent_sp                     0
window_pos_lee_sp             0
water_sup_intervals_sp_min    0
int_blue_sp                   0
int_red_sp                    0
int_farred_sp                 0
int_white_sp                  0
dtype: int64

In [None]:
# Index the hourly frame by timestamp and declare its frequency explicitly.
xdat2 = xdat2.set_index("time")
xdat2.index.freq = "H"

In [None]:
#### Using Manual Approach ----
# df = xdat2[['time', 'Tair']].rename(columns={"time": "ds", "Tair": "y"})
# Work on a copy so the experiment input is decoupled from `xdat2`.
df = xdat2.copy()

################################
#### Using PyCaret Approach ####
################################
# NOTE: PyCaret needs no column renaming: it exposes one consistent modelling
# interface regardless of the underlying model. The setup below mirrors the
# one used for Exponential Smoothing above.

from pycaret.time_series import TSForecastingExperiment

# Univariate experiment on the Tair column; fh=24 holds out the last 24 hourly values.
exp = TSForecastingExperiment()
exp.setup(
    data=df["Tair"],
    fh=24,
    fig_kwargs=global_plot_settings,
    session_id=42,
)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,Tair
2,Original Data,"(3985, 1)"
3,Missing Values,False
4,Approach,Univariate
5,Exogenous Variables,Not Present
6,Transformed Train Target,"(3961,)"
7,Transformed Test Target,"(24,)"
8,Transformed Train Exogenous,"(3961, 0)"
9,Transformed Test Exogenous,"(24, 0)"


<pycaret.internal.pycaret_experiment.time_series_experiment.TSForecastingExperiment at 0x7f5f810c1410>

In [None]:
#### Using Manual Approach ----
# m = Prophet(weekly_seasonality=False, 
#             interval_width = 0.95,
#             mcmc_samples = 10)

# m.fit(df)

# from fbprophet.diagnostics import cross_validation, performance_metrics
# df_cv = cross_validation(m,initial = '3700 hours', period = '24 hours', horizon = '24 hours')
# df_cv.head(10)

# df_p1 = performance_metrics(df_cv)
# df_p1.head(10)

################################
#### Using PyCaret Approach ####
################################
# Create Model (the cross-validation metrics are displayed as part of this call)
# NOTE(review): mcmc_samples = 10 from the manual reference above is not passed here.
m = exp.create_model("prophet", weekly_seasonality=False, interval_width = 0.95)

# Store CV Metrics so they can be compared with the covariate model later on
df_p1 = exp.pull()

# NOTE: The results of the cross validation was provided when we fit the models. Also,
# the cross-validation format was consistent with any other model developed through pycaret
# allowing for easier and consistent comparison of models from different packages.

# Also note that multiple metrics are provided by default. Again, these are
# provided in a consistent manner so that these can be compared across models.
# Users can customize these as needed (add/delete more metrics). Refer to 
# ?exp.remove_metric
# ?exp.add_metric

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,2020-05-26,1.9189,2.2358,0.0826,0.0836,2.4734,1.8845,0.5378
1,2020-05-27,1.749,2.1379,0.0825,0.0801,2.2577,1.8053,0.5643
2,2020-05-28,2.6483,3.3378,0.1126,0.114,3.4161,2.8217,0.3173
Mean,NaT,2.1054,2.5705,0.0926,0.0926,2.7157,2.1705,0.4731
SD,NaT,0.3901,0.544,0.0142,0.0152,0.503,0.4616,0.1107


In [None]:
# #### Using Manual Approach ----
# future = m.make_future_dataframe(periods= 24, freq = 'H')
# forecast = m.predict(future)
# m.plot_components(forecast, figsize=(CFG.img_dim1, CFG.img_dim2))
# print()

################################
#### Using PyCaret Approach ####
################################
# predict_model returns the forecast and displays the resulting metrics table;
# plot_model(m) then visualises the fitted model.
forecast = exp.predict_model(m)
exp.plot_model(m)

Unnamed: 0,Model,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,Prophet,1.5867,1.9906,0.0743,0.0717,2.04,1.677,0.661


In [None]:
from sklearn.linear_model import Ridge
from sklearn.feature_selection import RFE

# Incorporate the climate and irrigation setpoints: recursive feature elimination
# with a Ridge estimator retains three of the setpoint columns as regressors for Tair.
xdat2 = xdat2.dropna()

candidate_features = xdat2[list2]
estimator = Ridge()
selector = RFE(estimator, n_features_to_select=3, step=1).fit(
    candidate_features, xdat2["Tair"]
)

# `support_` is the boolean mask of the retained columns.
to_keep = candidate_features.columns[selector.support_]
print(to_keep)

Index(['dx_sp', 't_vent_sp', 'window_pos_lee_sp'], dtype='object')


In [None]:
#### Using Manual Approach ----
# df = xdat2[['time', 'Tair']].rename(columns={"time": "ds", "Tair": "y"})

# m = Prophet(weekly_seasonality=False, interval_width = 0.95)

# # add the regressors to the dataframe holding the data
# for f in to_keep:    
#     df[f] = xdat2[f]
#     m.add_regressor(f)

################################
#### Using PyCaret Approach ####
################################
# NOTE: Similar to the previous example, no need to rename columns for PyCaret
# since it provides a consistent interface to modeling irrespective of what kind
# of model is being developed. Also, no need to add exogenous variables separately.
# Just specify the target, and rest are treated as exogenous variables.

from pycaret.time_series import TSForecastingExperiment

# Target first, then the selected regressors. Iterating over `to_keep` and
# filtering out "Tair" works whether `to_keep` is still the pandas Index produced
# by the feature selection or already the list built here, so the cell can be
# re-run without raising or duplicating the target column.
to_keep = ["Tair"] + [col for col in to_keep if col != "Tair"]

exp = TSForecastingExperiment()
exp.setup(data = xdat2[to_keep], target = "Tair", fh=24, fig_kwargs=global_plot_settings, session_id=42)

Unnamed: 0,Description,Value
0,session_id,42
1,Target,Tair
2,Original Data,"(3985, 4)"
3,Missing Values,False
4,Approach,Univariate
5,Exogenous Variables,Present
6,Transformed Train Target,"(3961,)"
7,Transformed Test Target,"(24,)"
8,Transformed Train Exogenous,"(3961, 3)"
9,Transformed Test Exogenous,"(24, 3)"


<pycaret.internal.pycaret_experiment.time_series_experiment.TSForecastingExperiment at 0x7f5f7e69c310>

In [None]:
#### Using Manual Approach ----
# # the rest proceeds as before. 
# m.fit(df)

# # we repeat the same evaluation tactic as before
# df_cv = cross_validation(m, initial = '3700 hours', period = '24 hours', horizon='24 hours')
# df_p2 = performance_metrics(df_cv)

################################
#### Using PyCaret Approach ####
################################
# Create Model
# Use the same Prophet settings as the model without covariates (and as the manual
# reference above), so that the comparison below isolates the effect of the
# exogenous regressors rather than a change in seasonality settings.
# NOTE(review): outputs stored from earlier runs were produced without these
# arguments and need to be regenerated.
m = exp.create_model("prophet", weekly_seasonality=False, interval_width = 0.95)

# Store CV Metrics
df_p2 = exp.pull()

Unnamed: 0,cutoff,MAE,RMSE,MAPE,SMAPE,MASE,RMSSE,R2
0,2020-05-26,0.5109,0.6666,0.0215,0.0218,0.6586,0.5619,0.9589
1,2020-05-27,0.8624,1.1138,0.042,0.0405,1.1132,0.9406,0.8817
2,2020-05-28,1.1259,1.4575,0.0527,0.0507,1.4524,1.2322,0.8698
Mean,NaT,0.8331,1.0793,0.0387,0.0377,1.0747,0.9115,0.9035
SD,NaT,0.2519,0.3238,0.013,0.012,0.3252,0.2744,0.0395


In [None]:
#### Using Manual Approach ----
# comparison = pd.DataFrame()
# comparison['raw'] = df_p1.mean(axis = 0)[1:]
# comparison['covariates'] = df_p2.mean(axis = 0)[1:]
# print(comparison)

################################
#### Using PyCaret Approach ####
################################
# Side-by-side view of the mean cross-validation metrics: the Prophet model on the
# raw series versus the Prophet model with the selected covariates.
comparison = pd.DataFrame(
    {
        "raw": df_p1.loc["Mean"],
        "covariates": df_p2.loc["Mean"],
    }
)
print(comparison)

           raw covariates
cutoff     NaT        NaT
MAE     2.1054     0.8331
RMSE    2.5705     1.0793
MAPE    0.0926     0.0387
SMAPE   0.0926     0.0377
MASE    2.7157     1.0747
RMSSE   2.1705     0.9115
R2      0.4731     0.9035


Neural Prophet NN component: https://www.arxiv-vanity.com/papers/1911.12436/

NOTE: This is currently not supported by PyCaret.


In [None]:
# !pip install neuralprophet

In [None]:
# from neuralprophet import NeuralProphet

In [None]:
# df = xdat2[['time', 'Tair']].rename(columns={"time": "ds", "Tair": "y"})

# model = NeuralProphet(weekly_seasonality = False)

# model.fit(df, freq="H")
# # forecast
# df_predict = model.make_future_dataframe(df, periods= 24)
# df_predict = model.predict(df_predict)
# fig = model.plot(df_predict)