# Machine Learning for Time Series

# PyCaret

In [1]:
import pycaret
pycaret.__version__

'3.0.0'

In [2]:
import pandas as pd
from pathlib import Path
from pycaret.time_series import *

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
file = Path('../datasets/Residential_Energy_Consumption.csv')
col = ['Month','Total Energy Consumed by the Residential Sector']
df = pd.read_csv(file, index_col='Month', usecols=col, parse_dates=True)
df.columns = ['y']
df.head()

Unnamed: 0_level_0,y
Month,Unnamed: 1_level_1
1973-01-01,1957.641
1973-02-01,1712.143
1973-03-01,1510.079
1973-04-01,1183.421
1973-05-01,1006.326


In [4]:
df.isna().sum()

y    0
dtype: int64

In [5]:
df.tail()

Unnamed: 0_level_0,y
Month,Unnamed: 1_level_1
2021-08-01,1755.212
2021-09-01,1456.901
2021-10-01,1315.799
2021-11-01,1644.306
2021-12-01,2032.352


In [6]:
fig_settings = {"width": 1000, "height": 600}

s = setup(
    data=df, session_id=22,
    fh=16, fold=5,
    fig_kwargs=fig_settings
    )

Unnamed: 0,Description,Value
0,session_id,22
1,Target,y
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(588, 1)"
5,Transformed data shape,"(588, 1)"
6,Transformed train set shape,"(572, 1)"
7,Transformed test set shape,"(16, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [7]:
model = s.compare_models(sort='SMAPE')

In [8]:
plot_model(model, plot='forecast', 
           data_kwargs = {'fh' : 36})

ValueError: No objects to concatenate

In [None]:
model

In [None]:
plot_model(model, plot='cv')

In [None]:
plot_model(model, plot='diagnostics')

In [None]:
model_tuned = tune_model(model)

In [None]:
model

In [None]:
model_tuned

In [None]:
final = finalize_model(model_tuned)
save_model(final, 'energy_exp_model')
energy_model = load_model('energy_exp_model')

In [None]:
import numpy as np
fh = np.arange(24)
energy_model.predict(fh=fh)

## There is more

In [9]:
econ = pd.read_csv('../datasets/economic_df.csv')
econ.head()

Unnamed: 0,DATE,FEDFUNDS,unrate,CORESTICKM159SFRBATL
0,1990-01-01,8.23,5.4,4.920589
1,1990-02-01,8.24,5.3,4.935526
2,1990-03-01,8.28,5.2,5.208701
3,1990-04-01,8.26,5.4,5.294134
4,1990-05-01,8.18,5.4,5.183754


In [10]:
econ.shape

(386, 4)

In [11]:
s = setup(
    data=econ, session_id=23,
    fh=12, fold=3,
    fig_kwargs=fig_settings, 
    enforce_exogenous=True,
    target='unrate', index='DATE',
    ignore_features=['CORESTICKM159SFRBATL']
    )

Unnamed: 0,Description,Value
0,session_id,23
1,Target,unrate
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(386, 2)"
5,Transformed data shape,"(386, 2)"
6,Transformed train set shape,"(374, 2)"
7,Transformed test set shape,"(12, 2)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [12]:
model = s.compare_models()

In [13]:
model

[]

## See Also

1. Makridakis, Spyros, Evangelos Spiliotis, and Vassilios Assimakopoulos. “Statistical and Machine Learning Forecasting Methods: Concerns and Ways Forward.” PLOS ONE 13, no. 3 (March 27, 2018): e0194889. https://doi.org/10.1371/journal.pone.0194889.

2. Ahmed, Nesreen K., Amir F. Atiya, Neamat El Gayar, and Hisham El-Shishiny. “An Empirical Comparison of Machine Learning Models for Time Series Forecasting.” Econometric Reviews 29, no. 5–6 (August 30, 2010): 594–621. https://doi.org/10.1080/07474938.2010.481556.
