# PRACTICA MODELOS ARIMA Y MACHINE LEARNING
# SETUP AMBIENTE Y DATASET

### Instalaciones

In [1]:
# %pip install pycaret

## Imports base

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

## Carga del Dataset

In [3]:
from pycaret.datasets import get_data
airline = get_data('airline')


Unnamed: 0_level_0,Number of airline passengers
Period,Unnamed: 1_level_1
1949-01,112.0
1949-02,118.0
1949-03,132.0
1949-04,129.0
1949-05,121.0


In [4]:
airline.index

PeriodIndex(['1949-01', '1949-02', '1949-03', '1949-04', '1949-05', '1949-06',
             '1949-07', '1949-08', '1949-09', '1949-10',
             ...
             '1960-03', '1960-04', '1960-05', '1960-06', '1960-07', '1960-08',
             '1960-09', '1960-10', '1960-11', '1960-12'],
            dtype='period[M]', name='Period', length=144)

Vemos que el dataset ya tiene cargado el tipo de Index con periodicidad 'M'. **MUY IMPORTANTE PARA PYCARET TENER DEFINIDA LA PERIODICIDAD**

In [5]:
from pycaret.time_series import *

Armamos el experimento

In [6]:
exp = TSForecastingExperiment()
exp.setup(data = airline, target='Number of airline passengers' ,  fh = 12, coverage=0.95)

Unnamed: 0,Description,Value
0,session_id,2657
1,Target,Number of airline passengers
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(144, 1)"
5,Transformed data shape,"(144, 1)"
6,Transformed train set shape,"(132, 1)"
7,Transformed test set shape,"(12, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


<pycaret.time_series.forecasting.oop.TSForecastingExperiment at 0x7a9a30107010>

usamos la función check_stats() para ver características del dataset

In [7]:
exp.check_stats()

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Transformed,Length,,144.0
1,Summary,Statistics,Transformed,# Missing Values,,0.0
2,Summary,Statistics,Transformed,Mean,,280.298611
3,Summary,Statistics,Transformed,Median,,265.5
4,Summary,Statistics,Transformed,Standard Deviation,,119.966317
5,Summary,Statistics,Transformed,Variance,,14391.917201
6,Summary,Statistics,Transformed,Kurtosis,,-0.364942
7,Summary,Statistics,Transformed,Skewness,,0.58316
8,Summary,Statistics,Transformed,# Distinct Values,,118.0
9,White Noise,Ljung-Box,Transformed,Test Statictic,"{'alpha': 0.05, 'K': 24}",1606.083817


Prueba de Ljung-Box: Es una prueba estadística que se utiliza comúnmente para verificar si existen autocorrelaciones en una serie temporal. Específicamente, prueba la hipótesis nula de que las autocorrelaciones de la serie temporal para los rezagos del 1 al K son todas iguales a cero.

Hipótesis nula de la prueba ADF (Augmented Dickey-Fuller): La serie temporal tiene una raíz unitaria, lo que significa que no es estacionaria. Un valor p pequeño favorece la estacionariedad.

Hipótesis nula de la prueba KPSS: La serie temporal es estacionaria alrededor de una tendencia determinista (o simplemente estacionaria si no se incluye una tendencia en la ecuación de la prueba). Un valor p grande favorece la estacionariedad.

Hipótesis nula de la prueba de Shapiro-Wilk: La muestra proviene de una población con distribución normal. Un valor p grande favorece la normalidad.

In [8]:
exp.plot_model(plot='train_test_split')

# Modelos ARIMA y SARIMA

Selección de p, q, P, Q y el m

hacemos un plot de el dataset original, diferenciado por lag_1 y diferenciado por lag_12

In [9]:
exp.plot_model(plot="diff", data_kwargs={"order_list": [1,12], "acf": True, "pacf": True})

Ahora vamos a diferenciar por el lag_1 y el lag_12 al mismo tiempo

In [10]:
exp.plot_model(plot="diff", data_kwargs={"lags_list": [[1,12]], "acf": True, "pacf": True})

Pycaret ya tiene varios modelos cargados para usarse (todos los que necesitamos para esta clase)

In [11]:
exp.models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
naive,Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
grand_means,Grand Means Forecaster,sktime.forecasting.naive.NaiveForecaster,True
snaive,Seasonal Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
polytrend,Polynomial Trend Forecaster,sktime.forecasting.trend._polynomial_trend_for...,True
arima,ARIMA,sktime.forecasting.arima.ARIMA,True
auto_arima,Auto ARIMA,sktime.forecasting.arima.AutoARIMA,True
exp_smooth,Exponential Smoothing,sktime.forecasting.exp_smoothing.ExponentialSm...,True
ets,ETS,sktime.forecasting.ets.AutoETS,True
theta,Theta Forecaster,sktime.forecasting.theta.ThetaForecaster,True
stlf,STLF,sktime.forecasting.trend._stl_forecaster.STLFo...,True


In [12]:
ar1 = exp.create_model('arima', order = (1,0,0), seasonal_order=(0,0,0,0), with_intercept=True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,3.376,3.746,102.7986,129.4174,0.1972,0.2297,-2.0236


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
ma1 = exp.create_model('arima', order = (0,0,1), seasonal_order=(0,0,0,0), with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,6.6833,6.4226,203.5061,221.8905,0.4119,0.5336,-7.8881


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [14]:
random_walk= exp.create_model('arima', order = (0,0,0), seasonal_order=(0,0,0,12) , with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,7.0172,6.5493,213.6743,226.2657,0.4362,0.5649,-8.2421


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
arima111= exp.create_model('arima', order = (1,1,1), seasonal_order=(0,0,0,12) , with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [16]:
personal_arima_1 = exp.create_model('arima', order = (1,1,1), seasonal_order=(0,0,0,12) , with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [17]:
personal_arima_2 = exp.create_model('arima', order = (1,1,1), seasonal_order=(0,0,0,12) , with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [18]:
personal_arima_3 = exp.create_model('arima', order = (1,1,1), seasonal_order=(0,0,0,12) , with_intercept= True, cross_validation=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [19]:
mis_modelos = [ar1, ma1, random_walk, arima111, personal_arima_1, personal_arima_2, personal_arima_3]
lables_mis_modelos = ['ar1', 'ma1', 'random_walk', 'arima(1,1,1)', 'personal_arima_1', 'personal_arima_2', 'personal_arima_3']

In [20]:
exp.compare_models(mis_modelos, cross_validation=False)

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
3,ARIMA,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225,0.47
4,ARIMA,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225,0.37
5,ARIMA,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225,0.18
6,ARIMA,1.999,2.3844,60.8693,82.3772,0.1166,0.1256,-0.225,0.16
0,ARIMA,3.376,3.746,102.7986,129.4174,0.1972,0.2297,-2.0236,0.07
1,ARIMA,6.6833,6.4226,203.5061,221.8905,0.4119,0.5336,-7.8881,0.07
2,ARIMA,7.0172,6.5493,213.6743,226.2657,0.4362,0.5649,-8.2421,0.03


Processing:   0%|          | 0/33 [00:00<?, ?it/s]

In [21]:
exp.plot_model(mis_modelos, plot='forecast', data_kwargs={'fh':36, 'labels':lables_mis_modelos})

In [22]:
exp.plot_model(personal_arima_2  , plot='forecast', data_kwargs={'fh':24, 'labels':['personal_arima_2']})

In [23]:
exp.plot_model(personal_arima_2, plot='diagnostics')

In [24]:
exp.check_stats(personal_arima_2, test = 'adf', alpha=0.05)

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Stationarity,ADF,Residual,Stationarity,{'alpha': 0.05},False
1,Stationarity,ADF,Residual,p-value,{'alpha': 0.05},0.151335
2,Stationarity,ADF,Residual,Test Statistic,{'alpha': 0.05},-2.366772
3,Stationarity,ADF,Residual,Critical Value 1%,{'alpha': 0.05},-3.487517
4,Stationarity,ADF,Residual,Critical Value 5%,{'alpha': 0.05},-2.886578
5,Stationarity,ADF,Residual,Critical Value 10%,{'alpha': 0.05},-2.580124


# BASTA DE ELEGIR A MANO! AUTO-ARIMA

In [25]:
auto_arima = exp.create_model('auto_arima', cross_validation=False, information_criterion='aic', start_p=0, start_q=0, max_p=2, max_q=2, seasonal=True, stepwise=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,0.3743,0.4569,11.3967,15.7842,0.0238,0.0238,0.955


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Si stepwise' = False, Auto-ARIMA hace un gridsearch de todos los valores posibles y quedarse con el mejor. Esto puede tomar much tiempo

In [31]:
auto_arima.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,132.0
Model:,"SARIMAX(2, 0, 0)x(1, 1, [1, 2], 12)",Log Likelihood,-443.979
Date:,"Sat, 12 Jul 2025",AIC,901.957
Time:,19:27:26,BIC,921.47
Sample:,01-31-1949,HQIC,909.881
,- 12-31-1959,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1546,0.708,0.218,0.827,-1.232,1.542
ar.L1,0.6353,0.091,6.978,0.000,0.457,0.814
ar.L2,0.2356,0.090,2.631,0.009,0.060,0.411
ar.S.L12,0.9599,0.184,5.214,0.000,0.599,1.321
ma.S.L12,-1.2049,0.336,-3.587,0.000,-1.863,-0.547
ma.S.L24,0.3322,0.118,2.814,0.005,0.101,0.564
sigma2,88.2740,19.755,4.468,0.000,49.554,126.994

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,1.09
Prob(Q):,0.96,Prob(JB):,0.58
Heteroskedasticity (H):,1.56,Skew:,-0.07
Prob(H) (two-sided):,0.16,Kurtosis:,3.45


In [27]:
auto_arima_2 = exp.create_model('auto_arima', cross_validation=False, information_criterion='aic', start_p=0, start_q=0, max_p=10, max_q=10, seasonal=True, stepwise=False)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,0.3743,0.4569,11.3967,15.7842,0.0238,0.0238,0.955


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [32]:
auto_arima_2.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,132.0
Model:,"SARIMAX(2, 0, 0)x(1, 1, [1, 2], 12)",Log Likelihood,-443.979
Date:,"Sat, 12 Jul 2025",AIC,901.957
Time:,19:28:32,BIC,921.47
Sample:,01-31-1949,HQIC,909.881
,- 12-31-1959,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.1546,0.708,0.218,0.827,-1.232,1.542
ar.L1,0.6353,0.091,6.978,0.000,0.457,0.814
ar.L2,0.2356,0.090,2.631,0.009,0.060,0.411
ar.S.L12,0.9599,0.184,5.214,0.000,0.599,1.321
ma.S.L12,-1.2049,0.336,-3.587,0.000,-1.863,-0.547
ma.S.L24,0.3322,0.118,2.814,0.005,0.101,0.564
sigma2,88.2740,19.755,4.468,0.000,49.554,126.994

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,1.09
Prob(Q):,0.96,Prob(JB):,0.58
Heteroskedasticity (H):,1.56,Skew:,-0.07
Prob(H) (two-sided):,0.16,Kurtosis:,3.45


Le agregamos el stepwise=True

In [29]:
auto_arima_fast = exp.create_model('auto_arima', cross_validation=False, information_criterion='aic', start_p=0, start_q=0, max_p=5, max_q=5, seasonal=True, stepwise=True)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,0.4893,0.5365,14.8982,18.5365,0.031,0.0309,0.938


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [33]:
auto_arima_fast.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,132.0
Model:,"SARIMAX(3, 0, 0)x(0, 1, 0, 12)",Log Likelihood,-447.843
Date:,"Sat, 12 Jul 2025",AIC,905.686
Time:,19:28:49,BIC,919.623
Sample:,01-31-1949,HQIC,911.346
,- 12-31-1959,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,5.5341,2.007,2.757,0.006,1.600,9.468
ar.L1,0.7049,0.095,7.393,0.000,0.518,0.892
ar.L2,0.2574,0.131,1.968,0.049,0.001,0.514
ar.L3,-0.1434,0.107,-1.338,0.181,-0.354,0.067
sigma2,101.0969,12.818,7.887,0.000,75.974,126.220

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,2.83
Prob(Q):,0.96,Prob(JB):,0.24
Heteroskedasticity (H):,1.41,Skew:,-0.14
Prob(H) (two-sided):,0.29,Kurtosis:,3.7


Vamos a customizarlo un poco. Ninguno de los modelos hechos con autoarima hizo diferenciación

In [37]:
auto_arima_augmented = exp.create_model('auto_arima', cross_validation=False, information_criterion='aic', start_p=0, start_q=0, d=1, D=1)

Unnamed: 0,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
Test,0.6085,0.6927,18.5277,23.9317,0.0418,0.0403,0.8966


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [39]:
exp.plot_model([auto_arima, auto_arima_fast, auto_arima_augmented], plot='forecast', data_kwargs={'fh':48, 'labels':['auto_arima', 'auto_arima_fast', 'auto_arima_augmented']})