In [51]:
import pandas as pd
import numpy as np

MODELOS DE REGRESIÓN:

In [42]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor

REGRESIÓN LINEAL

In [None]:
# Train a linear regression model and return its mean squared error
def error_entrenar_linearReg(df,columns_predict):
    """Fit a ``LinearRegression`` on the first 80% of rows and score it on the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). The first
        ``int(df.shape[0] * 0.8)`` rows are used for fitting, the remaining
        rows for evaluation (no shuffling).
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    The value returned by ``mean_squared_error`` for the hold-out rows.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df[:split_at]
    holdout = df[split_at:].copy()
    holdout_features = holdout.drop(columns=columns_predict)

    regressor = LinearRegression()
    regressor.fit(X=train_part.drop(columns=columns_predict), y=train_part[columns_predict])

    holdout['Predicciones'] = regressor.predict(holdout_features)
    # Arguments are passed as (predictions, targets); the squared error is
    # symmetric, so the order does not affect the result.
    return mean_squared_error(holdout['Predicciones'].values, holdout[columns_predict].values)

In [None]:
# Train a linear regression model and return its predictions
def pred_entrenar_linearReg(df,columns_predict):
    """Fit a ``LinearRegression`` on the first 80% of rows and predict the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). The first
        ``int(df.shape[0] * 0.8)`` rows are used for fitting.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    pandas.DataFrame
        Copy of the last 20% of rows with an extra ``'Predicciones'`` column
        holding the model output.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df[:split_at]
    holdout = df[split_at:].copy()
    holdout_features = holdout.drop(columns=columns_predict)

    regressor = LinearRegression()
    regressor.fit(X=train_part.drop(columns=columns_predict), y=train_part[columns_predict])

    holdout['Predicciones'] = regressor.predict(holdout_features)
    return holdout

DECISION TREE:

In [None]:
# Train a decision-tree regressor tuned by grid search and return its mean squared error.
def error_entrenar_TreeReg(df,columns_predict):
    """Tune a ``DecisionTreeRegressor`` on the first 80% of rows and score it on the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model,
        the remaining rows are the hold-out set.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    Mean squared error between the hold-out targets and the predictions.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'max_depth': [None, 5, 10, 20, 30],
        'min_samples_split': [2, 5, 10, 15],
        'min_samples_leaf': [1, 2, 5, 10],
        'max_features': [None, 'sqrt', 'log2']
    }
    grid_search = GridSearchCV(
        estimator=DecisionTreeRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # GridSearchCV refits the best configuration on the whole training set by
    # default (refit=True), with the same random_state, so a second manual fit
    # with best_params_ would only repeat that work.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return mean_squared_error(
        y_true=holdout[columns_predict].values,
        y_pred=holdout['Predicciones'].values,
    )

In [None]:
# Train a decision-tree regressor tuned by grid search and return its predictions.
def pred_entrenar_TreeReg(df,columns_predict):
    """Tune a ``DecisionTreeRegressor`` on the first 80% of rows and predict the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    pandas.DataFrame
        Copy of the hold-out rows with an extra ``'Predicciones'`` column.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'max_depth': [None, 5, 10, 20, 30],
        'min_samples_split': [2, 5, 10, 15],
        'min_samples_leaf': [1, 2, 5, 10],
        'max_features': [None, 'sqrt', 'log2']
    }
    grid_search = GridSearchCV(
        estimator=DecisionTreeRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # best_estimator_ is already refitted on the full training set with the
    # winning parameters (refit=True default); no second fit is needed.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return holdout

RANDOM FOREST

In [None]:
# Train a random-forest regressor tuned by grid search and return its mean squared error
def error_entrenar_RandomForestReg(df,columns_predict):
    """Tune a ``RandomForestRegressor`` on the first 80% of rows and score it on the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model,
        the remaining rows are the hold-out set.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    Mean squared error between the hold-out targets and the predictions.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 500],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2'],
    }
    grid_search = GridSearchCV(
        estimator=RandomForestRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # GridSearchCV already refits the winning forest on all training rows
    # (refit=True default, same random_state); training it again by hand would
    # only duplicate the most expensive step.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return mean_squared_error(
        y_true=holdout[columns_predict].values,
        y_pred=holdout['Predicciones'].values,
    )

In [None]:
# Train a random-forest regressor tuned by grid search and return its predictions.
def pred_entrenar_RandomForestReg(df,columns_predict):
    """Tune a ``RandomForestRegressor`` on the first 80% of rows and predict the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    pandas.DataFrame
        Copy of the hold-out rows with an extra ``'Predicciones'`` column.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 500],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2'],
    }
    grid_search = GridSearchCV(
        estimator=RandomForestRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # best_estimator_ is already refitted on the full training set with the
    # winning parameters (refit=True default); no second fit is needed.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return holdout


GRADIENT BOOSTING:

In [None]:
# Train a gradient-boosting regressor tuned by grid search and return its predictions
def pred_entrenar_GradientBoostReg(df,columns_predict):
    """Tune a ``GradientBoostingRegressor`` on the first 80% of rows and predict the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    pandas.DataFrame
        Copy of the hold-out rows with an extra ``'Predicciones'`` column.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 1.0],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2']
    }
    grid_search = GridSearchCV(
        estimator=GradientBoostingRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # best_estimator_ is already refitted on the full training set with the
    # winning parameters (refit=True default); no second fit is needed.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return holdout

In [None]:
# Train a gradient-boosting regressor tuned by grid search and return its mean squared error.
def error_entrenar_GradientBoostReg(df,columns_predict):
    """Tune a ``GradientBoostingRegressor`` on the first 80% of rows and score it on the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model,
        the remaining rows are the hold-out set.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    Mean squared error between the hold-out targets and the predictions.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.1, 1.0],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2']
    }
    grid_search = GridSearchCV(
        estimator=GradientBoostingRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # GridSearchCV already refits the winning model on all training rows
    # (refit=True default, same random_state); fitting again by hand would
    # only duplicate that work.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return mean_squared_error(
        y_true=holdout[columns_predict].values,
        y_pred=holdout['Predicciones'].values,
    )

EXTRATREE:

In [None]:
# Train an extra-trees regressor tuned by grid search and return its mean squared error.
def error_entrenar_ExtraTreeReg(df,columns_predict):
    """Tune an ``ExtraTreesRegressor`` on the first 80% of rows and score it on the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model,
        the remaining rows are the hold-out set.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    Mean squared error between the hold-out targets and the predictions.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 500],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2'],
    }
    grid_search = GridSearchCV(
        estimator=ExtraTreesRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # GridSearchCV already refits the winning ensemble on all training rows
    # (refit=True default, same random_state); fitting again by hand would
    # only duplicate that work.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return mean_squared_error(
        y_true=holdout[columns_predict].values,
        y_pred=holdout['Predicciones'].values,
    )

In [None]:
# Train an extra-trees regressor tuned by grid search and return its predictions
def pred_entrenar_ExtraTreeReg(df,columns_predict):
    """Tune an ``ExtraTreesRegressor`` on the first 80% of rows and predict the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding both the features and the target column(s). Rows are split
        by position: the first ``int(df.shape[0] * 0.8)`` rows train the model.
    columns_predict : label or list of labels
        Target column(s); every other column of ``df`` is used as a feature.

    Returns
    -------
    pandas.DataFrame
        Copy of the hold-out rows with an extra ``'Predicciones'`` column.
    """
    split_at = int(df.shape[0] * 0.8)
    train_part = df.iloc[:split_at]
    holdout = df.iloc[split_at:].copy()
    X_train = train_part.drop(columns=columns_predict)
    y_train = train_part[columns_predict]

    param_grid = {
        'n_estimators': [100, 200, 500],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2'],
    }
    grid_search = GridSearchCV(
        estimator=ExtraTreesRegressor(random_state=42),
        param_grid=param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # best_estimator_ is already refitted on the full training set with the
    # winning parameters (refit=True default); no second fit is needed.
    optimized_model = grid_search.best_estimator_

    holdout['Predicciones'] = optimized_model.predict(holdout.drop(columns=columns_predict))
    return holdout

In [8]:
from skforecast.Sarimax import Sarimax
from skforecast.ForecasterSarimax import ForecasterSarimax
from skforecast.model_selection_sarimax import backtesting_sarimax
from skforecast.model_selection_sarimax import grid_search_sarimax
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from skforecast.model_selection import grid_search_forecaster
from sklearn.metrics import mean_squared_error
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from sklearn.linear_model import Ridge
from prophet import Prophet
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

SARIMAX:

In [None]:
# SARIMAX model whose orders are chosen by grid search; returns the mean squared error
def error_sarimax(datos_train,datos_test, columna):
    """Select SARIMAX orders by grid search, backtest the winner and return an MSE.

    Parameters
    ----------
    datos_train : pandas.DataFrame
        Frame whose column ``columna`` is the series used for both the grid
        search and the backtesting. The first 80% of its rows form the initial
        training window in both steps.
    datos_test : array-like
        Reference values compared against the backtesting predictions; its
        length is also used as the backtesting horizon (``steps``).
    columna : label
        Name of the series column inside ``datos_train``.

    Returns
    -------
    ``mean_squared_error`` between ``datos_test`` and the first
    ``len(datos_test)`` backtesting predictions.

    Notes
    -----
    NOTE(review): the backtesting predictions cover the last 20% of
    ``datos_train``, not a period after it. The comparison with ``datos_test``
    therefore lines up only if the caller passes the full series as
    ``datos_train`` and its last 20% as ``datos_test`` -- confirm against callers.
    The grid search ranks candidates by mean absolute error while this function
    reports a mean squared error.
    """
    serie = datos_train[columna]
    ventana_inicial = int(len(datos_train)*0.8)

    # Candidate (p, d, q) orders: the same (p, q) pairs with d=1 first, then d=0.
    pares_pq = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 1), (1, 2), (2, 2)]
    candidatos = {
        'order': [(p, d, q) for d in (1, 0) for p, q in pares_pq],
        'seasonal_order': [(0, 0, 0, 0), (0, 1, 0, 12), (1, 1, 1, 12)],
        'trend': [None]
    }

    # Grid search; the order given here is only a placeholder replaced by the search.
    buscador = ForecasterSarimax(regressor=Sarimax(order=(1, 1, 1), maxiter=500))
    resultados_grid = grid_search_sarimax(
        forecaster=buscador,
        y=serie,
        param_grid=candidatos,
        steps=12,
        refit=True,
        metric='mean_absolute_error',
        initial_train_size=ventana_inicial,
        fixed_train_size=False,
        return_best=False,
        n_jobs='auto',
        suppress_warnings_fit=True,
        verbose=False,
        show_progress=True,
    )

    # The first row of the results table is taken as the best configuration
    # (presumably the table is sorted by the metric -- confirm).
    mejor = resultados_grid.index[0]

    # Backtesting predictions with the best model according to the grid search
    forecaster_1 = ForecasterSarimax(
        regressor=Sarimax(
            order=resultados_grid.order[mejor],
            seasonal_order=resultados_grid.seasonal_order[mejor],
            maxiter=500,
        ),
    )
    _, predicciones_m1 = backtesting_sarimax(
        forecaster=forecaster_1,
        y=serie,
        initial_train_size=ventana_inicial,
        steps=len(datos_test),
        metric='mean_absolute_error',
        refit=True,
        n_jobs="auto",
        suppress_warnings_fit=True,
        verbose=False,
        show_progress=True,
    )

    return metrics.mean_squared_error(datos_test, predicciones_m1[:len(datos_test)])
    

In [None]:
# SARIMAX model whose orders are chosen by grid search; returns the predictions

def prediccion_sarimax(datos,datos_train, columna,size):
    """Select SARIMAX orders by grid search and return backtesting predictions.

    Parameters
    ----------
    datos : pandas.DataFrame
        Frame whose column ``columna`` is the series used for both the grid
        search and the backtesting.
    datos_train : sized
        Only its length is used: ``int(len(datos_train) * 0.8)`` is the initial
        training window for the grid search and for the backtesting.
    columna : label
        Name of the series column inside ``datos``.
    size : int
        Number of steps predicted per backtesting fold.

    Returns
    -------
    The predictions object returned by ``backtesting_sarimax``.
    """
    serie = datos[columna]
    ventana_inicial = int(len(datos_train)*0.8)

    # Candidate (p, d, q) orders: the same (p, q) pairs with d=1 first, then d=0.
    pares_pq = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 1), (1, 2), (2, 2)]
    candidatos = {
        'order': [(p, d, q) for d in (1, 0) for p, q in pares_pq],
        'seasonal_order': [(0, 0, 0, 0), (0, 1, 0, 12), (1, 1, 1, 12)],
        'trend': [None]
    }

    # Grid search; the order given here is only a placeholder replaced by the search.
    buscador = ForecasterSarimax(regressor=Sarimax(order=(1, 1, 1), maxiter=500))
    resultados_grid = grid_search_sarimax(
        forecaster=buscador,
        y=serie,
        param_grid=candidatos,
        steps=12,
        refit=True,
        metric='mean_absolute_error',
        initial_train_size=ventana_inicial,
        fixed_train_size=False,
        return_best=False,
        n_jobs='auto',
        suppress_warnings_fit=True,
        verbose=False,
        show_progress=True,
    )

    # The first row of the results table is taken as the best configuration
    # (presumably the table is sorted by the metric -- confirm).
    mejor = resultados_grid.index[0]

    # Backtesting predictions with the best model according to the grid search
    forecaster_1 = ForecasterSarimax(
        regressor=Sarimax(
            order=resultados_grid.order[mejor],
            seasonal_order=resultados_grid.seasonal_order[mejor],
            maxiter=500,
        ),
    )
    _, predicciones_m1 = backtesting_sarimax(
        forecaster=forecaster_1,
        y=serie,
        initial_train_size=ventana_inicial,
        steps=size,
        metric='mean_absolute_error',
        refit=True,
        n_jobs="auto",
        suppress_warnings_fit=True,
        verbose=False,
        show_progress=True,
    )

    return predicciones_m1

FORECASTER AUTOREGRESIVO

In [4]:
def df_csv(folder,file,indice):
    """Read ``<folder>/<file>.csv`` into a DataFrame indexed by column ``indice``.

    ``folder`` and ``file`` are strings; ``file`` is given without the
    ``.csv`` extension. ``indice`` is forwarded to ``index_col``.
    """
    csv_path = '/'.join((folder, file)) + '.csv'
    frame = pd.read_csv(csv_path, index_col=indice)
    return frame

In [3]:
import pandas as pd

In [29]:
# Load Distribuciones/Normal-fin.csv with column 'indice' as the index.
df1 = df_csv('Distribuciones','Normal-fin','indice')
# Parse the index labels as datetimes and tag the index with frequency 'M'.
df1.index = pd.to_datetime(df1.index)
df1.index.freq='M'

# 80/20 split by row order (no shuffling): first 80% for training, remainder for testing.
df1_train=df1[:int(df1.shape[0]*0.8)]
df1_test=df1[int(df1.shape[0]*0.8):]

In [26]:
# Tag the training index with frequency 'M'.
# NOTE(review): probably redundant if the slice already carries df1's freq -- confirm.
df1_train.index.freq='M'

In [25]:
# Tag the test index with frequency 'M'.
# NOTE(review): probably redundant if the slice already carries df1's freq -- confirm.
df1_test.index.freq='M'

In [45]:
# Decision-tree forecaster configuration

# Candidate lag values
lags_gridDT = [10, 20]

# Candidate hyper-parameters for the regressor
param_gridDT = dict(
    max_depth=[None, 5, 10, 20, 30],
    min_samples_split=[2, 5, 10, 15],
    min_samples_leaf=[1, 2, 5, 10],
    max_features=[None, 'sqrt', 'log2'],
)

# Recursive autoregressive forecaster; lags=10 is only the starting value,
# the grid search functions receive lags_gridDT as the candidates.
forecasterDT = ForecasterAutoreg(regressor=DecisionTreeRegressor(random_state=123), lags=10)

In [37]:
# Random-forest forecaster configuration

# Candidate lag values
lags_gridRF = [10, 20]

# Candidate hyper-parameters for the regressor
param_gridRF = dict(
    n_estimators=[100, 200, 500],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=[None, 'sqrt', 'log2'],
)

# Recursive autoregressive forecaster; lags=10 is only the starting value,
# the grid search functions receive lags_gridRF as the candidates.
forecasterRF = ForecasterAutoreg(regressor=RandomForestRegressor(random_state=123), lags=10)

In [None]:
# Gradient-boosting forecaster configuration

# Candidate lag values
lags_gridGB = [10, 20]

# Candidate hyper-parameters for the regressor
param_gridGB = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 1.0],
    max_depth=[3, 5, 7],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=[None, 'sqrt', 'log2'],
)

# Recursive autoregressive forecaster; lags=10 is only the starting value,
# the grid search functions receive lags_gridGB as the candidates.
forecasterGB = ForecasterAutoreg(regressor=GradientBoostingRegressor(random_state=123), lags=10)

In [68]:
# Extra-trees forecaster configuration

# Candidate lag values
lags_gridET = [10, 20]

# Candidate hyper-parameters for the regressor
param_gridET = dict(
    n_estimators=[100, 200, 500],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=[None, 'sqrt', 'log2'],
)

# Direct forecaster (unlike the DT/RF/GB cells, which use ForecasterAutoreg);
# its horizon is the number of rows in df1_test.
forecasterET = ForecasterAutoregDirect(
    regressor=ExtraTreesRegressor(random_state=123),
    steps=df1_test.shape[0],
    lags=10,
)

In [46]:
# Tune the forecaster by grid search and return its mean squared error on the test data
def error_backtesting_forecasterAutoreg(datos_train,datos_test,column,size,steps,param_grid,lags_grid,forecaster):
    """Grid-search ``forecaster`` on the training series and score its forecast.

    Parameters
    ----------
    datos_train : pandas.DataFrame
        Training data; the series ``datos_train[column]`` is used. The first
        80% of its rows form the initial training window of the search.
    datos_test : pandas.DataFrame or array-like
        Reference values. When it is a DataFrame containing ``column``, only
        that column is scored; otherwise it is used as given.
    column : label
        Name of the series column.
    size : int
        Number of steps to forecast after the end of the training series.
    steps : int
        Horizon used inside the grid search backtesting.
    param_grid : dict
        Candidate regressor hyper-parameters.
    lags_grid : list
        Candidate lag configurations.
    forecaster : skforecast forecaster
        Modified in place: ``return_best=True`` leaves it refitted with the
        best lags and parameters found.

    Returns
    -------
    Mean squared error between the reference values and the forecast.
    """
    # The results table is not needed: with return_best=True the search leaves
    # `forecaster` itself refitted with the winning configuration.
    grid_search_forecaster(
        forecaster         = forecaster,
        y                  = datos_train[column],
        param_grid         = param_grid,
        lags_grid          = lags_grid,
        steps              = steps,
        refit              = False,
        metric             = 'mean_squared_error',
        initial_train_size = int(len(datos_train)*0.8),
        fixed_train_size   = False,
        return_best        = True,
        n_jobs             = 'auto',
        verbose            = False
    )

    # Forecast
    predicciones = forecaster.predict(steps=size)

    # Score against the same column that was used for training, so a test frame
    # with additional columns does not break (or distort) the comparison.
    if isinstance(datos_test, pd.DataFrame) and column in datos_test.columns:
        y_true = datos_test[column]
    else:
        y_true = datos_test

    return mean_squared_error(y_true=y_true, y_pred=predicciones)


In [None]:
# Tune the autoregressive forecaster by grid search and return its forecast

def prediccion_backtesting_forecasterAutoreg(datos_train,column,size,steps,param_grid,lags_grid,forecaster):
    """Grid-search ``forecaster`` on ``datos_train[column]`` and forecast ``size`` steps.

    ``steps`` is the horizon used inside the grid search; the first 80% of
    ``datos_train`` is its initial training window. ``forecaster`` is modified
    in place (``return_best=True``) and then asked for ``size`` predictions,
    which are returned.
    """
    opciones_busqueda = dict(
        forecaster=forecaster,
        y=datos_train[column],
        param_grid=param_grid,
        lags_grid=lags_grid,
        steps=steps,
        refit=False,
        metric='mean_squared_error',
        initial_train_size=int(len(datos_train)*0.8),
        fixed_train_size=False,
        return_best=True,
        n_jobs='auto',
        verbose=False,
    )
    # Only the side effect on `forecaster` is needed, not the results table.
    grid_search_forecaster(**opciones_busqueda)

    return forecaster.predict(steps=size)


In [21]:
# Display the training series (column 'Valor').
df1_train['Valor']

indice
2014-01-31    79.007175
2014-02-28    84.297483
2014-03-31    83.758763
2014-04-30    80.400293
2014-05-31    74.976624
                ...    
2021-08-31    77.816583
2021-09-30    77.526485
2021-10-31    81.824310
2021-11-30    74.252928
2021-12-31    78.997264
Name: Valor, Length: 96, dtype: float64

In [47]:
# Forecast len(df1_test) steps with the ExtraTrees forecaster; 36 is the grid-search horizon.
# NOTE(review): the stored output below reports 480 models and parameters without
# n_estimators, which does not match param_gridET -- it looks stale; re-run to confirm.
prediccion_backtesting_forecasterAutoreg(df1_train,'Valor',df1_test.shape[0],36,param_gridET,lags_gridET,forecasterET)

Number of models compared: 480.


lags grid:   0%|          | 0/2 [00:00<?, ?it/s]

params grid:   0%|          | 0/240 [00:00<?, ?it/s]

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10] 
  Parameters: {'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 15}
  Backtesting metric: 8.92894295902656





2022-01-31    75.504968
2022-02-28    78.561574
2022-03-31    79.837846
2022-04-30    83.703765
2022-05-31    75.799248
2022-06-30    81.256860
2022-07-31    75.504968
2022-08-31    81.256860
2022-09-30    75.504968
2022-10-31    83.703765
2022-11-30    76.375194
2022-12-31    81.256860
2023-01-31    76.375194
2023-02-28    84.005923
2023-03-31    78.561574
2023-04-30    81.256860
2023-05-31    78.561574
2023-06-30    81.256860
2023-07-31    78.561574
2023-08-31    84.005923
2023-09-30    78.561574
2023-10-31    83.703765
2023-11-30    78.561574
2023-12-31    83.703765
Freq: M, Name: pred, dtype: float64

In [64]:
# Test MSE of the ExtraTrees forecaster; 36 is the grid-search horizon.
# The stored run of this cell was interrupted (KeyboardInterrupt), so no result is shown.
error_backtesting_forecasterAutoreg(df1_train,df1_test,'Valor',df1_test.shape[0],36,param_gridET,lags_gridET,forecasterET)

Number of models compared: 486.


lags grid:   0%|          | 0/2 [00:00<?, ?it/s]

params grid:   0%|          | 0/243 [00:00<?, ?it/s]

KeyboardInterrupt: 

MODELO FORECASTER AUTORREGRESIVO DIRECTO CON REGRESOR LINEAL CON PENALIZACIÓN RIDGE

In [None]:
# Tune the direct autoregressive forecaster (e.g. linear regressor with Ridge penalty) and return its predictions

def predicciones_backtesting_forecasterAutoregDirect(datos_train,column,steps,param_grid,lags_grid,forecaster):
    """Grid-search ``forecaster`` on ``datos_train[column]`` and return its forecast.

    ``steps`` is the horizon used inside the grid search; the first 80% of
    ``datos_train`` is its initial training window. ``forecaster`` is modified
    in place (``return_best=True``) and then ``predict()`` is called without
    arguments, so the horizon is whatever the forecaster defaults to.
    """
    opciones_busqueda = dict(
        forecaster=forecaster,
        y=datos_train[column],
        param_grid=param_grid,
        lags_grid=lags_grid,
        steps=steps,
        refit=False,
        metric='mean_squared_error',
        initial_train_size=int(len(datos_train)*0.8),
        fixed_train_size=False,
        return_best=True,
        n_jobs='auto',
        verbose=False,
    )
    # Only the side effect on `forecaster` is needed, not the results table.
    grid_search_forecaster(**opciones_busqueda)

    return forecaster.predict()


In [61]:
# Ridge forecaster configuration

# Candidate lag values
lags_gridRidge = [5, 12, 20]

# Candidate hyper-parameters for the regressor: 10 alphas log-spaced from 1e-5 to 1e5
param_gridRidge = dict(alpha=np.logspace(-5, 5, 10))

# Direct forecaster on a standardized target; its horizon is the number of rows in df1_test.
forecasterRidge = ForecasterAutoregDirect(
    regressor=Ridge(random_state=123),
    transformer_y=StandardScaler(),
    steps=df1_test.shape[0],
    lags=5,
)

In [56]:
# Tune the direct autoregressive forecaster by grid search and return its mean squared error

def error_backtesting_forecasterAutoregDirect(datos_train,datos_test,steps,lags_grid,param_grid,forecaster):
    """Grid-search ``forecaster`` on the first column of ``datos_train`` and score it.

    Parameters
    ----------
    datos_train : pandas.DataFrame
        Training data; only its first column is used as the series. The first
        80% of its rows form the initial training window of the search.
    datos_test : array-like
        Reference values passed unchanged as ``y_true``.
    steps : int
        Horizon used inside the grid search.
    lags_grid : list
        Candidate lag configurations.
    param_grid : dict
        Candidate regressor hyper-parameters.
    forecaster : skforecast forecaster
        Modified in place (``return_best=True``); ``predict()`` is then called
        without arguments.

    Returns
    -------
    Mean squared error between ``datos_test`` and the forecast.
    """
    serie = datos_train[datos_train.columns[0]]
    opciones_busqueda = dict(
        forecaster=forecaster,
        y=serie,
        param_grid=param_grid,
        lags_grid=lags_grid,
        steps=steps,
        refit=False,
        metric='mean_squared_error',
        initial_train_size=int(len(datos_train)*0.8),
        fixed_train_size=False,
        return_best=True,
        n_jobs='auto',
        verbose=False,
    )
    # Only the side effect on `forecaster` is needed, not the results table.
    grid_search_forecaster(**opciones_busqueda)

    # Test error of the forecast
    return mean_squared_error(y_true=datos_test, y_pred=forecaster.predict())

In [69]:
# Test MSE of the ExtraTrees direct forecaster, with the grid-search horizon set to len(df1_test).
# The stored run of this cell was interrupted (KeyboardInterrupt), so no result is shown.
error_backtesting_forecasterAutoregDirect(df1_train,df1_test,df1_test.shape[0],lags_gridET,param_gridET,forecasterET)

Number of models compared: 486.


lags grid:   0%|          | 0/2 [00:00<?, ?it/s]

params grid:   0%|          | 0/243 [00:00<?, ?it/s]

KeyboardInterrupt: 

PROPHET

In [None]:
# Prophet model fitted on the training data; returns the mean squared error on the test data
def error_prophet_prediccion(data_train,data_test,frequ):
    """Fit Prophet on ``data_train`` and return the MSE of its forecast against ``data_test``.

    Parameters
    ----------
    data_train : pandas.DataFrame
        After ``reset_index()``, its first column becomes Prophet's ``ds`` and
        its second column becomes ``y``. The caller's frame is not modified.
    data_test : pandas.DataFrame or Series
        Reference values (``.values`` is used); its length sets the number of
        future periods.
    frequ : str
        Frequency passed to ``make_future_dataframe``.

    Returns
    -------
    Mean squared error between the test values and the forecast ``yhat``
    for the rows after the training period.
    """
    historial = data_train.reset_index()
    nombres = {historial.columns[0]: 'ds', historial.columns[1]: 'y'}
    historial = historial.rename(columns=nombres)

    model = Prophet()
    model.fit(historial)

    horizonte = model.make_future_dataframe(periods=len(data_test), freq=frequ)
    forecast = model.predict(horizonte)

    # The forecast frame covers history + future; keep only the future part.
    y_pred = forecast['yhat'][len(historial):].values
    mse = mean_squared_error(data_test.values, y_pred)
    return mse

In [None]:
# Prophet model fitted on the training data; returns the predictions
def pred_prophet_prediccion(data_train,column,size,frequ):
    """Fit Prophet on ``data_train`` and return the forecast for ``size`` future periods.

    Parameters
    ----------
    data_train : pandas.DataFrame
        After ``reset_index()``, its first column becomes Prophet's ``ds`` and
        the column named ``column`` becomes ``y``. The caller's frame is not
        modified.
    column : label
        Name of the series column to forecast.
    size : int
        Number of future periods to forecast.
    frequ : str
        Frequency passed to ``make_future_dataframe``.

    Returns
    -------
    numpy.ndarray
        Forecast ``yhat`` values for the rows after the training period.
    """
    historial = data_train.reset_index()
    nombres = {historial.columns[0]: 'ds', column: 'y'}
    historial = historial.rename(columns=nombres)

    model = Prophet()
    model.fit(historial)

    horizonte = model.make_future_dataframe(periods=size, freq=frequ)
    forecast = model.predict(horizonte)

    # The forecast frame covers history + future; keep only the future part.
    return forecast['yhat'][len(historial):].values