In [1]:
import itertools
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from skforecast.ForecasterRnn import ForecasterRnn
from skforecast.ForecasterRnn.utils import create_and_compile_model
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from keras.backend import clear_session
from keras.optimizers import Adam
from keras.losses import MeanSquaredError
from keras.utils import set_random_seed
warnings.filterwarnings('ignore')

In [2]:
# Read the preprocessed CSV, using the parsed 'fecha' column as the index.
df_raw = pd.read_csv(
    "datos_preprocesados.csv",
    index_col='fecha',
    parse_dates=['fecha'],
)

# Conform the index to daily frequency and make sure it is in ascending order.
# NOTE(review): asfreq('D') inserts NaN rows for any missing day -- confirm the
# file has no gaps or handle them before modelling.
df = df_raw.asfreq('D').sort_index()

In [None]:
# Last day (inclusive) of the training and of the validation partitions.
end_train = '2023-01-01'
end_validation = '2023-08-05'

# Label slicing with .loc includes BOTH endpoints, so slicing `[:end_train]`
# and then `[end_train:end_validation]` would place the boundary day in two
# partitions at once. Instead, derive positional cut points from the label
# slices and split by position: the three partitions are then disjoint and
# together cover `df` exactly once.
assert df.index.is_monotonic_increasing, "positional split requires a sorted index"
n_train = len(df.loc[:end_train, :])
n_train_val = len(df.loc[:end_validation, :])

df_train = df.iloc[:n_train, :]
df_val   = df.iloc[n_train:n_train_val, :]
df_test  = df.iloc[n_train_val:, :]

print(f"Dates train      : {df_train.index.min()} --- {df_train.index.max()}  (n={len(df_train)})")
print(f"Dates validacion : {df_val.index.min()} --- {df_val.index.max()}  (n={len(df_val)})")
print(f"Dates test       : {df_test.index.min()} --- {df_test.index.max()}  (n={len(df_test)})")

In [4]:

# Columns handed to the forecaster; only `levels` is the prediction target.
series = ['demanda', 'tmed', 'hrmed', 'festivo', 'diasem', 'trim']
levels = ['demanda']

# Window of past observations used as input and number of steps predicted.
lags = 7
steps = 1

# Independent copies of the selected columns for the full frame and for each
# partition, so later cells never touch the original frames.
data, data_train, data_val, data_test = (
    frame[series].copy() for frame in (df, df_train, df_val, df_test)
)

def RNN(recurrent_units, dense_units, learning_rate, epochs, batch_size):
    """Build an LSTM forecaster for one hyperparameter combination and backtest it.

    The network is created with ``create_and_compile_model``, wrapped in a
    ``ForecasterRnn`` (with ``MinMaxScaler`` as ``transformer_series``) and
    evaluated with ``backtesting_forecaster_multiseries`` using mean absolute
    error and ``refit=False``.

    The function reads the notebook-level variables ``data``, ``data_train``,
    ``data_val``, ``levels``, ``lags``, ``steps`` and ``end_validation``.

    Parameters
    ----------
    recurrent_units
        Forwarded to ``create_and_compile_model`` as ``recurrent_units``.
    dense_units
        Forwarded to ``create_and_compile_model`` as ``dense_units``.
    learning_rate
        Learning rate given to the ``Adam`` optimizer.
    epochs
        Passed to training through ``fit_kwargs["epochs"]``.
    batch_size
        Passed to training through ``fit_kwargs["batch_size"]``.

    Returns
    -------
    tuple
        ``(metrics, predictions)`` exactly as returned by
        ``backtesting_forecaster_multiseries``.
    """
    # Release the global Keras state left behind by models built in earlier
    # calls; when this function runs repeatedly (e.g. in a grid search) that
    # state otherwise keeps growing in memory.
    clear_session()

    # LSTM network whose input/output shapes are derived from the training data.
    model = create_and_compile_model(
        series=data_train,
        levels=levels,
        lags=lags,
        steps=steps,
        recurrent_layer="LSTM",
        recurrent_units=recurrent_units,
        dense_units=dense_units,
        optimizer=Adam(learning_rate=learning_rate),
        loss=MeanSquaredError()
    )

    # Forecaster wrapping the compiled model; the training options travel in
    # fit_kwargs and are applied whenever the forecaster is fitted.
    forecaster = ForecasterRnn(
        regressor=model,
        levels=levels,
        steps=steps,
        lags=lags,
        transformer_series=MinMaxScaler(),
        fit_kwargs={
            "epochs": epochs,
            "batch_size": batch_size,
            "series_val": data_val,
        },
    )

    # Backtesting on the rows that follow `end_validation`.
    # There is no explicit fit() call in this function: presumably the
    # backtesting routine fits the forecaster on the first `initial_train_size`
    # rows -- TODO confirm against the skforecast documentation.
    metrics, predictions = backtesting_forecaster_multiseries(
        forecaster=forecaster,
        steps=forecaster.max_step,
        series=data,
        levels=forecaster.levels,
        initial_train_size=len(data.loc[:end_validation, :]),  # training + validation rows
        metric="mean_absolute_error",
        verbose=False,
        refit=False,
    )
    return metrics, predictions

In [None]:
# Hyperparameter search space; every combination of these values is evaluated.
param_grid = {
    'recurrent_units': [120, 140],
    'dense_units': [80, 90],
    'learning_rate': [ 0.0115, 0.011],
    'epochs': [8],
    'batch_size': [8, 6],
}

# Seed re-applied before every combination so that all candidates start from
# the same random state and their scores are comparable across runs (as far as
# the backend is deterministic).
SEED = 123

# One record per evaluated combination.
results = []

# Exhaustive manual grid search. itertools.product iterates the value lists in
# the dict's key order with the last key varying fastest, i.e. the same order
# as nesting one `for` loop per key.
param_names = list(param_grid)
for combination in itertools.product(*param_grid.values()):
    params = dict(zip(param_names, combination))
    print('Testing combination: ' + ', '.join(f'{name}={value}' for name, value in params.items()))

    set_random_seed(SEED)
    # The keys of `params` match the parameter names of RNN().
    metrics, predictions = RNN(**params)

    # Keep the hyperparameters, the score and the predictions together.
    # .iloc[0] takes the first (only) row by position instead of relying on
    # the metrics frame having a label equal to 0.
    results.append({
        **params,
        'MAE': metrics['mean_absolute_error'].iloc[0],
        'predictions': predictions,
    })

# Collect all runs in a single DataFrame.
results_df = pd.DataFrame(results)

# Pick the row with the lowest MAE.
# NOTE(review): this MAE is measured by RNN() on the period after
# `end_validation`, so the hyperparameters are selected on the same data that
# is later reported/plotted as test -- the selected score is optimistic.
best_result_index = results_df['MAE'].idxmin()
mejor_result = results_df.loc[best_result_index]

# Report the winning combination.
print("\nMejores hiperparámetros encontrados:")
print(mejor_result)

# Predictions produced by the winning combination.
best_predictions = mejor_result['predictions']


In [9]:
# Display the best grid-search row: its hyperparameters, MAE and stored predictions.
mejor_result

recurrent_units                                                  120
dense_units                                                       90
learning_rate                                                 0.0115
epochs                                                             8
batch_size                                                         6
MAE                                                       713.514135
predictions                         demanda
2023-08-06  23545.705...
Name: 5, dtype: object

In [None]:
# Compare the observed demand in the test partition with the predictions of
# the best grid-search combination.
predictions = best_predictions

# Explicit figure/axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data_test.index, data_test['demanda'], label="Real Value", color='blue', linestyle='-')  # observed values
ax.plot(predictions.index, predictions, label="Predictions", color='orange', linestyle='--')  # predicted values

# Title and axis labels. The plotted column is 'demanda', so the y axis is
# labelled as demand (it previously read "Users").
ax.set_title("Real Value vs Predicted in Test Data")
ax.set_xlabel("Date Time")
ax.set_ylabel("Demand")

ax.legend(loc='upper left')
ax.grid()  # grid lines make the two curves easier to compare
fig.tight_layout()
plt.show()
