In [705]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_pacf
from prophet import Prophet
import timesfm
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import OrdinalEncoder, FunctionTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from numpy import fft

from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# --------------------------
# Required Packages for Models
# --------------------------
import xgboost as xgb
from lightgbm import LGBMRegressor

# PyTorch and related libraries
import torch
import torch.nn as nn
import torch.optim as optim

import warnings
warnings.filterwarnings('ignore')

import logging
cmdstanpy_logger = logging.getLogger("cmdstanpy")
absl_logger = logging.getLogger("absl")
cmdstanpy_logger.disabled = True
absl_logger.disabled = True

In [706]:
tfm = timesfm.TimesFm(
      hparams=timesfm.TimesFmHparams(
          backend="gpu",
          per_core_batch_size=32,
          horizon_len=128,
          num_layers=50,
          use_positional_embedding=False,
          context_len=2048,
      ),
      checkpoint=timesfm.TimesFmCheckpoint(
          huggingface_repo_id="google/timesfm-2.0-500m-pytorch"),
  )

Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 56679.78it/s]


### Load data

In [707]:
df_omie_b = pd.read_csv('../../data/df_omie_blind(in).csv')
df_omie_l = pd.read_csv('../../data/df_omie_labelled(in).csv')
filtered_cat = pd.read_csv('../../data/filtered_categories(in).csv')
unit_list = pd.read_csv('../../data/unit_list(in).csv')

In [708]:
data = df_omie_l.merge(unit_list, on='Codigo', how='left')
data = data.merge(filtered_cat, on='Codigo', how='left')
codes = filtered_cat['Codigo'].unique()
data = data[data['Codigo'].isin(codes)]

In [709]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 703248 entries, 0 to 712263
Data columns (total 12 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Codigo                703248 non-null  object 
 1   Descripcion           703248 non-null  object 
 2   fechaHora             703248 non-null  object 
 3   PrecEuro              703248 non-null  float64
 4   Energia               703248 non-null  float64
 5   Descripción           403466 non-null  object 
 6   Agente                403466 non-null  object 
 7   Porcentaje_Propiedad  403466 non-null  float64
 8   Tipo_Unidad           403466 non-null  object 
 9   Zona/Frontera         403466 non-null  object 
 10  Tecnología            403466 non-null  object 
 11  Categoria             703248 non-null  object 
dtypes: float64(3), object(9)
memory usage: 69.7+ MB


## Feature Engineering

In [710]:
def time_features(df: pd.DataFrame):
    df['fechaHora'] = pd.to_datetime(df['fechaHora'])
    df['date'] = df['fechaHora'].dt.date
    df['hour'] = df['fechaHora'].dt.hour
    df['day_of_week'] = df['fechaHora'].dt.dayofweek  # Monday=0, Sunday=6
    df['month'] = df['fechaHora'].dt.month
    df['day_of_month'] = df['fechaHora'].dt.day
    df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
    df.sort_values(['fechaHora', 'Codigo'], inplace=True)
    df['t'] = (df['fechaHora'] - df['fechaHora'].min()).dt.total_seconds() / 3600
    
    def sin_cos_features(df: pd.DataFrame, period, K, time_col='t'):
        df = df.sort_values(['Codigo', 'fechaHora'])
        for k in range(1, K + 1):
            df[f'sin_{period}_{k}'] = np.sin(2 * np.pi * k * df[time_col] / period)
            df[f'cos_{period}_{k}'] = np.cos(2 * np.pi * k * df[time_col] / period)
        return df
    
    df = sin_cos_features(df, period=24, K=3)
    
    return df    

def cyclical_features(df: pd.DataFrame):
    df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['dow_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['dow_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    return df
        
def interaction_features(df: pd.DataFrame):
    df['energia_hour_sin'] = df['Energia'] * df['hour_sin']
    return df
    
def lags_features(df: pd.DataFrame):
    df.sort_values(['fechaHora'], inplace=True)
    df['lag_PrecEuro'] = df.groupby('Codigo')['PrecEuro'].shift(24*28)
    df['lag_Energia'] = df.groupby('Codigo')['Energia'].shift(24*28)
    df['lag_Energia'] = np.log(df['lag_Energia'] + 1)
    df['lag1_Energia'] = df.groupby('Codigo')['lag_Energia'].shift(1)
    df['lag24_Energia'] = df.groupby('Codigo')['lag_Energia'].shift(24)
    return df

In [711]:
def feature_engineering(data: pd.DataFrame):
    data['Energia_stationary'] = data['Energia'].diff()
    data = time_features(data)
    data = cyclical_features(data)
    data = interaction_features(data)
    data = lags_features(data)
    data = data.sort_values(['fechaHora', 'Codigo'])
    return data

In [712]:
data = feature_engineering(data)

### Missing Values

Due to lags, we have many missing values at the beginning of each codigo. As it represents a large proportion of the df, I will impute by the mean per hour. This might cause leakages for the first records but shouldn't impact the model.

In [713]:
nan_counts = data.isna().sum()
nan_counts = nan_counts[nan_counts > 1]
print(nan_counts)

Descripción             299782
Agente                  299782
Porcentaje_Propiedad    299782
Tipo_Unidad             299782
Zona/Frontera           299782
Tecnología              299782
lag_PrecEuro            209664
lag_Energia             209664
lag1_Energia            209976
lag24_Energia           217152
dtype: int64


Let's drop Technologia, Zona/Frontera, Tipo_Unidad, Porcentaje_Propiedad, Agente and Descripción as their feature importance is close to 0.

In [714]:
data.drop(columns=['Descripción', 'Agente', 'Porcentaje_Propiedad', 'Tipo_Unidad', 'Zona/Frontera', 'Tecnología'], inplace=True)

In [715]:
def impute_codigo_hour(df: pd.DataFrame):
    """
    Impute missing values in selected columns using the mean computed per Codigo and per hour.
    """
    df = df.copy()

    df['fechaHora'] = pd.to_datetime(df['fechaHora'])

    cols_to_impute = ['lag_PrecEuro', 'lag_Energia', 'lag1_Energia', 'lag24_Energia']

    for col in cols_to_impute:
        df[col] = df.groupby(['Codigo', 'hour'])[col].transform(lambda x: x.fillna(x.mean()))
        
    return df

In [716]:
data = impute_codigo_hour(data)
data.dropna(inplace=True)

## Feature Transformation

In [717]:
class OrdinalEncodingTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        # Initialize the encoder with any desired options.
        self.encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
        self.cat_cols = None

    def fit(self, X: pd.DataFrame, y=None):
        # Identify categorical columns (dtype object) in the training data.
        self.cat_cols = X.select_dtypes(include=['object']).columns
        # Fit the encoder on the categorical columns.
        self.encoder.fit(X[self.cat_cols])
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        X_transformed = X.copy()
        if self.cat_cols is not None:
            X_transformed[self.cat_cols] = self.encoder.transform(X_transformed[self.cat_cols])
        return X_transformed

In [718]:
def agg_features(df: pd.DataFrame):
    df['cum_energy'] = df.groupby(['Codigo', 'date'])['lag_Energia'].cumsum()
    return df

def rolling_mean_features(df: pd.DataFrame):
    features = ['lag_Energia', 'lag_PrecEuro']
    groups = ['Codigo', 'Categoria']
    times = [12, 24, 48, 168]
    for group in groups:
        for feature in features:
            for time in times:
                df[f'roll{time}_mean_{feature}'] = df.groupby(group)[feature] \
                    .transform(lambda x: x.rolling(window=time, min_periods=1).mean())
    return df

def ewm_features(df: pd.DataFrame):
    features = ['lag_Energia', 'lag_PrecEuro']
    groups = ['Codigo', 'Categoria']
    spans = [12, 24, 48, 168]
    for group in groups:
        for feature in features:
            for span in spans:
                df[f'ewm{span}_mean_{feature}'] = df.groupby(group)[feature] \
                    .transform(lambda x: x.ewm(span=span, min_periods=1).mean())
    return df

def diff_features(df: pd.DataFrame):
    features = ['lag_Energia', 'lag_PrecEuro']
    for feature in features:
        df[f'diff_{feature}'] = df.groupby('Codigo')[feature].diff()
    return df

def volatility_features(df: pd.DataFrame):
    features = ['lag_Energia', 'lag_PrecEuro']
    groups = ['Codigo', 'Categoria']
    windows = [12, 24, 48, 168]
    for group in groups:
        for feature in features:
            for window in windows:
                df[f'volatility_{window}_{feature}'] = df.groupby(group)[feature] \
                    .transform(lambda x: x.rolling(window=window, min_periods=1).std())
    return df

def fourrier_features(df: pd.DataFrame):    
    def apply_fft(group):
            X = fft.fft(group['lag_Energia'])
            N = len(X)
            group['lag_Energia_fft'] = np.abs(X) / N  # Normalize by length
            return group

    df = df.groupby('Codigo', group_keys=False).apply(apply_fft)
    return df

def frequency_power_features(df: pd.DataFrame):
    df['power_spectrum'] = df.groupby('Codigo')['lag_Energia'].transform(lambda x: np.abs(fft.fft(x))**2 / len(x))
    return df

## Pipeline

In [753]:
pipeline = Pipeline([
    ('agg_features', FunctionTransformer(agg_features)),
    ('rolling_mean_features', FunctionTransformer(rolling_mean_features)),
    ('ewm_features', FunctionTransformer(ewm_features)),
    ('diff_features', FunctionTransformer(diff_features)),
    ('volatility_features', FunctionTransformer(volatility_features)),
    ('fourrier_features', FunctionTransformer(fourrier_features)),
    ('frequency_power_features', FunctionTransformer(frequency_power_features)),
    ('ordinal_encoding', OrdinalEncodingTransformer()),
    ('dropna', FunctionTransformer(lambda x: x.dropna()))
])

## Models

### TimesFM

In [720]:
class TimesFMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TimesFMModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [721]:
def prepare_timesfm(X_train: pd.DataFrame, y_train: pd.Series, X_test: pd.DataFrame) -> tuple:
    X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
    return X_train_tensor, y_train_tensor, X_test_tensor

In [722]:
TIMESFM_MODEL_PARAMS = {
    'hidden_dim': 100,
    'output_dim': 1,
}

TIMESFM_TRAIN_PARAMS = {
    'lr': 0.01,
}


In [748]:
def train_timesfm(
    X_train_tensor: torch.Tensor, 
    y_train_tensor: torch.Tensor,
    TIMESFM_MODEL_PARAMS: dict,
    TIMESFM_TRAIN_PARAMS: dict,
    epochs: int = 150
    ) -> TimesFMModel:

    timesfm_model = TimesFMModel(input_dim=X_train_tensor.shape[1], **TIMESFM_MODEL_PARAMS)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(timesfm_model.parameters(), **TIMESFM_TRAIN_PARAMS)
    
    # Train the model.
    timesfm_model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = timesfm_model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()
        
    return timesfm_model
    
def evaluate_timesfm(timesfm_model: TimesFMModel, X_test_tensor: torch.Tensor) -> np.ndarray:
    timesfm_model.eval()
    with torch.no_grad():
        y_pred_timesfm = timesfm_model(X_test_tensor).numpy().flatten()
        
    return y_pred_timesfm

### LSTM

In [724]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True makes input shape (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Initialize hidden and cell states with zeros (and send to same device as x)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Use the output from the final time step
        out = out[:, -1, :]
        out = self.fc(out)
        return out

In [725]:
def prepare_lstm(X_train: pd.DataFrame, y_train: pd.Series, X_test: pd.DataFrame) -> tuple:
    selected_features = [
        'lag1_Energia', 'lag24_Energia', 'roll24_mean_Energia',
        'hour', 'day_of_week', 'day_of_month', 'month', 'is_weekend',
        'hour_sin', 'hour_cos', 'dow_sin', 'dow_cos',
        'PrecEuro', 'cum_energy'
    ]
    X_train_selected = X_train[selected_features].values  # shape: (num_samples, num_features)
    X_test_selected  = X_test[selected_features].values

    # Add a time dimension (sequence length = 1)
    X_train_lstm = torch.tensor(X_train_selected, dtype=torch.float32).unsqueeze(1)  # shape: (samples, 1, num_features)
    X_test_lstm  = torch.tensor(X_test_selected, dtype=torch.float32).unsqueeze(1)
    y_train_lstm = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
    
    return X_train_lstm, y_train_lstm, X_test_lstm

In [726]:
LSTM_MODEL_PARAMS = {
    'hidden_size': 50,
    'num_layers': 1,
    'output_size': 1
}

LSTM_TRAIN_PARAMS = {
    'lr': 0.01,
}


In [727]:
def train_lstm(
    X_train: torch.Tensor, 
    y_train: torch.Tensor, 
    LSTM_MODEL_PARAMS: dict,
    LSTM_TRAIN_PARAMS: dict,
    epochs: int=150
    ) -> LSTMModel:
    
    input_size = X_train.shape[2]
    lstm_model = LSTMModel(input_size, **LSTM_MODEL_PARAMS)

    criterion_lstm = nn.MSELoss()
    optimizer_lstm = optim.Adam(lstm_model.parameters(), **LSTM_TRAIN_PARAMS)
    
    lstm_model.train()
    for epoch in range(epochs):
        optimizer_lstm.zero_grad()
        outputs = lstm_model(X_train)
        loss = criterion_lstm(outputs, y_train)
        loss.backward()
        optimizer_lstm.step()
        
    return lstm_model
    
    
def evaluate_lstm(lstm_model: LSTMModel, X_test_lstm: torch.Tensor) -> np.ndarray:
    lstm_model.eval()
    with torch.no_grad():
        y_pred_lstm = lstm_model(X_test_lstm).numpy().flatten()
    return y_pred_lstm

### XGB

In [728]:
XGB_PARAMS = {
    'n_estimators': 1000,
    'early_stopping_rounds': 50,
    'objective': 'reg:squarederror',
    'max_depth': 3,
    'learning_rate': 0.01,
    'verbosity': 0,
    'booster': 'gbtree'
}

In [729]:
def train_xgb(
    X_train: pd.DataFrame, 
    y_train: pd.Series, 
    X_test: pd.DataFrame,
    y_test: pd.Series,
    XGB_PARAMS: dict) -> xgb.XGBRegressor:    
    xgb_reg = xgb.XGBRegressor(**XGB_PARAMS)
    xgb_reg.fit(
        X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=False
    )
    return xgb_reg
    
def evaluate_xgb(xgb_reg: xgb.XGBRegressor, X_test: pd.DataFrame) -> np.ndarray:
    return xgb_reg.predict(X_test)
    

### LGBM

In [730]:
LGBM_PARAMS = {
    'n_estimators': 1000,
    'learning_rate': 0.01,
    'max_depth': 3
}

In [731]:
def train_lgbm(
    X_train: pd.DataFrame, 
    y_train: pd.Series, 
    X_test: pd.DataFrame,
    y_test: pd.Series,
    LGBM_PARAMS: dict
    ) -> LGBMRegressor:    
    lgbm_reg = LGBMRegressor(**LGBM_PARAMS)
    lgbm_reg.fit(
        X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)]
    )
    return lgbm_reg
    
def evaluate_lgbm(lgbm_reg: LGBMRegressor, X_test: pd.DataFrame) -> np.ndarray:
    return lgbm_reg.predict(X_test)

### Prophet

In [741]:
def prepare_prophet(train: pd.DataFrame, TARGET: str) -> pd.DataFrame:
    train_prophet = train.reset_index()
    train_prophet['ds'] = pd.to_datetime(train_prophet['fechaHora'])

    train_prophet = train_prophet.rename(columns={TARGET: 'y'})

    return train_prophet[['ds', 'y', 'lag_PrecEuro', 'lag_Energia']]

def train_prophet(train_prophet: pd.DataFrame) -> Prophet:

    prophet_model = Prophet()
    prophet_model.add_regressor('lag_PrecEuro')
    prophet_model.add_regressor('lag_Energia')

    # Fit the model
    prophet_model.fit(train_prophet)
    
    return prophet_model

def evaluate_prophet(prophet_model: Prophet, test: pd.DataFrame) -> np.ndarray:

    test_prophet = test.reset_index()
    test_prophet['ds'] = pd.to_datetime(test_prophet['fechaHora'])
    future = test_prophet[['ds', 'lag_PrecEuro', 'lag_Energia']]

    # Forecast
    forecast = prophet_model.predict(future)
    return forecast['yhat'].values

## Benchmark

### Custom Rolling Window CV

In [733]:
def custom_rolling_window_cv(data: pd.DataFrame, initial_train_window: int, forecast_horizon: int, step: int):
    """
    Custom rolling window cross-validation.
    """
    n = len(data)
    train_end = initial_train_window  
    while (train_end + forecast_horizon) <= n:
        train_idx = list(range(0, train_end))
        test_idx = list(range(train_end, train_end + forecast_horizon))
        yield train_idx, test_idx
        train_end += step

In [749]:
nunique_codes = data['Codigo'].nunique()
INITIAL_TRAIN_WINDOW = 24*28*nunique_codes
FORECAST_HORIZON = 24*28*nunique_codes
STEP = 24*7*nunique_codes

EXCLUDED_COLS = ['fechaHora', 'Energia', 'Energia_stationary']
FEATURES = [col for col in data.columns if col not in EXCLUDED_COLS]
TARGET = 'Energia_stationary'

model_names = ['TimesFM', 'XGB', 'LGBM', 'Prophet', 'LSTM']
results = {model: {'mae': [], 'mape': []} for model in model_names}

In [735]:
def mae_score(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

def mape_score(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / np.abs(y_true) + 1e-3))

In [755]:
# Split the data such that the test set represents the last 3 months
split_date = pd.to_datetime('2024-05-01')
train = data[data['fechaHora'] < split_date]
test = data[data['fechaHora'] >= split_date]

EXCLUDED_COLS = ['fechaHora', 'Energia', 'Energia_stationary']
FEATURES = [col for col in data.columns if col not in EXCLUDED_COLS]
TARGET = 'Energia_stationary'

# Common splits for models that require X and y inputs:
X_train = train[FEATURES]
y_train = train[TARGET]
X_test  = test[FEATURES]
y_test  = test[TARGET]

X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

print(f"train shape: {train.shape}")
print(f"X_train shape: {X_train.shape}")
print(f"test shape: {test.shape}")
print(f"X_test shape: {X_test.shape}")

train shape: (463943, 29)
X_train shape: (463631, 55)
test shape: (239304, 29)
X_test shape: (238992, 55)


In [756]:
model_names = ['TimesFM']

for fold, (train_idx, test_idx) in enumerate(custom_rolling_window_cv(data, INITIAL_TRAIN_WINDOW, FORECAST_HORIZON, STEP)):
    print(f"\n===== Fold {fold} =====")
    
    # Split the data into train and test folds
    train = data.iloc[train_idx]
    test = data.iloc[test_idx]
    
    # Common splits for models that require X and y inputs:
    X_train = train[FEATURES]
    y_train = train[TARGET]
    X_test  = test[FEATURES]
    y_test  = test[TARGET]
    
    X_train = pipeline.fit_transform(X_train)
    X_test = pipeline.transform(X_test)
    
    # --------------------------
    if 'Prophet' in model_names:
        data_prophet = prepare_prophet(train, TARGET)
        prophet_model = train_prophet(data_prophet)
        y_pred_prophet = evaluate_prophet(prophet_model, test)
        
        mae_prophet = mae_score(y_test, y_pred_prophet)
        mape_prophet = mape_score(y_test, y_pred_prophet)
        
        results['Prophet']['mae'].append(mae_score(y_test, y_pred_prophet))
        results['Prophet']['mape'].append(mape_prophet)
        
        print(f"Prophet    --> MAE: {mae_prophet:.4f}, MAPE: {mape_prophet:.4f}")
    
    # --------------------------
    if 'XGB' in model_names:
        xgb_model = train_xgb(X_train, y_train, X_test, y_test, XGB_PARAMS)
        y_pred_xgb = evaluate_xgb(xgb_model, X_test)
        
        mae_xgb = mae_score(y_test, y_pred_xgb)
        mape_xgb = mape_score(y_test, y_pred_xgb)
        
        results['XGB']['mae'].append(mae_xgb)
        results['XGB']['mape'].append(mape_xgb)
        
        print(f"XGBoost    --> MAE: {mae_xgb:.4f}, MAPE: {mape_xgb:.4f}")
    
    # --------------------------
    if 'LGBM' in model_names:
        lgbm_model = train_lgbm(X_train, y_train,  X_test, y_test, LGBM_PARAMS)
        y_pred_lgbm = evaluate_lgbm(lgbm_model, X_test)

        mae_lgbm = mae_score(y_test, y_pred_lgbm)
        mape_lgbm = mape_score(y_test, y_pred_lgbm)

        results['LGBM']['mae'].append(mae_lgbm)
        results['LGBM']['mape'].append(mape_lgbm)

        print(f"LightGBM   --> MAE: {mae_lgbm:.4f}, MAPE: {mape_lgbm:.4f}")

    # --------------------------
    if 'TimesFM' in model_names:
        X_train_timesfm, y_train_timesfm, X_test_timesfm = prepare_timesfm(X_train, y_train, X_test)  
        
        print(f"X_train_timesfm {X_train_timesfm}")
        print(f"y_train_timesfm {y_train_timesfm}")
        print(f"X_test_timesfm {X_test_timesfm}")
        
        timesfm_model = train_timesfm(X_train_timesfm, y_train_timesfm, TIMESFM_MODEL_PARAMS, TIMESFM_TRAIN_PARAMS)
        y_pred_timesfm = evaluate_timesfm(timesfm_model, X_test_timesfm)
        mae_timesfm = mae_score(y_test, y_pred_timesfm)
        mape_timesfm = mape_score(y_test, y_pred_timesfm)
        
        results['TimesFM']['mae'].append(mae_timesfm)
        results['TimesFM']['mape'].append(mape_timesfm)
        
        print(f"TimesFM   --> MAE: {mae_timesfm:.4f}, MAPE: {mape_timesfm:.4f}")
    
    # --------------------------
    # LSTM Model (PyTorch)
    # --------------------------
    if 'LSTM' in model_names:
        X_train_lstm, y_train_lstm, X_test_timesfm = prepare_lstm(X_train, y_train, X_test)
        lstm_model = train_lstm(X_train_lstm, y_train_lstm, LSTM_MODEL_PARAMS, LSTM_TRAIN_PARAMS)
        y_pred_lstm = evaluate_lstm(lstm_model, X_test_timesfm)
        
        mae_lstm = mae_score(y_test, y_pred_lstm)
        mape_lstm = mape_score(y_test, y_pred_lstm)
        
        results['LSTM']['mae'].append(mae_lstm)
        results['LSTM']['mape'].append(mape_lstm)
        
        print(f"LSTM       --> MAE: {mae_lstm:.4f}, MAPE: {mape_lstm:.4f}")
    
# --------------------------
# STEP 5: Aggregate and Display Results
# --------------------------
benchmark_results = []
for model in model_names:
    avg_mae = np.mean(results[model]['mae'])
    avg_mape = np.mean(results[model]['mape'])
    benchmark_results.append({
        'Model': model,
        'MAE': avg_mae,
        'MAPE': avg_mape
    })

results_df = pd.DataFrame(benchmark_results)
print("\n===== Benchmark Results =====")
print(results_df)


===== Fold 0 =====
X_train_timesfm tensor([[  1.0000, 113.0000,   0.0000,  ...,  19.6249,   0.0000,   0.0000],
        [  2.0000, 214.0000,   0.0000,  ...,  19.5621,   0.0000,   0.0000],
        [  3.0000, 217.0000,   0.0000,  ...,  19.4998,   0.0000,   0.0000],
        ...,
        [310.0000, 300.0000,  -1.0000,  ...,  27.7208,   0.0000,   0.0000],
        [311.0000, 301.0000,   0.0000,  ...,  27.7200,   0.0000,   0.0000],
        [  0.0000, 177.0000,   0.0000,  ...,  19.4434,   0.0000,   0.0000]])
y_train_timesfm tensor([[  0.0000],
        [ 46.2000],
        [-46.2000],
        ...,
        [ 14.9000],
        [ 51.8000],
        [-67.0000]])
X_test_timesfm tensor([[ 1.0000e+00,  1.1300e+02,  0.0000e+00,  ...,  2.2840e+00,
          8.6599e-02,  5.0396e+00],
        [ 2.0000e+00,  2.1400e+02,  0.0000e+00,  ...,  2.2767e+00,
          9.5149e-02,  6.0839e+00],
        [ 3.0000e+00,  2.1700e+02,  0.0000e+00,  ...,  2.2694e+00,
          4.5533e-02,  1.3932e+00],
        ...,
       

RuntimeError: The size of tensor a (209352) must match the size of tensor b (209664) at non-singleton dimension 0

In [None]:
break

### Feature Importances

In [None]:
XGB_PARAMS = {
    'n_estimators': 1000,
    'early_stopping_rounds': 50,
    'objective': 'reg:squarederror',
    'max_depth': 3,
    'learning_rate': 0.01,
    'verbosity': 0,
    'booster': 'gbtree'
}

# Convert 'fechaHora' column to datetime
data['fechaHora'] = pd.to_datetime(data['fechaHora'])

# Split the data such that the test set represents the last 3 months
split_date = pd.to_datetime('2024-05-01')
train = data[data['fechaHora'] < split_date]
test = data[data['fechaHora'] >= split_date]

EXCLUDED_COLS = ['fechaHora', 'Energia', 'Energia_stationary']
FEATURES = [col for col in data.columns if col not in EXCLUDED_COLS]
TARGET = 'Energia_stationary'

# Common splits for models that require X and y inputs:
X_train = train[FEATURES]
y_train = train[TARGET]
X_test  = test[FEATURES]
y_test  = test[TARGET]

X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

xgb_model = train_xgb(X_train, y_train, X_test, y_test, XGB_PARAMS)
y_pred_xgb = evaluate_xgb(xgb_model, X_test)

mae_xgb = mae_score(y_test, y_pred_xgb)
mape_xgb = mape_score(y_test, y_pred_xgb)

print(f"XGBoost    --> MAE: {mae_xgb:.4f}, MAPE: {mape_xgb:.4f}")

XGBoost    --> MAE: 36.6586, MAPE: inf


In [None]:
# Create a DataFrame with feature names and their importance scores
importance_df = pd.DataFrame({
    'Feature': FEATURES,
    'Importance': xgb_model.feature_importances_
})

# Sort the DataFrame by importance in descending order
importance_df = importance_df.sort_values(by='Importance', ascending=False)

# Display the sorted DataFrame
print(importance_df)

                Feature  Importance
25  roll24_mean_Energia    0.410571
0                Codigo    0.105280
1           Descripcion    0.082795
24        lag24_Energia    0.080454
22          lag_Energia    0.062449
2              PrecEuro    0.049419
10                    t    0.043154
3            cum_energy    0.037681
6           day_of_week    0.029406
4                  date    0.019509
21     energia_hour_sin    0.018123
8          day_of_month    0.016967
5                  hour    0.014971
12             cos_24_1    0.010592
11             sin_24_1    0.006584
14             cos_24_2    0.005432
23         lag1_Energia    0.005317
19              dow_sin    0.001294
16             cos_24_3    0.000000
17             hour_sin    0.000000
18             hour_cos    0.000000
20              dow_cos    0.000000
15             sin_24_3    0.000000
9            is_weekend    0.000000
7                 month    0.000000
13             sin_24_2    0.000000


## Hyperparameter Tuning

In [None]:
import optuna
from optuna.samplers import TPESampler
import numpy as np
import pandas as pd

# ============================================
# Assumed pre-defined functions and variables:
# --------------------------------------------
# - data: your full DataFrame.
# - FEATURES, TARGET: list of feature column names and target column name.
# - tss: a time series cross-validation splitter.
# - mae_score: a function to compute MAE.
#
# For each model, you must have implemented:
#   Prophet: prepare_prophet(), train_prophet() [or inline training], evaluate_prophet()
#   XGB: train_xgb(), evaluate_xgb()
#   LGBM: train_lgbm(), evaluate_lgbm()
#   TimesFM: prepare_timesfm(), train_timesfm(), evaluate_timesfm()
#   LSTM: prepare_lstm(), train_lstm(), evaluate_lstm()
# ============================================

def objective(trial):
    # Choose which model to tune in this trial.
    model_name = trial.suggest_categorical("model", ["Prophet", "XGB", "LGBM", "TimesFM", "LSTM"])
    
    fold_scores = []  # To store metric (e.g., MAE) for each CV fold
    
    for train_idx, test_idx in tss.split(data):
        # Split the data for this fold
        train = data.iloc[train_idx]
        test = data.iloc[test_idx]
        
        # For models that require X and y explicitly.
        X_train = train[FEATURES]
        y_train = train[TARGET]
        X_test = test[FEATURES]
        y_test = test[TARGET]
        
        if model_name == "Prophet":
            # Prepare Prophet data
            train_prophet = prepare_prophet(train)
            # Hyperparameters (regularization via changepoint and seasonality scales)
            cp_scale = trial.suggest_float("prophet_changepoint_prior_scale", 0.001, 0.5, log=True)
            seas_scale = trial.suggest_float("prophet_seasonality_prior_scale", 0.01, 10.0, log=True)
            
            # Initialize and train Prophet with extra regressors
            from prophet import Prophet  # or fbprophet depending on your package
            prophet_model = Prophet(changepoint_prior_scale=cp_scale,
                                    seasonality_prior_scale=seas_scale)
            prophet_model.add_regressor('PrecEuro')
            prophet_model.add_regressor('cum_energy')
            prophet_model.fit(train_prophet)
            
            # Evaluate
            y_pred = evaluate_prophet(prophet_model, test)
            
        elif model_name == "XGB":
            # Hyperparameter search space for XGBoost
            xgb_params = {
                "n_estimators": trial.suggest_int("xgb_n_estimators", 100, 1000),
                "max_depth": trial.suggest_int("xgb_max_depth", 3, 10),
                "learning_rate": trial.suggest_float("xgb_learning_rate", 0.001, 0.3, log=True),
                "reg_alpha": trial.suggest_float("xgb_reg_alpha", 0.0, 10.0),
                "reg_lambda": trial.suggest_float("xgb_reg_lambda", 0.0, 10.0),
                "subsample": trial.suggest_float("xgb_subsample", 0.5, 1.0),
                "colsample_bytree": trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0)
            }
            xgb_model = train_xgb(X_train, y_train, X_test, y_test, xgb_params)
            y_pred = evaluate_xgb(xgb_model, X_test)
            
        elif model_name == "LGBM":
            # Hyperparameter search space for LightGBM
            lgbm_params = {
                "n_estimators": trial.suggest_int("lgbm_n_estimators", 100, 1000),
                "num_leaves": trial.suggest_int("lgbm_num_leaves", 20, 150),
                "learning_rate": trial.suggest_float("lgbm_learning_rate", 0.001, 0.3, log=True),
                "reg_alpha": trial.suggest_float("lgbm_reg_alpha", 0.0, 10.0),
                "reg_lambda": trial.suggest_float("lgbm_reg_lambda", 0.0, 10.0),
                "min_data_in_leaf": trial.suggest_int("lgbm_min_data_in_leaf", 10, 100)
            }
            lgbm_model = train_lgbm(X_train, y_train, X_test, y_test, lgbm_params)
            y_pred = evaluate_lgbm(lgbm_model, X_test)
            
        elif model_name == "TimesFM":
            # Prepare TimesFM data (assumed to have its own preparation method)
            X_train_tf, y_train_tf, X_test_tf = prepare_timesfm(X_train, y_train, X_test)
            # Hyperparameters for TimesFM model (example values)
            timesfm_model_params = {
                "learning_rate": trial.suggest_float("timesfm_learning_rate", 0.001, 0.1, log=True),
                "reg_alpha": trial.suggest_float("timesfm_reg_alpha", 0.0, 10.0),
                "reg_lambda": trial.suggest_float("timesfm_reg_lambda", 0.0, 10.0)
            }
            timesfm_train_params = {
                "epochs": trial.suggest_int("timesfm_epochs", 10, 200)
            }
            timesfm_model = train_timesfm(X_train_tf, y_train_tf, timesfm_model_params, timesfm_train_params)
            y_pred = evaluate_timesfm(timesfm_model, X_test_tf)
            
        elif model_name == "LSTM":
            # Prepare LSTM data (using your own prepare_lstm)
            X_train_lstm, y_train_lstm, X_test_lstm = prepare_lstm(X_train, y_train, X_test)
            # Hyperparameter search space for LSTM (e.g., architecture + training reg params)
            lstm_model_params = {
                "num_layers": trial.suggest_int("lstm_num_layers", 1, 3),
                "hidden_size": trial.suggest_int("lstm_hidden_size", 16, 128),
                "dropout": trial.suggest_float("lstm_dropout", 0.0, 0.5)
            }
            lstm_train_params = {
                "learning_rate": trial.suggest_float("lstm_learning_rate", 1e-5, 1e-2, log=True),
                "weight_decay": trial.suggest_float("lstm_weight_decay", 0.0, 0.1),
                "epochs": trial.suggest_int("lstm_epochs", 10, 200)
            }
            lstm_model = train_lstm(X_train_lstm, y_train_lstm, lstm_model_params, lstm_train_params)
            y_pred = evaluate_lstm(lstm_model, X_test_lstm)
        
        # Calculate the evaluation metric (e.g., MAE) for this fold
        fold_mae = mae_score(y_test, y_pred)
        fold_scores.append(fold_mae)
    
    # Return the average score across folds (Optuna minimizes the objective)
    return np.mean(fold_scores)


if __name__ == "__main__":
    # Set up Optuna with a TPE sampler (using a seed for reproducibility)
    sampler = TPESampler(seed=42)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    
    # Run the optimization (adjust n_trials as needed)
    study.optimize(objective, n_trials=50)
    
    # Print out the best parameters and corresponding score
    print("Best trial:")
    best_trial = study.best_trial
    print(f"  MAE: {best_trial.value:.4f}")
    print("  Hyperparameters:")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")


[I 2025-02-13 22:12:58,419] A new study created in memory with name: no-name-0414fe39-42f3-43e1-a01a-900313c5888e
[W 2025-02-13 22:12:58,421] Trial 0 failed with parameters: {'model': 'XGB'} because of the following error: ValueError('Too many splits=5 for number of samples=703246 with test_size=209664 and gap=24.').
Traceback (most recent call last):
  File "/opt/anaconda3/envs/ai_env/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/p1/vj5nk3dj08nbdgjg1lst8k_w0000gn/T/ipykernel_1568/2975525036.py", line 28, in objective
    for train_idx, test_idx in tss.split(data):
  File "/opt/anaconda3/envs/ai_env/lib/python3.11/site-packages/sklearn/model_selection/_split.py", line 1252, in _split
    raise ValueError(
ValueError: Too many splits=5 for number of samples=703246 with test_size=209664 and gap=24.
[W 2025-02-13 22:12:58,422] Trial 0 failed with value None.


ValueError: Too many splits=5 for number of samples=703246 with test_size=209664 and gap=24.