# L'objectif est d'expérimenter différentes tailles de fenêtres temporelles (window_size) pour trouver celle qui donne les meilleures performances.

## Méthodologie
* Définir une liste de tailles de fenêtres (window_size) à tester, par exemple [30, 60, 90, 120].
* Créer des séquences avec chaque window_size et un prediction_size fixe.
* Entraîner le modèle LSTM sur chaque fenêtre.
* Évaluer les performances avec RMSE, MAE et R².
* Comparer les performances pour choisir la meilleure fenêtre.


## Preguntas

- ¿Por qué, si la validation loss oscila tanto, no paras el entrenamiento antes?
- ¿Qué es RobustNormalization?
- Añadir TensorBoard para seguir el entrenamiento.
- Quizás no sea relevante para el entrenamiento y la predicción del modelo, pero ¿el hecho de que RobustScaler produzca valores de lluvia negativos no va a afectar? Quizás habría que revisarlo.

In [None]:
!pip3 install -q torch --index-url https://download.pytorch.org/whl/cpu

In [None]:
from pathlib import Path
import pandas as pd
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
from ombs_senegal.time_series_deepl import Learner, HydroDataset, split_by_date
from sklearn.preprocessing import RobustScaler


# Folder holding the CSV inputs, two levels above the working directory.
DATA_PATH = Path("..") / ".." / "data"
# Run on the GPU when CUDA is usable, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Data preprocessing

In [None]:
# Read the ';'-separated CSV, keeping only the columns used below and indexing
# the rows by the parsed "time" column.
data = pd.read_csv(
    DATA_PATH / "data_cumul.csv",
    sep=";",
    usecols=["time", "débit_insitu", "P_cumul_7j", "débit_mgb"],
    index_col="time",
    converters={"time": pd.to_datetime},
)
# Keep rows from 2012-01-01 onwards. The explicit copy makes `data` an
# independent frame, so adding a column below cannot trigger pandas'
# SettingWithCopyWarning on a slice of the original frame.
data = data.loc["2012-01-01":].copy()
# Month number of each timestamp, used as an extra input feature.
data["mois"] = data.index.month

In [None]:
# Date-based split: calendar year 2018 is passed as the validation range and
# 2019-2020 as the test range.
train, valid, test = split_by_date(
    data,
    val_dates=("2018-01-01", "2018-12-31"),
    test_dates=("2019-01-01", "2020-12-31"),
)

Now let's define the feature and target columns and split each data set into features and targets.

In [None]:
# Input columns fed to the models and the single column they must predict.
x_cols = ["débit_mgb", "P_cumul_7j", "mois"]
y_cols = ["débit_insitu"]

# Separate every split into its feature frame and its target frame.
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = (
    (subset[x_cols], subset[y_cols]) for subset in (train, valid, test)
)

Now we will fit the scaler based only on train data. This ensures that:
1. No information from the validation/test data sets leaks into the scaling process
2. All data is scaled consistently using the same parameters
3. The model sees new data scaled in the same way as it was trained

In [None]:
# Fit one scaler per role on the training split only. `fit` is used instead of
# `fit_transform` because the transformed arrays were never kept: only the
# fitted scalers are needed, and they are applied later through `.transform`.
feature_scaler = RobustScaler().fit(x_train)
target_scaler = RobustScaler().fit(y_train)

## Model definition

#### Multi layer perceptron (MLP)

In [None]:
class SimpleRegularizedMLP(nn.Module):
    """One-hidden-layer perceptron with layer normalisation and dropout.

    Each sample is flattened to a single vector before the first linear layer,
    so ``input_size`` must equal the number of elements in one sample.

    Args:
        input_size: Number of elements in one flattened sample.
        prediction_size: Number of values produced per sample.
        hidden_size: Width of the hidden layer.
        dropout: Dropout probability applied after the activation.
    """

    def __init__(self, input_size, prediction_size, hidden_size=64, dropout=0.3):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.norm = nn.LayerNorm(hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, prediction_size)

    def forward(self, x):
        """Return predictions of shape ``(batch, prediction_size)``."""
        # flatten() copies when the input is not contiguous (e.g. after a
        # permute), whereas view() raises a RuntimeError in that case.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        x = self.norm(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x


#### Long Short Term Memory (LSTM)

In [None]:
class SimpleRegularizedLSTM(nn.Module):
    """Single-layer LSTM whose final hidden state feeds a linear head.

    The final hidden state is layer-normalised and passed through dropout
    (p=0.3) before the linear projection to ``prediction_size`` values.
    """

    def __init__(self, input_size, hidden_size, prediction_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_size, prediction_size)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, prediction_size)``."""
        # nn.LSTM returns (outputs, (h_n, c_n)); only h_n is needed here.
        final_hidden = self.lstm(x)[1][0]
        # Last entry along the first axis of h_n = state of the last layer.
        summary = final_hidden[-1]
        return self.fc(self.dropout(self.norm(summary)))


In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """Stacked recurrent regressor: two bidirectional LSTMs, one LSTM, two dense layers.

    Layer widths are fixed: 256 and 128 hidden units per direction for the two
    bidirectional layers, 64 for the last LSTM and 128 for the hidden dense
    layer. Dropout (p=0.2) follows every stage except the output layer.
    """

    def __init__(self, input_size, prediction_size):
        super().__init__()
        # A bidirectional layer concatenates both directions, so the next
        # layer's input width is twice the previous hidden size.
        self.bilstm1 = nn.LSTM(input_size, 256, bidirectional=True, batch_first=True)
        self.dropout1 = nn.Dropout(0.2)
        self.bilstm2 = nn.LSTM(2 * 256, 128, bidirectional=True, batch_first=True)
        self.dropout2 = nn.Dropout(0.2)
        self.lstm = nn.LSTM(2 * 128, 64, batch_first=True)
        self.dropout3 = nn.Dropout(0.2)
        self.dense1 = nn.Linear(64, 128)
        self.relu = nn.ReLU()
        self.dropout4 = nn.Dropout(0.2)
        self.dense2 = nn.Linear(128, prediction_size)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, prediction_size)``."""
        sequence, _ = self.bilstm1(x)
        sequence, _ = self.bilstm2(self.dropout1(sequence))
        # Only the final hidden state of the last LSTM is kept.
        _, (final_hidden, _) = self.lstm(self.dropout2(sequence))
        features = self.dropout3(final_hidden[-1])
        features = self.dropout4(self.relu(self.dense1(features)))
        return self.dense2(features)
    


#### Gated Recurrent Units (GRU)

In [None]:
class SimpleRegularizedGRU(nn.Module):
    """Single-layer GRU whose final hidden state feeds a linear head.

    The final hidden state is layer-normalised and passed through dropout
    (p=0.3) before the linear projection to ``prediction_size`` values.
    """

    def __init__(self, input_size, hidden_size, prediction_size):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(p=0.3)
        self.fc = nn.Linear(hidden_size, prediction_size)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, prediction_size)``."""
        # nn.GRU returns (outputs, h_n); take the last layer's final state.
        summary = self.gru(x)[1][-1]
        return self.fc(self.dropout(self.norm(summary)))


#### Convolutional Neural Networks (CNN)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TemporalCausalCNN(nn.Module):
    """Two left-padded 1-D convolutions, global average pooling and a linear head.

    Before each convolution the sequence is zero-padded on the left by
    ``kernel_size - 1`` steps, so the convolution output keeps the input
    length. Both LayerNorm layers normalise jointly over the time and channel
    axes, which is why the sequence length must equal ``window_size``.

    Args:
        input_channels: Number of variables per time step.
        window_size: Length of the input sequences.
        prediction_size: Number of values produced per sample.
        kernel_size: Width of both convolution kernels.
        dropout: Dropout probability applied after the first convolution stage.
    """

    def __init__(self, input_channels, window_size, prediction_size, kernel_size=3, dropout=0.3):
        super().__init__()
        self.window_size = window_size
        # Left padding that keeps the sequence length unchanged; derived from
        # the kernel width instead of being hard-coded for kernel_size=3.
        self.left_pad = kernel_size - 1

        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=kernel_size)
        self.norm1 = nn.LayerNorm([window_size, 32])
        self.dropout = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(32, 64, kernel_size=kernel_size)
        self.norm2 = nn.LayerNorm([window_size, 64])

        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(64, prediction_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map ``(batch, time, variables)`` input to ``(batch, prediction_size)``."""
        # Conv1d expects channels before time.
        x = x.permute(0, 2, 1)  # (B, C, T)

        # Pad on the left only, then convolve: output length stays T.
        x = F.pad(x, (self.left_pad, 0))
        x = self.conv1(x)       # (B, 32, T)
        # LayerNorm was built for (T, 32), so move time before channels.
        x = x.permute(0, 2, 1)
        x = self.norm1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = x.permute(0, 2, 1)

        x = F.pad(x, (self.left_pad, 0))
        x = self.conv2(x)       # (B, 64, T)
        x = x.permute(0, 2, 1)
        x = self.norm2(x)
        x = self.relu(x)
        x = x.permute(0, 2, 1)

        # Average over time, then project to the requested horizon.
        x = self.global_pool(x).squeeze(-1)  # (B, 64)
        x = self.fc(x)                       # (B, prediction_size)
        return x


## Training

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from ombs_senegal.benchmark_model import BenchmarkScores

In [None]:
# Metric calculator used by the training cell to score the test-set predictions.
benchmark_scores = BenchmarkScores()

In [None]:

# Context-window lengths to evaluate (only 15 is active for now).
context_sizes = [15]  # other candidates: 30, 60, 90
batch_size = 32
learning_rate = 0.0003
epochs = 1

prediction_size = 10  # fixed forecast length (may be adjusted)
x_transform = feature_scaler.transform
y_transform = target_scaler.transform
results = []
models = []  # not populated by this cell

# Train and score one model per context-window length.
for context_size in context_sizes:
    print(f"\n🟢 Test avec window_size = {context_size}")

    # Windowing and scaling options shared by the three splits.
    dataset_kwargs = dict(
        ctx_len=context_size,
        pred_len=prediction_size,
        x_transform=x_transform,
        y_transform=y_transform,
    )
    train_dataset = HydroDataset(x=x_train, y=y_train, **dataset_kwargs)
    valid_dataset = HydroDataset(x=x_valid, y=y_valid, **dataset_kwargs)
    test_dataset = HydroDataset(x=x_test, y=y_test, **dataset_kwargs)

    # Only the training batches are shuffled.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Model under test; swap in one of the commented alternatives to
    # benchmark another architecture.
    # model = LSTMModel(len(x_cols), prediction_size).to(DEVICE)
    # model = SimpleRegularizedLSTM(len(x_cols), 64, prediction_size).to(DEVICE)
    # model = SimpleRegularizedGRU(len(x_cols), 64, prediction_size).to(DEVICE)
    # model = TemporalCausalCNN(len(x_cols), context_size, prediction_size).to(DEVICE)
    model = SimpleRegularizedMLP(len(x_cols) * context_size, prediction_size).to(DEVICE)
    learner = Learner(model=model, train_loader=train_loader, val_loader=valid_loader)
    learner.fit(lr=learning_rate, epochs=epochs)

    # Predict on the test split, passing the target scaler's inverse transform.
    y_pred = learner.predict(test_loader, inverse_transform=target_scaler.inverse_transform)

    # Index the predictions by (time, model) and convert to xarray for scoring.
    y_pred.index.name = "time"
    y_pred["model"] = model.__class__.__name__
    y_pred.set_index(["model"], append=True, inplace=True)
    y_pred = y_pred.to_xarray()
    scores = benchmark_scores.compute_scores(
        y_pred,
        y_test.to_xarray()[y_cols[0]],
        metrics=["mae", "rmse", "nse", "kge"],
    )

    # Collapse every metric to its mean, rounded to two decimals.
    mean_scores = {s.upper(): round(float(scores[s].mean().values), 2) for s in scores.data_vars}
    print(f"📊 Résultats pour window_size={context_size} -> {mean_scores}")

    # Keep the scores of this window size for the final comparison.
    results.append({"ctx_size": context_size, **mean_scores})

# Report the window size with the lowest mean RMSE.
best = min(results, key=lambda x: x["RMSE"])
# Single quotes inside the replacement fields: reusing the enclosing double
# quote is only valid syntax from Python 3.12 onwards.
print(
    f"\n✅ Meilleure fenêtre : {best['ctx_size']} avec RMSE={best['RMSE']}, "
    f"MAE={best['MAE']}, NSE={best['NSE']}, KGE={best['KGE']}"
)


- MLP: Meilleure fenêtre : 60 avec RMSE=156.864, MAE=78.964, R²=0.861
- CNN: Meilleure fenêtre : 30 avec RMSE=191.998, MAE=92.670, R²=0.786
- GRU: Meilleure fenêtre : 60 avec RMSE=212.033, MAE=108.959, R²=0.746
- Simple LSTM: Meilleure fenêtre : 90 avec RMSE=197.514, MAE=104.173, R²=0.785

In [None]:
def pbias(y_true, y_pred):
    """Return the percent bias of ``y_pred`` with respect to ``y_true``.

    Computed as ``100 * sum(y_pred - y_true) / sum(y_true)``. With a positive
    total of ``y_true``, the result is positive when the predictions are too
    high overall and negative when they are too low.
    """
    total_error = np.sum(y_pred - y_true)
    total_reference = np.sum(y_true)
    return 100 * total_error / total_reference

## Learning rate finder development

In [None]:
def lr_find(self, start_lr=1e-7, end_lr=10, num_iter=100, step_mode="exp", show_plot=True):
    """Run a learning-rate range test and suggest a learning rate.

    Trains on up to ``num_iter`` mini-batches while raising the learning rate
    from ``start_lr`` towards ``end_lr`` and recording the loss of each step.
    The model weights and its train/eval mode are restored afterwards, even if
    the run is interrupted by an exception.
    Source: https://github.com/fastai/fastai1/blob/master/fastai/train.py#L33

    ``self`` must provide ``model``, ``optimizer`` (a callable invoked as
    ``optimizer(params, lr=...)``), ``criterion`` and ``train_loader``.

    Args:
        start_lr (float): Starting learning rate.
        end_lr (float): Learning rate the schedule grows towards.
        num_iter (int): Maximum number of mini-batches to run.
        step_mode (str): "exp" for exponential increase, anything else for a
            linear increase.
        show_plot (bool): Whether to display the loss-vs-learning-rate plot.

    Returns:
        tuple: ``(optimal_lr, learning_rates, losses)``. ``optimal_lr`` is the
        recorded learning rate at which the loss dropped fastest.

    Raises:
        ValueError: If ``num_iter`` is smaller than 1.
    """
    if num_iter < 1:
        raise ValueError("num_iter must be at least 1")

    # state_dict() returns references to the live parameter tensors, so they
    # must be cloned; otherwise the "saved" weights are updated in place by
    # the optimizer and restoring them afterwards changes nothing.
    saved_weights = {
        name: tensor.detach().clone()
        for name, tensor in self.model.state_dict().items()
    }
    was_training = self.model.training
    # Batches are moved to wherever the model lives (a no-op on CPU).
    device = next(self.model.parameters()).device

    optimizer = self.optimizer(self.model.parameters(), lr=start_lr)

    exponential = step_mode == "exp"
    if exponential:
        # Per-step multiplier taking start_lr to end_lr in num_iter steps.
        gamma = (end_lr / start_lr) ** (1 / num_iter)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma)
    else:
        # Per-step increment for the linear schedule.
        lr_increment = (end_lr - start_lr) / num_iter
        scheduler = None

    learning_rates = []
    losses = []
    best_loss = float("inf")
    iterator = iter(self.train_loader)

    self.model.train()
    try:
        for iteration in range(num_iter):
            # Restart the loader when it runs out before num_iter steps.
            try:
                batch_X, batch_y = next(iterator)
            except StopIteration:
                iterator = iter(self.train_loader)
                batch_X, batch_y = next(iterator)
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = self.model(batch_X)
            # NOTE(review): squeeze() without a dim also drops the batch axis
            # when a batch holds a single sample - confirm the target shape.
            loss = self.criterion(outputs, batch_y.squeeze())
            loss_value = loss.item()

            # A NaN/inf loss means training has diverged; stop without
            # recording it so it cannot corrupt the gradient-based pick.
            if not np.isfinite(loss_value):
                break

            loss.backward()
            optimizer.step()

            # Record the learning rate that produced this loss.
            learning_rates.append(optimizer.param_groups[0]["lr"])
            losses.append(loss_value)

            if exponential:
                scheduler.step()
            else:
                for param_group in optimizer.param_groups:
                    param_group["lr"] = start_lr + lr_increment * (iteration + 1)

            # Stop once the loss is clearly exploding.
            if iteration > 0 and loss_value > 4 * best_loss:
                break
            best_loss = min(best_loss, loss_value)
    finally:
        self.model.load_state_dict(saved_weights)
        self.model.train(was_training)

    if show_plot:
        # NOTE(review): `plt` (matplotlib.pyplot) is not imported in the part
        # of the file visible here - make sure it is in scope before plotting.
        plt.figure(figsize=(10, 6))
        plt.plot(learning_rates, losses)
        plt.xscale('log')
        plt.xlabel('Learning Rate (log scale)')
        plt.ylabel('Loss')
        plt.title('Learning Rate Finder')
        plt.show()

    if len(losses) >= 2:
        # Point of steepest descent of the raw (unsmoothed) loss curve.
        steepest_idx = np.gradient(np.array(losses)).argmin()
        optimal_lr = learning_rates[steepest_idx]
    elif losses:
        # np.gradient needs at least two points.
        optimal_lr = learning_rates[0]
    else:
        # Nothing usable was recorded; fall back to the starting value.
        optimal_lr = start_lr

    return optimal_lr, learning_rates, losses