# L'objectif est d'expérimenter différentes tailles de fenêtres temporelles (window_size) pour trouver celle qui donne les meilleures performances.

## Méthodologie
* Définir une liste de tailles de fenêtres (window_size) à tester, par exemple [30, 60, 90, 120].
* Créer des séquences avec chaque window_size et un prediction_size fixe.
* Entraîner le modèle LSTM sur chaque fenêtre.
* Évaluer les performances avec RMSE, MAE et R².
* Comparer les performances pour choisir la meilleure fenêtre.


## Preguntas

- ¿Por qué, si la validation loss oscila tanto, no paras el entrenamiento antes?
- ¿Qué es RobustNormalization?
- Añadir TensorBoard para seguir el entrenamiento
- Quizás no sea relevante para el entrenamiento y la predicción del modelo, pero ¿el hecho de que el robust scaler haga que haya lluvia negativa no va a afectar? Quizás habría que revisarlo.

In [None]:
!pip install -q plotly tensorflow

In [None]:
!pip3 install -q torch --index-url https://download.pytorch.org/whl/cpu

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from pathlib import Path


DATA_PATH = Path("../../data")

In [None]:
# Load the table, using its "time" column as a date-parsed index so that the
# date slice and `.index.month` below work.
data = pd.read_csv(DATA_PATH / "data_cumul.csv", delimiter=";", parse_dates=True, index_col="time")

# Keep the three columns used below and only the rows from 2012-01-01 onwards.
# `.copy()` makes the selection an independent frame, so adding the "mois"
# column does not assign into a slice of another frame
# (SettingWithCopyWarning).
data = data.loc["2012-01-01":, ["P_cumul_7j", "d√©bit_mgb", "d√©bit_insitu"]].copy()

# Month number of each row, taken from the datetime index.
data["mois"] = data.index.month
data.head(5)

In [None]:
data.describe()

In [None]:
# pd.DataFrame(features_scaled, index=data.index, columns=['P_cumul_7j','d√©bit_mgb',"mois"])["P_cumul_7j"].plot()


#### Data preprocessing

In [None]:
from sklearn.preprocessing import RobustScaler

# One scaler for the model inputs, one for the discharge target.
scaler_features = RobustScaler()
scaler_target = RobustScaler()

# Alternative feature set kept for reference:
# features = data[["d√©bit_mgb_imerg","Q1_IMERG","Q2_IMERG","Q3_IMERG","mgb_wse_imerg"]].values
features = data[['P_cumul_7j', 'd√©bit_mgb', "mois"]].values
target = data['d√©bit_insitu'].values.reshape(-1, 1)   # series the model predicts
targets = data['d√©bit_mgb'].values.reshape(-1, 1)     # second series, scaled like the target

# Chronological split: first 60 % train, next 20 % validation, rest test.
train_size = int(len(features) * 0.6)
val_size = int(len(features) * 0.2)
test_size = len(features) - train_size - val_size

# Fit the scalers on the training rows only, so that validation/test
# statistics do not leak into the transformation applied to the training data.
scaler_features.fit(features[:train_size])
scaler_target.fit(target[:train_size])

features_scaled = scaler_features.transform(features)
target_scaled = scaler_target.transform(target)
# Only transform here. Re-fitting scaler_target on this second series would
# overwrite the parameters fitted on `target`, and every later
# scaler_target.inverse_transform(...) of observations or predictions would
# then be computed with the wrong centre and scale.
targets_scaled = scaler_target.transform(targets)

train_features = features_scaled[:train_size]
train_target = target_scaled[:train_size]

val_end = train_size + val_size
val_features = features_scaled[train_size:val_end]
val_target = target_scaled[train_size:val_end]

test_features = features_scaled[val_end:]
test_target = target_scaled[val_end:]
test_targets = targets_scaled[val_end:]

#### Model definition

In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """Recurrent regressor: two bidirectional LSTMs, one LSTM, then a dense head.

    Args:
        input_size: Number of features per time step.
        window_size: Accepted in the signature but not used by any layer.
        prediction_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, window_size, prediction_size):
        super().__init__()
        # A bidirectional layer concatenates both directions, so the next
        # layer receives twice the hidden size (2 * 256 = 512, 2 * 128 = 256).
        self.bilstm1 = nn.LSTM(input_size=input_size, hidden_size=256,
                               batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(p=0.2)
        self.bilstm2 = nn.LSTM(input_size=512, hidden_size=128,
                               batch_first=True, bidirectional=True)
        self.dropout2 = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size=256, hidden_size=64, batch_first=True)
        self.dropout3 = nn.Dropout(p=0.2)
        # Dense head applied to the last time step only.
        self.dense1 = nn.Linear(in_features=64, out_features=128)
        self.relu = nn.ReLU()
        self.dropout4 = nn.Dropout(p=0.2)
        self.dense2 = nn.Linear(in_features=128, out_features=prediction_size)

    def forward(self, x):
        """Run the recurrent stack on a batch-first input and return the head output."""
        seq, _ = self.bilstm1(x)
        seq, _ = self.bilstm2(self.dropout1(seq))
        seq, _ = self.lstm(self.dropout2(seq))
        # Keep only the output of the final time step of each sequence.
        last_step = seq[:, -1, :]
        hidden = self.relu(self.dense1(self.dropout3(last_step)))
        return self.dense2(self.dropout4(hidden))
    # üîπ Initialisation du mod√®le


#### Learner definition

In [None]:
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader

from torch.utils.tensorboard import SummaryWriter

class Learner:
    """Small training loop around a PyTorch model, with TensorBoard logging.

    Args:
        model: Model to train.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        val_loader: Loader yielding ``(inputs, targets)`` validation batches.
        criterion: Loss module, called as ``criterion(outputs, targets)``.
        optimizer: Optimizer *class* (not an instance); ``fit`` instantiates
            it with the model parameters and the learning rate.
        log_dir: Directory passed to ``SummaryWriter``.
    """

    def __init__(self,
                 model: nn.Module, # model to train
                 train_loader: DataLoader, # data loader for training data
                 val_loader: DataLoader, # data loader for validation data
                 criterion: nn.Module = nn.MSELoss(), # loss function to optimize
                 optimizer: torch.optim.Optimizer = torch.optim.Adam, # optimizer class to use for training
                 log_dir: str = 'runs', # directory to save tensorboard logs
                 ) -> None:
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.writer = SummaryWriter(log_dir)

    def _batch_loss(self, batch_X, batch_y):
        """Return the criterion value for one ``(inputs, targets)`` batch."""
        outputs = self.model(batch_X)
        # Align the targets with the model output explicitly. A bare
        # ``squeeze()`` removes every size-1 axis, including the batch axis
        # (final batch holding one sample) or the horizon axis (a single
        # predicted step), after which the criterion broadcasts silently.
        return self.criterion(outputs, batch_y.reshape(outputs.shape))

    def fit(self, lr=0.001, epochs=10):
        """Train for ``epochs`` epochs and log the mean train/validation loss.

        Args:
            lr: Learning rate given to the optimizer class.
            epochs: Number of passes over ``train_loader``.
        """
        optimizer = self.optimizer(self.model.parameters(), lr=lr)
        for epoch in tqdm(range(epochs), desc='Training epochs'):
            # Training
            self.model.train()
            epoch_loss = 0
            for batch_X, batch_y in self.train_loader:
                optimizer.zero_grad()
                loss = self._batch_loss(batch_X, batch_y)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            avg_train_loss = epoch_loss/len(self.train_loader)
            self.writer.add_scalar('Training Loss/epoch', avg_train_loss, epoch)

            # Validation (no gradient tracking, dropout disabled by eval()).
            self.model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch_X, batch_y in self.val_loader:
                    val_loss += self._batch_loss(batch_X, batch_y).item()

            avg_val_loss = val_loss/len(self.val_loader)
            self.writer.add_scalar('Validation Loss/epoch', avg_val_loss, epoch)

        # Make sure the logged scalars are written out once training is done.
        self.writer.flush()

    def predict(self, dl: DataLoader):
        """Return the model outputs for every batch of ``dl``, stacked row-wise.

        Args:
            dl: Loader yielding ``(inputs, targets)`` batches; targets are ignored.

        Returns:
            NumPy array made by vertically stacking the per-batch outputs.
        """
        # Imported here so the method does not depend on a later cell having
        # imported numpy into the global namespace.
        import numpy as np

        self.model.eval()
        predictions = []
        with torch.no_grad():
            for batch_X, _ in dl:
                predictions.append(self.model(batch_X).cpu().numpy())

        return np.vstack(predictions)

In [None]:
# Build the model and wrap it in a Learner.
# NOTE(review): X_train, window_size, prediction_size, device, train_loader and
# val_loader are first assigned in a cell further down this file, so that cell
# has to be executed before this one.
model = LSTMModel(X_train.shape[2], window_size, prediction_size).to(device)
learner = Learner(model, train_loader, val_loader)

In [None]:
# NOTE(review): the Learner class defined in this file does not define an
# `lr_find` method, so this call raises AttributeError unless the method is
# provided elsewhere.
learner.lr_find()

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def create_sequences(features, target, window_size, prediction_size):
    """Cut aligned (input window, forecast horizon) samples out of two series.

    Sample ``i`` pairs ``features[i : i + window_size]`` with
    ``target[i + window_size : i + window_size + prediction_size]``.

    Args:
        features: Array-like indexed by time step along axis 0.
        target: Array-like with the same number of time steps as ``features``.
        window_size: Number of time steps in each input window.
        prediction_size: Number of time steps in each target horizon.

    Returns:
        Tuple ``(X, y)`` of float32 tensors on ``device``. ``X`` has shape
        ``(n_samples, window_size, ...)`` and ``y`` has shape
        ``(n_samples, prediction_size, ...)``. If the series is too short for
        a single sample, ``n_samples`` is 0 but the trailing dimensions are
        kept.
    """
    features = np.asarray(features, dtype=np.float32)
    target = np.asarray(target, dtype=np.float32)

    # "+ 1": the sample whose horizon ends exactly on the last time step is
    # valid too; without it the final window is silently dropped.
    n_samples = max(len(features) - window_size - prediction_size + 1, 0)

    # Fill preallocated arrays instead of building a tensor from a Python list
    # of ndarrays, which is slow and loses the rank when the list is empty.
    X = np.empty((n_samples, window_size) + features.shape[1:], dtype=np.float32)
    y = np.empty((n_samples, prediction_size) + target.shape[1:], dtype=np.float32)
    for i in range(n_samples):
        horizon_start = i + window_size
        X[i] = features[i:horizon_start]
        y[i] = target[horizon_start:horizon_start + prediction_size]

    return torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)

# Window sizes to test (other candidates currently disabled: 20, 30, 60, 90, 120).
window_sizes = [10,]
prediction_size = 10  # forecast horizon, fixed
batch_size = 48
results = []
models = []

# This cell only prepares the sequences and data loaders for the first window
# size and then stops. The statements that used to follow the `break` inside
# the loop were unreachable, and the best-window summary after the loop called
# min() on the always-empty `results` list (ValueError); both are removed
# here. Training, evaluation and best-window selection are performed by the
# loop in the "Model training" cell.
for window_size in window_sizes:
    print(f"\nüü¢ Test avec window_size = {window_size}")

    # Sequences for each split
    X_train, y_train = create_sequences(train_features, train_target, window_size, prediction_size)
    X_val, y_val = create_sequences(val_features, val_target, window_size, prediction_size)
    X_test, y_test = create_sequences(test_features, test_target, window_size, prediction_size)

    # Data loaders (only the training batches are shuffled)
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)
    break


#### Model training

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def create_sequences(features, target, window_size, prediction_size):
    """Cut aligned (input window, forecast horizon) samples out of two series.

    Sample ``i`` pairs ``features[i : i + window_size]`` with
    ``target[i + window_size : i + window_size + prediction_size]``.

    Args:
        features: Array-like indexed by time step along axis 0.
        target: Array-like with the same number of time steps as ``features``.
        window_size: Number of time steps in each input window.
        prediction_size: Number of time steps in each target horizon.

    Returns:
        Tuple ``(X, y)`` of float32 tensors on ``device``. ``X`` has shape
        ``(n_samples, window_size, ...)`` and ``y`` has shape
        ``(n_samples, prediction_size, ...)``. If the series is too short for
        a single sample, ``n_samples`` is 0 but the trailing dimensions are
        kept.
    """
    features = np.asarray(features, dtype=np.float32)
    target = np.asarray(target, dtype=np.float32)

    # "+ 1": the sample whose horizon ends exactly on the last time step is
    # valid too; without it the final window is silently dropped.
    n_samples = max(len(features) - window_size - prediction_size + 1, 0)

    # Fill preallocated arrays instead of building a tensor from a Python list
    # of ndarrays, which is slow and loses the rank when the list is empty.
    X = np.empty((n_samples, window_size) + features.shape[1:], dtype=np.float32)
    y = np.empty((n_samples, prediction_size) + target.shape[1:], dtype=np.float32)
    for i in range(n_samples):
        horizon_start = i + window_size
        X[i] = features[i:horizon_start]
        y[i] = target[horizon_start:horizon_start + prediction_size]

    return torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)

# Window sizes to test (other candidates currently disabled: 20, 30, 60, 90, 120).
window_sizes = [10,]
prediction_size = 10  # forecast horizon, fixed
batch_size = 48
results = []  # one (window_size, rmse, mae, r2) tuple per tested window size
models = []

for window_size in window_sizes:
    print(f"\nüü¢ Test avec window_size = {window_size}")

    # Sequences for each split
    X_train, y_train = create_sequences(train_features, train_target, window_size, prediction_size)
    X_val, y_val = create_sequences(val_features, val_target, window_size, prediction_size)
    X_test, y_test = create_sequences(test_features, test_target, window_size, prediction_size)

    # Data loaders (only the training batches are shuffled)
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)

    # Fresh model per window size. Each run logs to its own TensorBoard
    # sub-directory so curves from different window sizes are not mixed.
    model = LSTMModel(X_train.shape[2], window_size, prediction_size).to(device)
    learner = Learner(model, train_loader, val_loader, log_dir=f"runs/window_{window_size}")
    learner.fit(lr=0.0001, epochs=20)

    y_pred = learner.predict(test_loader)

    # Back to the original units. y_test is a torch tensor placed on `device`;
    # move it to host memory first, because scikit-learn needs a NumPy array
    # and a CUDA tensor cannot be converted implicitly.
    y_test_np = y_test.detach().cpu().numpy()
    y_test_rescaled = scaler_target.inverse_transform(y_test_np.reshape(-1, prediction_size))
    y_pred_rescaled = scaler_target.inverse_transform(y_pred.reshape(-1, prediction_size))

    # Metrics are computed per forecast step, then averaged over the horizon.
    rmse = np.mean([np.sqrt(mean_squared_error(y_test_rescaled[:, t], y_pred_rescaled[:, t])) for t in range(prediction_size)])
    mae = np.mean([mean_absolute_error(y_test_rescaled[:, t], y_pred_rescaled[:, t]) for t in range(prediction_size)])
    r2 = np.mean([r2_score(y_test_rescaled[:, t], y_pred_rescaled[:, t]) for t in range(prediction_size)])

    print(f"üìä R√©sultats pour window_size={window_size} -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R¬≤: {r2:.3f}")

    results.append((window_size, rmse, mae, r2))

# Best window = lowest horizon-averaged RMSE.
best_window = min(results, key=lambda x: x[1])
print(f"\n‚úÖ Meilleure fen√™tre : {best_window[0]} avec RMSE={best_window[1]:.3f}, MAE={best_window[2]:.3f}, R¬≤={best_window[3]:.3f}")


#### Assessment

In [None]:
import matplotlib.pyplot as plt

# Split the (window_size, rmse, mae, r2) tuples into one sequence per field.
window_sizes, rmse_values, mae_values, r2_values = zip(*results)

# RMSE as a function of the window size.
ax = plt.figure(figsize=(8, 5)).add_subplot()
ax.plot(window_sizes, rmse_values, color='b', marker='o', linestyle='-', label="RMSE")
ax.set_xlabel("Taille de la fen√™tre temporelle")
ax.set_ylabel("Erreur (RMSE)")
ax.set_title("Impact de la fen√™tre temporelle sur la pr√©cision")
ax.legend()
ax.grid(True)
plt.show()


# Conclusions
* Si window_size est trop petit, le modèle manque de contexte et ne capture pas bien les tendances.
* Si window_size est trop grand, il risque d’avoir trop d’informations inutiles et de perdre en généralisation.
* Le test permet de trouver un compromis optimal pour minimiser l'erreur (RMSE, MAE) et maximiser R².

# Enchaîner avec la meilleure fenêtre temporelle
* Après avoir trouvé la meilleure taille de fenêtre temporelle (best_window), l'idée est de :

* Réentraîner le modèle LSTM avec cette meilleure window_size.
* Effectuer la prédiction finale sur X_test.
* Analyser l'erreur moyenne par jour de prédiction pour voir comment elle évolue sur les 10 jours.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import Huber
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Window size used for the final model (hard-coded; best_window[0] holds the
# value selected by the search cell).
best_window_size = 10 #best_window[0]
prediction_size = 10  # forecast horizon

# Sequences for the chosen window. The last call pairs the same inputs with
# the second scaled series (test_targets) for later comparison.
X_train, y_train = create_sequences(train_features, train_target, best_window_size, prediction_size)
X_val, y_val = create_sequences(val_features, val_target, best_window_size, prediction_size)
X_test, y_test = create_sequences(test_features, test_target, best_window_size, prediction_size)
x_test, y_test_mgb = create_sequences(test_features, test_targets, best_window_size, prediction_size)

print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")


def _to_numpy(tensor):
    """Return a host-memory NumPy copy of a torch tensor."""
    return tensor.detach().cpu().numpy()


# create_sequences returns torch tensors placed on `device`. scikit-learn
# needs host NumPy arrays (a CUDA tensor cannot be converted implicitly), and
# the Keras model is given NumPy arrays explicitly as well. The torch tensors
# keep their original names for the cells that follow.
X_train_np, y_train_np = _to_numpy(X_train), _to_numpy(y_train)
X_val_np, y_val_np = _to_numpy(X_val), _to_numpy(y_val)
X_test_np, y_test_np = _to_numpy(X_test), _to_numpy(y_test)
y_test_mgb_np = _to_numpy(y_test_mgb)

# Keras counterpart of the recurrent stack: two bidirectional LSTMs, one LSTM
# returning only its last step, then a dense head with `prediction_size` outputs.
model = Sequential([
    Bidirectional(LSTM(256, return_sequences=True, input_shape=(best_window_size, X_train.shape[2]))),
    Dropout(0.2),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(prediction_size)
])

# Compile and train
model.compile(optimizer='adam',loss=Huber(), metrics=['mae'])
history = model.fit(X_train_np, y_train_np, validation_data=(X_val_np, y_val_np), epochs=20, batch_size=32, verbose=1)

# Predictions on the test sequences
y_pred = model.predict(X_test_np)

# Undo the target scaling for observations, predictions and the second series.
y_test_rescaled = scaler_target.inverse_transform(y_test_np.reshape(-1, prediction_size))
y_pred_rescaled = scaler_target.inverse_transform(y_pred.reshape(-1, prediction_size))
y_test_mgb_rescaled = scaler_target.inverse_transform(y_test_mgb_np.reshape(-1, prediction_size))

# Metrics are computed per forecast step, then averaged over the horizon.
rmse = np.mean([np.sqrt(mean_squared_error(y_test_rescaled[:, t], y_pred_rescaled[:, t])) for t in range(prediction_size)])
mae = np.mean([mean_absolute_error(y_test_rescaled[:, t], y_pred_rescaled[:, t]) for t in range(prediction_size)])
r2 = np.mean([r2_score(y_test_rescaled[:, t], y_pred_rescaled[:, t]) for t in range(prediction_size)])

print(f"\nüìä R√©sultats finaux avec window_size={best_window_size} -> RMSE: {rmse:.3f}, MAE: {mae:.3f}, R¬≤: {r2:.3f}")

# Absolute error of every prediction, one column per forecast step.
error = np.abs(y_test_rescaled - y_pred_rescaled)

# Mean absolute error as a function of the forecast step.
plt.figure(figsize=(12, 5))
plt.plot(np.mean(error, axis=0), marker='o', linestyle='dashed', color='red')

plt.xlabel("Jour de pr√©diction de (t+1 √† t+10)")
plt.ylabel("Erreur moyenne")
plt.title("√âvolution de l'erreur de pr√©diction par jour")
plt.grid()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
# avec loss =Huber
# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_mgb_reconstructed = np.concatenate(y_test_mgb_rescaled[:-1], axis=0)
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]
    y_test_mgb = y_test_mgb_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Q_obs")
    ax.plot(y_pred, color='red', linestyle='solid', label="Q_pred (pr√©dit)")
    ax.plot(y_test_mgb, color='black', label="Q_mgb")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# Position of the MGB discharge column in the feature matrix (second column
# of the feature list used to build the sequences).
debit_mgb_index = 1

# MGB value at the last time step of each input window (last window dropped,
# like above). X_test may be a torch tensor, so bring it to host NumPy first.
last_step_mgb = X_test[:-1, -1, debit_mgb_index]
if isinstance(last_step_mgb, torch.Tensor):
    last_step_mgb = last_step_mgb.detach().cpu().numpy()
last_step_mgb = np.asarray(last_step_mgb)

# X_test holds RobustScaler-transformed features, whereas the observations and
# predictions plotted below are back in original units and share one y axis.
# Undo the scaling of this single column: x = x_scaled * scale_ + center_.
debit_mgb_series = (
    last_step_mgb * scaler_features.scale_[debit_mgb_index]
    + scaler_features.center_[debit_mgb_index]
).reshape(-1, 1)

# Repeat that value across the horizon so it lines up with the flattened
# observation/prediction series (prediction_size values per window).
debit_mgb_reconstructed = np.tile(debit_mgb_series, (1, prediction_size)).reshape(-1)

# 5 x 2 grid of panels with shared axes, one panel per forecast step.
fig, axes = plt.subplots(5, 2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.flatten()

for i in range(prediction_size):
    y_true = y_test_reconstructed[i::prediction_size]
    y_pred = y_pred_reconstructed[i::prediction_size]
    debit_mgb = debit_mgb_reconstructed[i::prediction_size]

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    corr = np.corrcoef(y_true, y_pred)[0, 1]

    axes[i].plot(y_true, label="Y_test (r√©el)", color='blue')
    axes[i].plot(y_pred, label="Y_pred", color='red', linestyle='solid')
    axes[i].plot(debit_mgb, label="D√©bit MGB", color='green', linestyle='dotted')

    axes[i].set_title(f"Jour {i+1} ‚Äì RMSE={rmse:.2f}, Corr={corr:.2f}")
    axes[i].legend()
    axes[i].grid()

plt.suptitle("Comparaison r√©elle / pr√©dite avec D√©bit MGB sur les 10 jours d‚Äôhorizon", fontsize=14)
plt.tight_layout()
plt.show()


In [None]:
# The label announces X_test and no `x_rain` is assigned in the cells above,
# so print the shape of X_test itself.
print("Shape de X_test :", X_test.shape)

In [None]:
# Rainfall is taken to be the first feature column (index 0); change the
# index to overlay another input variable.
rain_feature = test_features[:, 0]

# create_sequences returns an (X, y) pair. Passing the rainfall column as the
# "target" yields, for every sample, the rainfall over the same horizon steps
# as y_test, so both series line up index by index.
_, y_rain = create_sequences(test_features, rain_feature.reshape(-1, 1),
                             window_size=best_window_size, prediction_size=prediction_size)
y_rain = y_rain.detach().cpu().numpy().reshape(-1, prediction_size)

# Lay all windows except the last end to end as one 1-D series each.
rain_reconstructed = np.concatenate(y_rain[:-1], axis=0)
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, one panel per forecast step.
fig, axes = plt.subplots(5, 2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.flatten()

for i in range(prediction_size):
    y_true = y_test_reconstructed[i::prediction_size]
    y_pred = y_pred_reconstructed[i::prediction_size]

    # NOTE(review): the rainfall curve is still in scaled feature units while
    # the two discharge curves are in original units.
    axes[i].plot(y_true, label="Y_test (r√©el)", color='blue')
    axes[i].plot(y_pred, label="Y_pred (pr√©dit)", color='red', linestyle='solid')
    axes[i].plot(rain_reconstructed[i::prediction_size], label="Pluie (entr√©e)", color='green', linestyle='dotted')
    axes[i].grid()
    axes[i].legend()

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
# avec loss =Huber
# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Lay all forecast windows except the last end to end as one 1-D series each.
y_test_reconstructed = np.concatenate(y_test_rescaled[:-1], axis=0)
y_pred_reconstructed = np.concatenate(y_pred_rescaled[:-1], axis=0)

# 5 x 2 grid of panels with shared axes, addressed as a flat array.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(14, 10), sharex=True, sharey=True)
axes = axes.ravel()

for i in range(prediction_size):
    # In the flattened series, every prediction_size-th value starting at
    # offset i belongs to forecast step i.
    step = slice(i, None, prediction_size)
    y_true = y_test_reconstructed[step]
    y_pred = y_pred_reconstructed[step]

    # RMSE and Pearson correlation between observations and predictions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    ax = axes[i]
    ax.plot(y_true, color='blue', label="Y_test (r√©el)")
    ax.plot(y_pred, color='red', linestyle='solid', label="Y_pred (pr√©dit)")
    ax.set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    ax.grid()
    ax.legend()

plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de 10 jours", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# NOTE(review): this cell repeats the preceding cell line for line; consider
# deleting one of the two.

# Flatten all forecast windows but the last into a single series
# (the last window is excluded by the "- 1" -- confirm this is wanted).
y_test_reconstructed = np.concatenate([y_test_rescaled[i] for i in range(y_test_rescaled.shape[0] - 1)], axis=0)
y_pred_reconstructed = np.concatenate([y_pred_rescaled[i] for i in range(y_pred_rescaled.shape[0] - 1)], axis=0)

# Grid with two columns whose row count follows prediction_size, replacing
# the fixed 5x2 layout that only fits a 10-day horizon.
n_cols = 2
n_rows = -(-prediction_size // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 2 * n_rows), sharex=True, sharey=True, squeeze=False)
axes = axes.flatten()

for i in range(prediction_size):
    # Entry i of each window, obtained by striding through the flat series
    # (presumes windows of length prediction_size -- TODO confirm).
    y_true = y_test_reconstructed[i::prediction_size]
    y_pred = y_pred_reconstructed[i::prediction_size]

    # Scores for forecast day i
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    axes[i].plot(y_true, label="Y_test (r√©el)", color='blue')
    axes[i].plot(y_pred, label="Y_pred (pr√©dit)", color='red', linestyle='solid')
    axes[i].set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}")
    axes[i].grid()
    axes[i].legend()

# An odd prediction_size leaves one unused panel; hide it
for unused_ax in axes[prediction_size:]:
    unused_ax.set_visible(False)

plt.suptitle(f"Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de {prediction_size} jours", fontsize=14)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

# Concatenate every forecast window except the last one into one flat series.
y_test_reconstructed = np.concatenate([y_test_rescaled[i] for i in range(y_test_rescaled.shape[0] - 1)], axis=0)
y_pred_reconstructed = np.concatenate([y_pred_rescaled[i] for i in range(y_pred_rescaled.shape[0] - 1)], axis=0)

# Single figure: the traces of all forecast days are overlaid on the same axes.
fig = go.Figure()

for i in range(prediction_size):
    # Entry i of every window (assumes windows of length prediction_size -- TODO confirm)
    y_true = y_test_reconstructed[i::prediction_size]
    y_pred = y_pred_reconstructed[i::prediction_size]

    # Per-day skill scores
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]

    fig.add_trace(go.Scatter(y=y_true, mode='lines', name=f'Y_test Jour {i+1}', line=dict(color='blue')))
    fig.add_trace(go.Scatter(y=y_pred, mode='lines', name=f'Y_pred Jour {i+1}', line=dict(color='red', dash='dash')))

    # Metric label anchored at the right end of the series, at the height of
    # its maximum. np.max keeps the reduction in NumPy instead of iterating
    # the array through the Python builtin.
    fig.add_annotation(
        x=len(y_true) - 1, y=np.max(y_true),
        text=f"Jour {i+1}<br>RMSE={rmse:.2f}, Corr={correlation:.2f}",
        showarrow=False, font=dict(size=10)
    )

# The title reports the actual horizon rather than a fixed "10"
fig.update_layout(
    title=f"Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de {prediction_size} jours",
    xaxis_title="Temps",
    yaxis_title="Valeurs",
    template="plotly_white"
)

fig.show()


In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error

def kge(sim, obs):
    """Return the Kling-Gupta efficiency of `sim` against `obs`.

    The score is 1 minus the Euclidean distance from the ideal point (1, 1, 1)
    of three components: the Pearson correlation, the ratio of standard
    deviations (sim / obs) and the ratio of means (sim / obs).
    """
    r_term = np.corrcoef(sim, obs)[0, 1] - 1
    spread_term = np.std(sim) / np.std(obs) - 1
    bias_term = np.mean(sim) / np.mean(obs) - 1
    distance = np.sqrt(r_term**2 + spread_term**2 + bias_term**2)
    return 1 - distance

def pbias(sim, obs):
    """Return the percent bias: summed (sim - obs) relative to summed obs, times 100."""
    total_error = np.sum(sim - obs)
    return 100 * total_error / np.sum(obs)

# Concatenate every forecast window except the last one into one flat series.
y_test_reconstructed = np.concatenate([y_test_rescaled[i] for i in range(y_test_rescaled.shape[0] - 1)], axis=0)
y_pred_reconstructed = np.concatenate([y_pred_rescaled[i] for i in range(y_pred_rescaled.shape[0] - 1)], axis=0)

# Forecast horizon handled by this cell, kept in one place instead of being
# repeated in the grid, the loop bound, the stride and the title.
horizon = 10
n_cols = 2
n_rows = -(-horizon // n_cols)  # ceiling division

# Interactive grid with one subplot per forecast day
fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=[f"Jour {i+1}" for i in range(horizon)])

for i in range(horizon):
    # Entry i of every window (assumes windows of length `horizon` -- TODO confirm)
    y_true = y_test_reconstructed[i::horizon]
    y_pred = y_pred_reconstructed[i::horizon]

    # Per-day skill scores
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]
    kge_value = kge(y_pred, y_true)
    pbias_value = pbias(y_pred, y_true)

    row, col = (i // n_cols) + 1, (i % n_cols) + 1

    # Observed series
    fig.add_trace(go.Scatter(y=y_true, mode='lines', name='Y_test (r√©el)', line=dict(color='blue')),
                  row=row, col=col)

    # Predicted series
    fig.add_trace(go.Scatter(y=y_pred, mode='lines', name='Y_pred (pr√©dit)', line=dict(color='red', dash='dash')),
                  row=row, col=col)

    # Subplot titles are annotations; find this day's one by its initial text
    # and append the metrics. Plotly needs <br> for a line break -- a "\n"
    # is not rendered as one.
    fig.update_annotations(
        selector=dict(text=f"Jour {i+1}"),
        text=f"Jour {i+1}<br>RMSE={rmse:.2f}, Corr={correlation:.2f}, KGE={kge_value:.2f}, PBIAS={pbias_value:.2f}%"
    )

# Page layout
title_text = f"Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de {horizon} jours"
fig.update_layout(height=900, width=1200, title_text=title_text, showlegend=False)

# Interactive display
fig.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Kling-Gupta efficiency, variant whose variability term is a ratio of
# coefficients of variation.
def kling_gupta_efficiency(y_true, y_pred):
    """Return a Kling-Gupta efficiency score of `y_pred` against `y_true`.

    Components, each ideal at 1:
      * r     -- Pearson correlation between observations and predictions;
      * beta  -- ratio of means (pred / obs);
      * alpha -- ratio of the coefficients of variation (std / mean) of the
                 predictions and of the observations. This is NOT the plain
                 ratio of standard deviations; the `kge` helper defined
                 elsewhere in this notebook uses the plain ratio, so the two
                 functions can return different scores for the same data.
    """
    obs_mean = np.mean(y_true)
    pred_mean = np.mean(y_pred)

    r = np.corrcoef(y_true, y_pred)[0, 1]
    beta = pred_mean / obs_mean

    cv_pred = np.std(y_pred) / pred_mean
    cv_obs = np.std(y_true) / obs_mean
    alpha = cv_pred / cv_obs

    distance = np.sqrt((r - 1) ** 2 + (beta - 1) ** 2 + (alpha - 1) ** 2)
    return 1 - distance

# Percent bias of the predictions relative to the observations.
def pbias(y_true, y_pred):
    """Return 100 * sum(y_pred - y_true) / sum(y_true).

    NOTE(review): another `pbias` in this notebook takes its arguments in the
    opposite order (simulated first, observed second); whichever cell ran
    last decides which signature is live.
    """
    summed_error = np.sum(y_pred - y_true)
    return 100 * summed_error / np.sum(y_true)

# Concatenate every forecast window except the last one into one flat series.
y_test_reconstructed = np.concatenate([y_test_rescaled[i] for i in range(y_test_rescaled.shape[0] - 1)], axis=0)
y_pred_reconstructed = np.concatenate([y_pred_rescaled[i] for i in range(y_pred_rescaled.shape[0] - 1)], axis=0)

# Two panels per row; the row count follows prediction_size instead of a
# fixed 5x2 grid so other horizons still fit.
n_cols = 2
n_rows = -(-prediction_size // n_cols)  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 2 * n_rows), sharex=True, sharey=True, squeeze=False)
axes = axes.flatten()

for i in range(prediction_size):
    # Entry i of every window (assumes windows of length prediction_size -- TODO confirm)
    y_true = y_test_reconstructed[i::prediction_size]
    y_pred = y_pred_reconstructed[i::prediction_size]

    # Per-day skill scores. The KGE result is stored as kge_value so that it
    # does not overwrite the `kge` function defined in an earlier cell.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    correlation = np.corrcoef(y_true, y_pred)[0, 1]
    kge_value = kling_gupta_efficiency(y_true, y_pred)
    pbias_value = pbias(y_true, y_pred)

    axes[i].plot(y_true, label="Y_test (r√©el)", color='blue')
    axes[i].plot(y_pred, label="Y_pred (pr√©dit)", color='red', linestyle='dashed')
    axes[i].set_title(f"Jour {i+1}\nRMSE={rmse:.2f}, Corr={correlation:.2f}, KGE={kge_value:.2f}, PBIAS={pbias_value:.2f}%")
    axes[i].grid()
    axes[i].legend()

# Hide the spare panel that appears when prediction_size is odd
for unused_ax in axes[prediction_size:]:
    unused_ax.set_visible(False)

plt.suptitle(f"Comparaison des valeurs r√©elles et pr√©dites pour chaque jour de l'horizon de {prediction_size} jours", fontsize=14)
plt.tight_layout()
plt.show()


In [None]:
import os
import pandas as pd

# Directory holding the .txt files to merge
repertoire ="/home/mgning/work/hyfaa/work_configurations/senegal/test_oualia_cs=1_wm=800_b=0.30/"

# os.listdir returns names in arbitrary order; sorting makes the column order
# of the merged table reproducible from one run to the next.
fichiers = sorted(f for f in os.listdir(repertoire) if f.endswith('.txt'))

# Collect one named column per file and concatenate once at the end rather
# than growing a DataFrame inside the loop.
colonnes = []
for fichier in fichiers:
    chemin_fichier = os.path.join(repertoire, fichier)

    # Adjust `sep` if the files use another delimiter
    df = pd.read_csv(chemin_fichier, sep=";")

    # Files without a "value" column are skipped
    if "value" not in df.columns:
        continue

    # The column takes the file name (without extension) as its label
    nom_colonne = os.path.splitext(fichier)[0]
    colonnes.append(df["value"].rename(nom_colonne))

# NOTE(review): columns are aligned on row position only; confirm that all
# files cover the same dates in the same order.
df_final = pd.concat(colonnes, axis=1) if colonnes else pd.DataFrame()

# Preview of the merged table
print(df_final.head())

# Persist the merged table
df_final.to_csv("resultat_final.csv", index=False)


In [None]:
# Bare expression: in a notebook this renders the merged DataFrame as the cell output.
df_final

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# Work on a copy so df_final itself is left untouched
df = df_final.copy()

# Model inputs and outputs: the same five series are used on both sides
features_columns = ["Q_MANANTALI_AVAL", "Q_GUIERS", "Q_GHORFA", "Q_KABATE", "Q_BAFING_MAKANA"]
targets_columns = ["Q_MANANTALI_AVAL", "Q_GUIERS", "Q_GHORFA", "Q_KABATE", "Q_BAFING_MAKANA"]

# Min-max scaling of the feature columns.
# NOTE(review): the scaler is fitted on the whole table, i.e. before any
# train/test split, so test-period minima and maxima influence the scaling.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[features_columns])
df_scaled = pd.DataFrame(scaled_values, columns=features_columns, index=df.index)

# Build sliding-window samples for sequence-to-sequence training
def create_sequences(data, target_columns, window_size, prediction_size):
    """Cut `data` into (input window, target window) pairs.

    Args:
        data: DataFrame whose rows are consecutive time steps; every column
            is used as an input feature.
        target_columns: column labels to extract for the target windows.
        window_size: number of rows in each input window.
        prediction_size: number of rows in each target window, taken
            immediately after the input window.

    Returns:
        tuple: (X, y) NumPy arrays. When at least one sample fits, X has shape
        (n, window_size, n_columns) and y has shape
        (n, prediction_size, len(target_columns)); otherwise both are empty.
    """
    # Convert once instead of slicing the DataFrame on every iteration
    inputs = data.to_numpy()
    targets = data[target_columns].to_numpy()

    # "+ 1" keeps the last sample, whose target window ends exactly on the
    # final row; without it that sample was silently dropped.
    n_samples = len(data) - window_size - prediction_size + 1

    X, y = [], []
    for start in range(n_samples):
        split = start + window_size
        X.append(inputs[start:split])
        y.append(targets[split:split + prediction_size])
    return np.array(X), np.array(y)

# Hyper-parameters
window_size = 10  # number of past steps fed to the model
prediction_size = 10  # number of future steps to predict

# Chronological split.
# NOTE(review): 0.3 puts only the first 30 % of the rows in the training set
# and the remaining 70 % in the test set -- confirm the ratio is intended.
train_size = int(len(df_scaled) * 0.3)
train_data = df_scaled.iloc[:train_size]
test_data = df_scaled.iloc[train_size:]

X_train, y_train = create_sequences(train_data, targets_columns, window_size, prediction_size)
X_test, y_test = create_sequences(test_data, targets_columns, window_size, prediction_size)

# Two stacked LSTM layers followed by a dense layer that emits every target
# for every predicted step as one flat vector.
model = Sequential([
    LSTM(100, activation='relu', return_sequences=True, input_shape=(window_size, len(features_columns))),
    LSTM(50, activation='relu'),
    Dense(len(targets_columns) * prediction_size)
])

model.compile(optimizer='adam', loss='mse')

# Targets are flattened to (n_samples, prediction_size * n_targets) to match
# the dense output.
# NOTE(review): the test set doubles as the validation set here.
history = model.fit(X_train, y_train.reshape(y_train.shape[0], -1), epochs=50, batch_size=32, validation_data=(X_test, y_test.reshape(y_test.shape[0], -1)))

# Predict, then restore the (n_samples, prediction_size, n_targets) layout
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(y_test.shape)

# Undo the min-max scaling; the scaler works on 2-D (rows, n_targets) data
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, len(targets_columns))).reshape(y_test.shape)
y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, len(targets_columns))).reshape(y_pred.shape)

# One panel per target. The panel count follows targets_columns instead of a
# fixed 5, and squeeze=False keeps `axes` indexable with a single target.
n_targets = len(targets_columns)
fig, axes = plt.subplots(n_targets, 1, figsize=(12, 3 * n_targets), sharex=True, squeeze=False)
axes = axes.ravel()

for i, col in enumerate(targets_columns):
    axes[i].plot(y_test_rescaled[:, :, i].flatten(), label="R√©el", color='blue')
    axes[i].plot(y_pred_rescaled[:, :, i].flatten(), label="Pr√©dit", color='red', linestyle='dashed')
    axes[i].set_title(f"Pr√©diction de {col}")
    axes[i].legend()
    axes[i].grid()

# The x label goes on the bottom panel (x-axes are shared)
axes[-1].set_xlabel("Temps")
plt.suptitle("Comparaison des valeurs r√©elles et pr√©dites par LSTM")
plt.show()


## Learning rate finder development

In [None]:
def lr_find(self, start_lr=1e-7, end_lr=10, num_iter=100, step_mode="exp", show_plot=True):
    """Find a good learning rate by training with a steadily growing lr.

    source: https://github.com/fastai/fastai1/blob/master/fastai/train.py#L33

    The model is trained for up to `num_iter` mini-batches while the learning
    rate rises from `start_lr` towards `end_lr`. The loss of every step is
    recorded, and the learning rate at which the loss falls fastest is
    returned. Model weights and the train/eval mode are put back to their
    initial state before returning, even if a training step raises.

    Args:
        start_lr (float): Starting learning rate
        end_lr (float): Maximum learning rate
        num_iter (int): Number of iterations to run (at least 1)
        step_mode (str): "exp" for exponential increase, any other value for
            linear increase
        show_plot (bool): Whether to display the loss plot

    Returns:
        tuple: (optimal_lr, learning_rates, losses)

    Raises:
        ValueError: if `num_iter` is smaller than 1.
    """
    import copy  # local import keeps this cell self-contained

    if num_iter < 1:
        raise ValueError("num_iter must be at least 1")

    # state_dict() returns references to the live parameter tensors, which the
    # optimizer updates in place. Without a deep copy the "saved" weights
    # follow the training and the final restore does nothing.
    original_weights = copy.deepcopy(self.model.state_dict())
    was_training = self.model.training

    # self.optimizer is called as a factory here (e.g. an optimizer class)
    optimizer = self.optimizer(self.model.parameters(), lr=start_lr)

    if step_mode == "exp":
        # Constant multiplier applied after each step
        gamma = (end_lr / start_lr) ** (1 / num_iter)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma)
    else:
        # Constant increment added after each step
        gamma = (end_lr - start_lr) / num_iter
        scheduler = None

    learning_rates = []
    losses = []
    best_loss = float('inf')

    iterator = iter(self.train_loader)

    try:
        for iteration in range(num_iter):
            # Restart the loader when it runs out of batches
            try:
                batch_X, batch_y = next(iterator)
            except StopIteration:
                iterator = iter(self.train_loader)
                batch_X, batch_y = next(iterator)

            # Forward pass
            self.model.train()
            optimizer.zero_grad()
            outputs = self.model(batch_X)
            loss = self.criterion(outputs, batch_y.squeeze())

            # Backward pass
            loss.backward()
            optimizer.step()

            # Record the learning rate that produced this loss
            learning_rates.append(optimizer.param_groups[0]['lr'])
            losses.append(loss.item())

            # Raise the learning rate for the next step
            if scheduler is not None:
                scheduler.step()
            else:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = start_lr + (gamma * (iteration + 1))

            # Stop once the loss explodes relative to the best one seen
            if iteration > 0 and losses[-1] > 4 * best_loss:
                break

            if losses[-1] < best_loss:
                best_loss = losses[-1]
    finally:
        # Undo every side effect of the search on the model
        self.model.load_state_dict(original_weights)
        self.model.train(was_training)

    if show_plot:
        plt.figure(figsize=(10, 6))
        plt.plot(learning_rates, losses)
        plt.xscale('log')
        plt.xlabel('Learning Rate (log scale)')
        plt.ylabel('Loss')
        plt.title('Learning Rate Finder')
        plt.show()

    # Steepest descent of the raw (unsmoothed) loss curve. np.gradient needs
    # at least two points, so a single recorded step returns its own lr.
    if len(losses) < 2:
        optimal_lr = learning_rates[0]
    else:
        min_grad_idx = int(np.gradient(np.asarray(losses)).argmin())
        optimal_lr = learning_rates[min_grad_idx]

    return optimal_lr, learning_rates, losses

## Initial Comparison

In [None]:
# Row-to-row difference of the d√©bit_mgb column; NaN results (the first row
# has no predecessor) are replaced by 0.
data["delta_debit_mgb"] = data["d√©bit_mgb"].diff().fillna(0)

In [None]:

# Compare the d√©bit_insitu and d√©bit_mgb columns over time on two separate
# y-axes. Legend entries, axis titles and the figure title now name the
# columns that are actually plotted; they previously announced a 7-day
# cumulative rainfall curve that this figure does not contain.
fig = go.Figure()

# d√©bit_insitu on the primary (left) axis
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['d√©bit_insitu'],
    mode='lines',
    name="D√©bit in situ",
    line=dict(color='blue'),
    yaxis="y1"
))

# d√©bit_mgb on the secondary (right) axis
fig.add_trace(go.Scatter(
    x=data.index,
    y=data["d√©bit_mgb"],
    mode='lines',
    name="D√©bit MGB",
    line=dict(color='red', dash='dash'),
    yaxis="y2"
))

# Layout with two y-axes; tick colours match the corresponding curve
fig.update_layout(
    title="√âvolution du d√©bit in situ et du d√©bit MGB en fonction du temps",
    xaxis=dict(title="Temps"),
    yaxis=dict(
        title="D√©bit in situ (m¬≥/s)",
        tickfont=dict(color="blue"),
        side="left"
    ),
    yaxis2=dict(
        # NOTE(review): unit presumably the same as the in situ series -- confirm
        title="D√©bit MGB",
        tickfont=dict(color="red"),
        overlaying="y",  # drawn on top of the primary axis
        side="right"
    ),
    legend=dict(x=0.02, y=0.98),
    template="plotly_white"
)

fig.show()


In [None]:
import plotly.graph_objects as go

# Compare the d√©bit_insitu column with the step-to-step change of the MGB
# discharge (delta_debit_mgb) on two separate y-axes. Labels now describe the
# plotted columns; they previously referred to cumulative rainfall and
# labelled the delta curve as the in situ discharge.
fig = go.Figure()

# d√©bit_insitu on the primary (left) axis
fig.add_trace(go.Scatter(
    x=data.index,
    y=data['d√©bit_insitu'],
    mode='lines',
    name="D√©bit in situ",
    line=dict(color='blue'),
    yaxis="y1"
))

# delta_debit_mgb on the secondary (right) axis
fig.add_trace(go.Scatter(
    x=data.index,
    y=data["delta_debit_mgb"],
    mode='lines',
    name="Delta d√©bit MGB",
    line=dict(color='red', dash='dash'),
    yaxis="y2"
))

# Layout with two y-axes; tick colours match the corresponding curve
fig.update_layout(
    title="√âvolution du d√©bit in situ et du delta de d√©bit MGB en fonction du temps",
    xaxis=dict(title="Temps"),

    yaxis=dict(
        title="D√©bit in situ (m¬≥/s)",
        tickfont=dict(color="blue"),
        side="left"
    ),

    yaxis2=dict(
        # NOTE(review): unit not stated in the notebook -- confirm before adding one
        title="Delta d√©bit MGB",
        tickfont=dict(color="red"),
        overlaying="y",  # drawn on top of the primary axis
        side="right"
    ),

    legend=dict(x=0.02, y=0.98),
    template="plotly_white"
)

fig.show()
