# Importation des librairies

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchmtlr import MTLR, mtlr_neg_log_likelihood, mtlr_survival
from torchmtlr.utils import encode_survival, make_time_bins
from sksurv.util import Surv
from sksurv.metrics import concordance_index_ipcw
from sklearn.model_selection import train_test_split, KFold
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances
import json
import matplotlib.pyplot as plt

# Configuration: seed NumPy and PyTorch with the same fixed value (42)
np.random.seed(42)
torch.manual_seed(42)

# Modèle MTLR

In [None]:
# Load the enriched training and evaluation tables from disk
df_train = pd.read_csv("../data/train_enhanced.csv")
df_eval = pd.read_csv("../data/eval_enhanced.csv")

# Report the dimensions of both tables
print(f"Train data: {df_train.shape}")
print(f"Eval data: {df_eval.shape}")

# List every column of the training table
print("\nColonnes train:")
print(list(df_train.columns))


In [None]:
# Sanity check: is the identifier column present, and what do the leading columns look like?
train_columns = df_train.columns
print("'ID' in df_train columns:", "ID" in train_columns)
print("\nFirst few columns of df_train:")
print(list(train_columns[:10]))

In [None]:
target = ["OS_STATUS", "OS_YEARS"]

# Training features: drop the survival targets and the identifier, then
# one-hot encode the remaining columns.
X = df_train.drop(columns=target + ["ID"])
X = pd.get_dummies(X, drop_first=True).astype(float)

# Evaluation features.
# Every category level is encoded here (no drop_first) and the result is then
# aligned on the training columns. Dropping the first level independently on
# each table would pick a different reference level whenever the evaluation
# table lacks a level present in training, silently mis-encoding that feature.
# Columns unknown to the training table are discarded by the reindex, and
# training columns absent from the evaluation table are filled with 0.
X_eval = df_eval.drop(columns=["ID"])
X_eval = pd.get_dummies(X_eval)
X_eval = X_eval.reindex(columns=X.columns, fill_value=0).astype(float)

# Survival targets as float32 tensors
y_time = torch.tensor(df_train['OS_YEARS'].values, dtype=torch.float32)
y_event = torch.tensor(df_train['OS_STATUS'].values, dtype=torch.float32)

print(f"X shape: {X.shape}")
print(f"X_eval shape: {X_eval.shape}")
print(f"Nombre de features: {X.shape[1]}")

# K-fold splitter, defined here because the Optuna objective and the
# cross-validation cells read it as a module-level name.
# (KFold is imported with the other libraries at the top of the notebook.)
kf = KFold(n_splits=5, shuffle=True, random_state=42)


In [None]:
## Définition de l'objectif et optimisation Optuna

def objective(trial):
    """Optuna objective: cross-validated IPCW C-index of an MTLR network.

    For every fold produced by the module-level ``kf`` splitter on ``X``, a
    network with two hidden layers and a final ``MTLR`` layer is trained
    full-batch on the training part with the sampled hyperparameters, then
    scored on the held-out part. The risk score is the log-sum-exp of the
    MTLR logits and the metric is ``concordance_index_ipcw`` with ``tau=7.0``.

    Relies on the module-level names ``X``, ``y_time``, ``y_event`` and ``kf``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold scores. A fold whose metric computation raises
        contributes the neutral value 0.5 and emits a ``RuntimeWarning``.
    """
    import warnings

    # Search space
    params = {
        'n_hidden1': trial.suggest_int('n_hidden1', 16, 240, step=32),
        'n_hidden2': trial.suggest_int('n_hidden2', 8, 120, step=16),
        'dropout1': trial.suggest_float('dropout1', 0.0, 0.5),
        'dropout2': trial.suggest_float('dropout2', 0.0, 0.5),
        'lr': trial.suggest_float('lr', 1e-4, 1e-2, log=True),
        'n_epochs': trial.suggest_int('n_epochs', 50, 200, step=25),
        'C1': trial.suggest_float('C1', 0.1, 5.0, log=True),
        'activation': trial.suggest_categorical('activation', ['relu', 'leaky_relu', 'elu']),
        'optimizer': trial.suggest_categorical('optimizer', ['adam', 'adamw']),
        'weight_decay': trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    }

    # Dispatch tables; the fallbacks reproduce the former `else` branches
    activation_classes = {'relu': nn.ReLU, 'leaky_relu': nn.LeakyReLU}
    optimizer_classes = {'adamw': torch.optim.AdamW}

    cv_scores = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
        # Fold data
        X_train_fold = X.iloc[train_idx]
        X_test_fold = X.iloc[test_idx]
        y_time_train_fold = y_time[train_idx]
        y_time_test_fold = y_time[test_idx]
        y_event_train_fold = y_event[train_idx]
        y_event_test_fold = y_event[test_idx]

        X_train_fold_tensor = torch.tensor(X_train_fold.values, dtype=torch.float32)
        X_test_fold_tensor = torch.tensor(X_test_fold.values, dtype=torch.float32)

        # Time bins are derived from the training part only, then used to
        # encode the training targets
        time_bins_fold = make_time_bins(y_time_train_fold, event=y_event_train_fold)
        target_fold = encode_survival(y_time_train_fold, y_event_train_fold, time_bins_fold)

        # Structured survival arrays expected by the scikit-survival metric
        y_train_struct_fold = Surv.from_arrays(
            event=y_event_train_fold.numpy().astype(bool),
            time=y_time_train_fold.numpy()
        )
        y_test_struct_fold = Surv.from_arrays(
            event=y_event_test_fold.numpy().astype(bool),
            time=y_time_test_fold.numpy()
        )

        # Model: one activation instance is shared by both hidden layers
        activation = activation_classes.get(params['activation'], nn.ELU)()
        model = nn.Sequential(
            nn.Linear(X_train_fold.shape[1], params['n_hidden1']),
            nn.BatchNorm1d(params['n_hidden1']),
            activation,
            nn.Dropout(params['dropout1']),
            nn.Linear(params['n_hidden1'], params['n_hidden2']),
            nn.BatchNorm1d(params['n_hidden2']),
            activation,
            nn.Dropout(params['dropout2']),
            MTLR(params['n_hidden2'], len(time_bins_fold))
        )

        optimizer_cls = optimizer_classes.get(params['optimizer'], torch.optim.Adam)
        optimizer = optimizer_cls(model.parameters(), lr=params['lr'],
                                  weight_decay=params['weight_decay'])

        # Full-batch training: one optimizer step per epoch
        model.train()
        for _ in range(params['n_epochs']):
            optimizer.zero_grad()
            logits = model(X_train_fold_tensor)
            loss = mtlr_neg_log_likelihood(logits, target_fold, model[-1],
                                           C1=params['C1'], average=True)
            loss.backward()
            optimizer.step()

        # Held-out risk score: log-sum-exp of the MTLR logits
        model.eval()
        with torch.no_grad():
            logits_test = model(X_test_fold_tensor)
            test_risk = torch.logsumexp(logits_test, dim=1).numpy()

        try:
            test_score = concordance_index_ipcw(
                y_train_struct_fold,
                y_test_struct_fold,
                test_risk,
                tau=7.0
            )[0]
        except Exception as exc:
            # Deliberate best-effort: the trial is kept alive with a neutral
            # score, but the failure is surfaced instead of being hidden.
            warnings.warn(
                f"Fold {fold}: IPCW C-index could not be computed ({exc!r}); "
                "using 0.5 for this fold",
                RuntimeWarning,
            )
            cv_scores.append(0.5)
        else:
            cv_scores.append(test_score)

    return np.mean(cv_scores)


# Launch the Optuna search (same procedure as before)
print("Début de l'optimisation Optuna...")
study = optuna.create_study(study_name='mtlr_optimization', direction='maximize')
study.optimize(objective, n_trials=1000, show_progress_bar=True)

# Summary of the best trial
best_params = study.best_params
print(f"\n OPTIMISATION TERMINÉE")
print(f"Best C-Index IPCW: {study.best_value:.4f}")
print("\nMeilleurs hyperparamètres:")
for key, value in best_params.items():
    print(f"  {key}: {value}")

# Persist the best hyperparameters as indented JSON
with open("best_params_mtlr.json", "w") as f:
    f.write(json.dumps(best_params, indent=4))

print("Paramètres sauvegardés dans best_params_mtlr.json")

## Inference

In [None]:
# Optuna diagnostics: optimisation history and hyperparameter importances
fig1 = plot_optimization_history(study)
fig2 = plot_param_importances(study)
fig1.show()
fig2.show()

# Write the best hyperparameters to disk as indented JSON
best_params = study.best_params
with open("best_params_mtlr.json", "w") as f:
    f.write(json.dumps(best_params, indent=4))


In [None]:
# Cross-validation of the network built from the best Optuna hyperparameters.
# For each fold of `kf`, a fresh model is trained full-batch on the training
# part and its IPCW C-index is recorded on both the training and the held-out
# part.
best_params = study.best_params

# Per-fold scores, in fold order
cv_scores_train_tuned = []
cv_scores_test_tuned = []

print("\n=== VALIDATION CROISÉE AVEC MODÈLE OPTIMISÉ ===")

for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    print(f"Fold {fold}: train = {len(train_idx)}, test = {len(test_idx)}")
    # Fold data
    X_train_fold = X.iloc[train_idx]
    X_test_fold = X.iloc[test_idx]
    y_time_train_fold = y_time[train_idx]
    y_time_test_fold = y_time[test_idx]
    y_event_train_fold = y_event[train_idx]
    y_event_test_fold = y_event[test_idx]

    # Conversion to tensors
    X_train_fold_tensor = torch.tensor(X_train_fold.values, dtype=torch.float32)
    X_test_fold_tensor = torch.tensor(X_test_fold.values, dtype=torch.float32)

    # Time bins are built from the training part of this fold only, then used
    # to encode the training targets
    time_bins_fold = make_time_bins(y_time_train_fold, event=y_event_train_fold)
    target_fold = encode_survival(y_time_train_fold, y_event_train_fold, time_bins_fold)

    # Structured survival arrays for the evaluation metric
    y_train_struct_fold = Surv.from_arrays(
        event=y_event_train_fold.numpy().astype(bool), 
        time=y_time_train_fold.numpy()
    )
    y_test_struct_fold = Surv.from_arrays(
        event=y_event_test_fold.numpy().astype(bool), 
        time=y_time_test_fold.numpy()
    )

    # Model built from the best hyperparameters; any activation name other
    # than 'relu' / 'leaky_relu' selects ELU
    if best_params['activation'] == 'relu':
        activation = nn.ReLU()
    elif best_params['activation'] == 'leaky_relu':
        activation = nn.LeakyReLU()
    else:
        activation = nn.ELU()

    # The same activation instance is reused after both hidden layers
    model_tuned = nn.Sequential(
        nn.Linear(X_train_fold.shape[1], best_params['n_hidden1']),
        nn.BatchNorm1d(best_params['n_hidden1']),
        activation,
        nn.Dropout(best_params['dropout1']),
        nn.Linear(best_params['n_hidden1'], best_params['n_hidden2']),
        nn.BatchNorm1d(best_params['n_hidden2']),
        activation,
        nn.Dropout(best_params['dropout2']),
        MTLR(best_params['n_hidden2'], len(time_bins_fold))
    )

    # Optimizer: AdamW when requested, Adam otherwise
    if best_params['optimizer'] == 'adamw':
        optimizer = torch.optim.AdamW(model_tuned.parameters(), lr=best_params['lr'], 
                                    weight_decay=best_params['weight_decay'])
    else:
        optimizer = torch.optim.Adam(model_tuned.parameters(), lr=best_params['lr'], 
                                   weight_decay=best_params['weight_decay'])

    # Full-batch training: one optimizer step per epoch, loss recorded each time
    model_tuned.train()
    fold_losses = []
    for epoch in range(best_params['n_epochs']):
        optimizer.zero_grad()
        logits = model_tuned(X_train_fold_tensor)
        loss = mtlr_neg_log_likelihood(logits, target_fold, model_tuned[-1], 
                                     C1=best_params['C1'], average=True)
        loss.backward()
        optimizer.step()
        fold_losses.append(loss.item())

    # Evaluation: the risk score is the log-sum-exp of the MTLR logits
    model_tuned.eval()
    with torch.no_grad():
        logits_train = model_tuned(X_train_fold_tensor)
        logits_test = model_tuned(X_test_fold_tensor)

        train_risk = torch.logsumexp(logits_train, dim=1).numpy()
        test_risk = torch.logsumexp(logits_test, dim=1).numpy()

    # IPCW C-index with tau=7; the training-fold labels are passed as the
    # first argument in both calls
    train_score = concordance_index_ipcw(y_train_struct_fold, y_train_struct_fold, train_risk, tau=7)[0]
    test_score = concordance_index_ipcw(y_train_struct_fold, y_test_struct_fold, test_risk, tau=7)[0]

    cv_scores_train_tuned.append(train_score)
    cv_scores_test_tuned.append(test_score)

    print(f"Fold {fold} - Train C-Index IPCW: {train_score:.4f}, Test C-Index IPCW: {test_score:.4f}")
    print(f"          Loss finale: {fold_losses[-1]:.4f}")

# Mean and standard deviation of the scores across folds
print(f"\nRÉSULTATS MODÈLE OPTIMISÉ")
print(f"Average Train C-Index IPCW: {np.mean(cv_scores_train_tuned):.4f} (+/- {np.std(cv_scores_train_tuned):.4f})")
print(f"Average Test C-Index IPCW: {np.mean(cv_scores_test_tuned):.4f} (+/- {np.std(cv_scores_test_tuned):.4f})")

# Comparison of the tuned model with the baseline, followed by two plots.
# The baseline scores (`cv_scores_test`) are produced by another cell; when
# that cell has not been run they are treated as absent rather than raising
# NameError, and the plots below only draw the series that actually exist.
try:
    cv_scores_test
except NameError:
    cv_scores_test = []
try:
    cv_scores_test_tuned
except NameError:
    cv_scores_test_tuned = []

# len() rather than truthiness: the containers may be NumPy arrays
has_baseline = len(cv_scores_test) > 0
has_tuned = len(cv_scores_test_tuned) > 0

if not (has_baseline and has_tuned):
    print("Warning: One or both cross-validation score lists are empty. "
          "Baseline or Tuned scores may not have been computed due to earlier errors.")
else:
    # Side-by-side summary of both models
    print(f"\nCOMPARAISON AVEC BASELINE")
    print(f"Baseline Model - Average Test C-Index IPCW: {np.mean(cv_scores_test):.4f} (+/- {np.std(cv_scores_test):.4f})")
    print(f"Tuned Model    - Average Test C-Index IPCW: {np.mean(cv_scores_test_tuned):.4f} (+/- {np.std(cv_scores_test_tuned):.4f})")

    improvement = np.mean(cv_scores_test_tuned) - np.mean(cv_scores_test)
    print(f"\nImprovement (Tuned - Baseline): {improvement:.4f}")

# Series available for plotting: (label, scores, line style, bar colour)
score_series = []
if has_baseline:
    score_series.append(('Baseline', cv_scores_test, 'o-', 'skyblue'))
if has_tuned:
    score_series.append(('Optimisé', cv_scores_test_tuned, 's-', 'lightcoral'))

if score_series:
    plt.figure(figsize=(12, 6))

    # Left panel: score of each fold. The x axis is sized from the data so
    # that any number of folds (not only 5) can be displayed.
    plt.subplot(1, 2, 1)
    for label, scores, line_style, _ in score_series:
        folds = range(1, len(scores) + 1)
        plt.plot(folds, scores, line_style, label=label, linewidth=2, markersize=8)
    plt.xlabel('Fold')
    plt.ylabel('C-Index IPCW')
    plt.title('Comparaison des performances par fold')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: mean score with the standard deviation as error bar
    plt.subplot(1, 2, 2)
    models = [entry[0] for entry in score_series]
    means = [np.mean(entry[1]) for entry in score_series]
    stds = [np.std(entry[1]) for entry in score_series]
    bar_colors = [entry[3] for entry in score_series]
    plt.bar(models, means, yerr=stds, capsize=5, alpha=0.7, color=bar_colors)
    plt.ylabel('C-Index IPCW (moyenne)')
    plt.title('Performance moyenne avec écart-type')

    plt.tight_layout()
    plt.show()


In [None]:
# Time bins and encoded targets computed from the complete training set
time_bins_final = make_time_bins(y_time, event=y_event)
target_final = encode_survival(y_time, y_event, time_bins_final)
X_tensor = torch.tensor(X.values, dtype=torch.float32)
X_eval_tensor = torch.tensor(X_eval.values, dtype=torch.float32)

# Activation chosen by the best hyperparameters; any name other than
# 'relu' / 'leaky_relu' falls back to ELU
activation_final = {
    'relu': nn.ReLU,
    'leaky_relu': nn.LeakyReLU,
}.get(best_params['activation'], nn.ELU)()

# Final network: two hidden blocks (Linear -> BatchNorm -> activation -> Dropout)
# followed by the MTLR output layer
n_hidden1 = best_params['n_hidden1']
n_hidden2 = best_params['n_hidden2']
model_final = nn.Sequential(
    nn.Linear(X.shape[1], n_hidden1),
    nn.BatchNorm1d(n_hidden1),
    activation_final,
    nn.Dropout(best_params['dropout1']),
    nn.Linear(n_hidden1, n_hidden2),
    nn.BatchNorm1d(n_hidden2),
    activation_final,
    nn.Dropout(best_params['dropout2']),
    MTLR(n_hidden2, len(time_bins_final))
)

# AdamW when requested, Adam otherwise
optimizer_cls = torch.optim.AdamW if best_params['optimizer'] == 'adamw' else torch.optim.Adam
optimizer = optimizer_cls(
    model_final.parameters(),
    lr=best_params['lr'],
    weight_decay=best_params['weight_decay'],
)

# Full-batch training on all rows; the loss is logged every 20 epochs
model_final.train()
final_losses = []
for epoch in range(best_params['n_epochs']):
    optimizer.zero_grad()
    logits = model_final(X_tensor)
    loss = mtlr_neg_log_likelihood(
        logits, target_final, model_final[-1],
        C1=best_params['C1'], average=True,
    )
    loss.backward()
    optimizer.step()
    final_losses.append(loss.item())

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}/{best_params['n_epochs']} - Loss: {loss.item():.4f}")

# Training-loss curve
plt.figure(figsize=(10, 4))
plt.plot(final_losses)
plt.title("Loss pendant l'entraînement final")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()


In [None]:
# Risk scores of the evaluation set from the final model
eval_risk = mtlr_predict_risk(model_final, X_eval_tensor, time_bins_final)

# Submission table (one risk score per evaluation ID), written without the index
submission = pd.DataFrame({"ID": df_eval["ID"], "risk_score": eval_risk})
submission.to_csv('../submissions/mtlr_tuned.csv', index=False)

# Preview of the first ten predictions
submission.head(10)

# Feature Importance

In [None]:
def compute_mtlr_feature_importance(model, feature_names, X_tensor, y_event_sub, y_time_sub, n_iterations=100):
    """Permutation importance of features, measured as a drop in IPCW C-index.

    The i-th entry of ``feature_names`` is matched with column ``i`` of
    ``X_tensor``. For each feature, that column is shuffled ``n_iterations``
    times (using NumPy's global random state) and the model is re-scored; the
    importance is the baseline score minus the mean permuted score.

    The risk score is the log-sum-exp of the model logits and the C-index is
    ``concordance_index_ipcw`` with ``tau=7``, using the same labels as both
    reference and evaluation set. The model is left in evaluation mode.

    Returns:
        ``(importances, baseline_score)`` where ``importances`` maps each
        feature name to its importance.
    """
    model.eval()

    def _risk(features):
        # Gradient-free forward pass reduced to one score per row
        with torch.no_grad():
            return torch.logsumexp(model(features), dim=1).numpy()

    baseline_risk = _risk(X_tensor)

    # Structured survival array required by the metric
    y_struct = Surv.from_arrays(
        event=y_event_sub.numpy().astype(bool),
        time=y_time_sub.numpy()
    )

    def _cindex(risk):
        return concordance_index_ipcw(y_struct, y_struct, risk, tau=7)[0]

    baseline_score = _cindex(baseline_risk)

    X_np = X_tensor.numpy().copy()

    def _permuted_score(column):
        # Work on a fresh copy so that only `column` is disturbed
        shuffled = X_np.copy()
        np.random.shuffle(shuffled[:, column])
        return _cindex(_risk(torch.tensor(shuffled, dtype=torch.float32)))

    importances = {}
    for column, feature in enumerate(feature_names):
        permuted_scores = [_permuted_score(column) for _ in range(n_iterations)]
        # Importance = loss of performance once the feature is scrambled
        importances[feature] = baseline_score - np.mean(permuted_scores)

    return importances, baseline_score

# Run on a subset made of the leading rows, capped at 1000. The cap is clamped
# to the number of available rows so that a smaller dataset does not trigger
# an out-of-range index.
# (np.random.choice(len(X), n_rows, replace=False) would give a random sample instead.)
n_rows = min(1000, len(X_tensor))
idx = np.arange(n_rows)
feature_names = X.columns.tolist()
# Only the first 20 features are evaluated; the function pairs the i-th name
# with column i of the tensor it receives.
mtlr_importances, baseline_score = compute_mtlr_feature_importance(
    model_final,
    feature_names[:20],
    X_tensor[idx],
    y_event[idx],
    y_time[idx],
    n_iterations=50
)

# Sort by absolute importance, largest first
mtlr_importances = dict(sorted(mtlr_importances.items(), key=lambda x: abs(x[1]), reverse=True))

# Ten features with the largest absolute importance
top_features = list(mtlr_importances.keys())[:10]
top_importances = list(mtlr_importances.values())[:10]

plt.figure(figsize=(12, 6))
# Red: shuffling the feature lowers the C-index; blue: it does not
colors = ['red' if imp > 0 else 'blue' for imp in top_importances]

plt.barh(top_features, top_importances, color=colors, alpha=0.7)
plt.xlabel('Importance (drop in C-index)')
plt.title('Top 10 Features les plus importantes (MTLR)')
plt.axvline(x=0, color='black', linestyle='-', alpha=0.3)
plt.gca().invert_yaxis()

# Numeric label at the tip of each bar, placed on the outer side
for i, imp in enumerate(top_importances):
    plt.text(imp, i, f'{imp:.4f}', va='center', ha='left' if imp > 0 else 'right')

plt.tight_layout()
plt.show()

# Save the baseline score and the per-feature importances as JSON.
# Values are cast to built-in floats so that serialisation does not depend on
# the NumPy scalar type returned by the metric.
feature_importance_results = {
    'baseline_score': float(baseline_score),
    'feature_importances': {name: float(value) for name, value in mtlr_importances.items()}
}

with open("mtlr_feature_importance.json", "w") as f:
    json.dump(feature_importance_results, f, indent=4)

# NOTE(review): the beginning of the following cell is missing from this
# export. The statements that define X_train, X_test, y_event_train,
# y_time_train, y_event_test, y_time_test and time_bins are not visible and
# must be restored from the original notebook. Only the lost opening line of
# the first call is reinstated here (name taken from its later use when the
# baseline model is evaluated) so that the fragment parses again.
y_train_struct = Surv.from_arrays(
    event=y_event_train.numpy().astype(bool),
    time=y_time_train.numpy()
)
y_test_struct = Surv.from_arrays(
    event=y_event_test.numpy().astype(bool),
    time=y_time_test.numpy()
)

print(f"Train: {len(X_train)} échantillons")
print(f"Test: {len(X_test)} échantillons")
print(f"Time bins: {len(time_bins)} points")

In [None]:
# Baseline network: two ReLU hidden layers (64 then 32 units), dropout after
# the first one, and an MTLR output layer sized on the number of time bins
baseline_layers = [
    nn.Linear(X_train.shape[1], 64),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(64, 32),
    nn.ReLU(),
    MTLR(32, len(time_bins)),
]
model_baseline = nn.Sequential(*baseline_layers)

# Adam with a fixed learning rate and no weight decay argument
optimizer = torch.optim.Adam(model_baseline.parameters(), lr=0.001)

In [None]:
# Full-batch training of the baseline model for 100 epochs; the loss of every
# epoch is stored for the curve plotted below.
losses = []
model_baseline.train()
for epoch in range(100):
    optimizer.zero_grad()
    logits = model_baseline(X_train_tensor)
    # MTLR negative log-likelihood; the MTLR layer (last module) is passed
    # alongside the regularisation strength C1
    loss = mtlr_neg_log_likelihood(logits, target_train, model_baseline[-1], C1=1.0, average=True)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

    # Progress report every 20 epochs
    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}/100 - Loss: {loss.item():.4f}")

# Training-loss curve
plt.figure(figsize=(10, 4))
plt.plot(losses)
plt.title('Loss pendant l\'entraînement baseline')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()

In [None]:
# Hard-coded hyperparameter set, one entry per line for readability.
# NOTE(review): this dictionary has no 'weight_decay' entry, although the
# cells that build an optimizer from `best_params` read that key — confirm
# which cells are meant to consume this literal.
best_params = dict(
    n_hidden1=16,
    n_hidden2=88,
    n_layers=2,
    dropout1=0.4880527166139476,
    dropout2=0.3406450265001462,
    lr=0.006867173180115493,
    n_epochs=75,
    batch_size=64,
    activation='relu',
    optimizer='adamw',
    use_weight_decay=False,
    C1=1.5790286737537849,
)

In [None]:
def mtlr_predict_risk(model, X_tensor, time_bins):
    """Return one risk score per row: the log-sum-exp of the model logits.

    The model is switched to evaluation mode (and left in that mode) and the
    forward pass runs without gradient tracking. ``time_bins`` is accepted but
    not used by the computation.

    Returns:
        A NumPy array holding the score of each row of ``X_tensor``.
    """
    model.eval()
    with torch.no_grad():
        return model(X_tensor).logsumexp(dim=1).numpy()

# Risk scores of the baseline model on the training and test splits
mtlr_risk_train, mtlr_risk_test = (
    mtlr_predict_risk(model_baseline, features, time_bins)
    for features in (X_train_tensor, X_test_tensor)
)

# IPCW C-index with tau=7; the training labels are the first argument of both calls
mtlr_cindex_train, mtlr_cindex_test = (
    concordance_index_ipcw(y_train_struct, labels, risk, tau=7)[0]
    for labels, risk in (
        (y_train_struct, mtlr_risk_train),
        (y_test_struct, mtlr_risk_test),
    )
)


print(f"MTLR Model Concordance Index IPCW on train: {mtlr_cindex_train:.4f}")
print(f"MTLR Model Concordance Index IPCW on test: {mtlr_cindex_test:.4f}")


## Training

In [None]:
def objective(trial):
    # Hyperparamètres à optimiser
    params = {
        'n_hidden1': trial.suggest_int('n_hidden1', 16, 240, step=32),  # 16 + 7*32 = 240
        'n_hidden2': trial.suggest_int('n_hidden2', 8, 120, step=16),   # 8 + 7*16 = 120
        'dropout1': trial.suggest_float('dropout1', 0.0, 0.5),
        'dropout2': trial.suggest_float('dropout2', 0.0, 0.5),
        'lr': trial.suggest_float('lr', 1e-4, 1e-2, log=True),
        'n_epochs': trial.suggest_int('n_epochs', 50, 200, step=25),
        'C1': trial.suggest_float('C1', 0.1, 5.0, log=True),
        'activation': trial.suggest_categorical('activation', ['relu', 'leaky_relu', 'elu']),
        'optimizer': trial.suggest_categorical('optimizer', ['adam', 'adamw']),
        'weight_decay': trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    }
    
    cv_scores = []
    
    for fold, (train_idx, test_idx) in enumerate(kf.split(X)):
        # Préparation des données du fold
        X_train_fold = X.iloc[train_idx]
        X_test_fold = X.iloc[test_idx]
        y_time_train_fold = y_time[train_idx]
        y_time_test_fold = y_time[test_idx]
        y_event_train_fold = y_event[train_idx]
        y_event_test_fold = y_event[test_idx]
        
        # Conversion en tenseurs
        X_train_fold_tensor = torch.tensor(X_train_fold.values, dtype=torch.float32)
        X_test_fold_tensor = torch.tensor(X_test_fold.values, dtype=torch.float32)
        
        # Time bins et encodage
        time_bins_fold = make_time_bins(y_time_train_fold, event=y_event_train_fold)
        target_fold = encode_survival(y_time_train_fold, y_event_train_fold, time_bins_fold)
        
        # Structure Surv pour l'entraînement
        y_train_struct_fold = Surv.from_arrays(
            event=y_event_train_fold.numpy().astype(bool), 
            time=y_time_train_fold.numpy()
        )
        
        # Construction du modèle
        if params['activation'] == 'relu':
            activation = nn.ReLU()
        elif params['activation'] == 'leaky_relu':
            activation = nn.LeakyReLU()
        else:  # elu
            activation = nn.ELU()
            
        model = nn.Sequential(
            nn.Linear(X_train_fold.shape[1], params['n_hidden1']),
            nn.BatchNorm1d(params['n_hidden1']),
            activation,
            nn.Dropout(params['dropout1']),
            nn.Linear(params['n_hidden1'], params['n_hidden2']),
            nn.BatchNorm1d(params['n_hidden2']),
            activation,
            nn.Dropout(params['dropout2']),
            MTLR(params['n_hidden2'], len(time_bins_fold))
        )
        
        # Optimizer
        if params['optimizer'] == 'adamw':
            optimizer = torch.optim.AdamW(model.parameters(), lr=params['lr'], 
                                        weight_decay=params['weight_decay'])
        else:
            optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'], 
                                       weight_decay=params['weight_decay'])
        
        # Entraînement
        model.train()
        for epoch in range(params['n_epochs']):
            optimizer.zero_grad()
            logits = model(X_train_fold_tensor)
            loss = mtlr_neg_log_likelihood(logits, target_fold, model[-1], 
                                         C1=params['C1'], average=True)
            loss.backward()
            optimizer.step()
        
        # Évaluation avec la méthode optimale (logsumexp)
        model.eval()
        with torch.no_grad():
            logits_test = model(X_test_fold_tensor)
            test_risk = torch.logsumexp(logits_test, dim=1).numpy()
        
        # Structure Surv pour le test
        y_test_struct_fold = Surv.from_arrays(
            event=y_event_test_fold.numpy().astype(bool), 
            time=y_time_test_fold.numpy()
        )
        
        # Calcul du C-index
        try:
            test_score = concordance_index_ipcw(
                y_train_struct_fold, 
                y_test_struct_fold, 
                test_risk, 
                tau=7.0
            )[0]
            cv_scores.append(test_score)
        except Exception as e:
            # En cas d'erreur, on utilise un score par défaut
            cv_scores.append(0.5)
            continue
    
    return np.mean(cv
