In [None]:
from training.dnn.train_dnn_3_times import *

In [None]:
# Simulated arguments object so the training pipeline can be driven from the notebook
class Args:
    """Run configuration handed to the training helpers used in this notebook.

    Every setting is a keyword-only parameter whose default is the value that
    used to be hard-coded, so ``Args()`` yields exactly the previous
    configuration, while e.g. ``Args(exp_name="lstm")`` switches experiment
    without editing the class body.

    Attributes:
        output_path: Directory prefix under which the results CSV is written.
        models_path: Directory prefix under which model weight files live.
        exp_name: Experiment/model name; the original note lists "mlp",
            "lstm" and "gru" as the choices (not validated here).
        seed: Base seed from which the per-run seeds are derived.
        batch_size: Forwarded to the training helper via this object
            (exact use is defined in the training module).
        epochs: Forwarded to the training helper via this object.
        lr: Learning rate, forwarded to the training helper via this object.
    """

    def __init__(
        self,
        *,
        output_path="outputs/val_set",
        models_path="models/val_set",
        exp_name="mlp",  # Change here: "mlp", "lstm", "gru"
        seed=42,
        batch_size=4096,
        epochs=100,
        lr=0.01,
    ):
        self.output_path = output_path
        self.models_path = models_path
        self.exp_name = exp_name
        self.seed = seed
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr


# Build the configuration object that every later cell reads from.
args = Args()

# Echo the active settings so they can be checked before training starts.
print(
    f"Configurazione:",
    f"  - Model: {args.exp_name}",
    f"  - Output path: {args.output_path}",
    f"  - Models path: {args.models_path}",
    f"  - Seed: {args.seed}",
    f"  - Batch size: {args.batch_size}",
    f"  - Epochs: {args.epochs}",
    f"  - Learning rate: {args.lr}",
    sep="\n",
)

In [None]:
# Announce which experiment is about to run.
banner = f"Starting {args.exp_name.upper()} training pipeline with multiple seeds..."
print(banner)

# Hand the configuration to the project's setup helper before any data is loaded.
setup_environment(args)

In [None]:
print("Loading pre-prepared data splits...")

# load_splits() returns five values: the three data splits plus two column
# collections (X_cols / y_cols — presumably feature and target columns;
# confirm in the training module).
splits = load_splits()
train_set, val_set, test_set, X_cols, y_cols = splits

# Prepare data for TensorFlow; the experiment name is passed along as the last argument.
(X_train, X_val, X_test, y_train, y_val, y_test) = prepare_data(
    train_set,
    val_set,
    test_set,
    X_cols,
    y_cols,
    args.exp_name,
)

In [None]:
# Three consecutive seeds, starting at the configured base seed.
seeds = [args.seed + offset for offset in range(3)]

# Trackers for the best run seen so far; MAE starts at +inf so the first
# finite score always wins the comparison in the training loop.
best_mae, best_model, best_results, best_seed = float("inf"), None, None, None

# Section banner listing the seeds that will be used.
print(
    f"\n{'='*60}",
    f"TRAINING WITH MULTIPLE SEEDS: {seeds}",
    f"{'='*60}",
    sep="\n",
)

In [None]:
# Train and evaluate one model per seed, remembering the run with the lowest MAE.
# The run total is derived from `seeds` so the banner stays correct if the
# seed list is ever lengthened or shortened (it used to be hard-coded to 3).
total_runs = len(seeds)
for i, seed in enumerate(seeds, 1):
    print(f"\n{'='*40}")
    print(f"RUN {i}/{total_runs} - SEED {seed}")
    print(f"{'='*40}")

    # Train model with this run's seed
    model, history = train(X_train, y_train, X_val, y_val, args, seed)

    # Evaluate model on the validation split
    results = evaluate_model(model, args.exp_name, val_set, X_cols, y_cols)

    # Calculate MAE from the evaluation results
    current_mae = calculate_mae(results)
    print(f"\nSeed {seed} - Patient-based MAE: {current_mae:.4f}")

    # Keep this run if it beats the best score so far (strictly lower MAE,
    # so on a tie the earlier seed is retained)
    if current_mae < best_mae:
        best_mae = current_mae
        best_model = model
        best_results = results
        best_seed = seed
        print(f"🏆 New best model found with seed {seed}!")

    print(f"Current best MAE: {best_mae:.4f} (seed {best_seed})")

In [None]:
# Report the winning run; nothing is written to disk in this cell.
print(
    f"\n{'='*60}",
    f"SAVING BEST MODEL",
    f"{'='*60}",
    sep="\n",
)

# Which seed won, and with what score.
print(f"Best model achieved with seed {best_seed}")
print(f"Best MAE: {best_mae:.4f}")

# Delegate the final report for the winning run to the project's helper.
print_results(best_results)

In [None]:
# Persist the winning run: promote its seed-specific weights file to the final
# name, remove the other seeds' temporary weights, and write the results CSV.
import os  # imported explicitly instead of relying on the star import to expose it
import shutil

# Fail early with a clear message if the training loop never selected a run;
# otherwise the failure would surface later as an AttributeError on None.
if best_seed is None or best_results is None:
    raise RuntimeError(
        "No best model available: run the training cell successfully before saving."
    )

# Seed-specific weights file (presumably written during training — confirm in
# the training module) and the final name it is promoted to.
temp_model_path = f"{args.models_path}/{args.exp_name}_seed_{best_seed}.weights.h5"
final_model_path = f"{args.models_path}/{args.exp_name}.weights.h5"

model_saved = os.path.exists(temp_model_path)
if model_saved:
    shutil.move(temp_model_path, final_model_path)
    print(f"Best model saved to: {final_model_path}")

    # Clean up other temporary model files (only once the best one is safely moved)
    for seed in seeds:
        if seed != best_seed:
            temp_path = f"{args.models_path}/{args.exp_name}_seed_{seed}.weights.h5"
            if os.path.exists(temp_path):
                os.remove(temp_path)
else:
    # Previously this case was skipped silently; make it visible and leave the
    # other seeds' files in place so nothing is lost.
    print(f"WARNING: best model weights not found at: {temp_model_path}")
    print("Best model was NOT saved; temporary weight files were left untouched.")

# Save best results, creating the output directory if it does not exist yet
os.makedirs(args.output_path, exist_ok=True)
output_file = f"{args.output_path}/{args.exp_name}_output.csv"
best_results.to_csv(output_file, index=False)
print(f"Best results saved to: {output_file}")

# Only claim full success when the weights were actually saved
if model_saved:
    print("\nTraining pipeline completed successfully!")
else:
    print("\nTraining pipeline completed, but the best model weights were not saved.")
print(f"Best model trained with seed {best_seed} (MAE: {best_mae:.4f})")