In [1]:
import polars as pl 
# Load the "silver" mega-evolution table from its CSV export
# (path is relative to the notebook's working directory).
silver_csv_path = '../data/silver_mega_evolutions.csv'
silver_df = pl.read_csv(silver_csv_path)

In [2]:
# Target columns: the evolved stats the models have to predict.
target_cols = [
    f'evolved_{stat}'
    for stat in ('attack', 'defense', 'sp_attack', 'sp_defense', 'speed')
]

# Feature columns: every column of the silver table whose name starts with 'base_'.
input_cols = list(filter(lambda name: name.startswith('base_'), silver_df.columns))

# Build the feature frame X and the target frame y.
X, y = silver_df.select(input_cols), silver_df.select(target_cols)

In [3]:
from sklearn.model_selection import train_test_split

# Convert the frames to NumPy arrays, then hold out 20% of the rows as a
# test set; the fixed random_state keeps the split reproducible.
X_np, y_np = X.to_numpy(), y.to_numpy()
split_arrays = train_test_split(X_np, y_np, test_size=0.2, random_state=42)
X_train_np, X_test_np, y_train_np, y_test_np = split_arrays

In [4]:
from sklearn.preprocessing import StandardScaler

# Two independent standardisers: one for the features, one for the targets.
scaler_X, scaler_y = StandardScaler(), StandardScaler()

In [5]:
# Fit each scaler on the training split only, then apply the fitted
# transformation to the matching test split (tuple elements are evaluated
# left to right, so the fit always happens before the test transform).
X_train_scaled, X_test_scaled = (
    scaler_X.fit_transform(X_train_np),
    scaler_X.transform(X_test_np),
)
y_train_scaled, y_test_scaled = (
    scaler_y.fit_transform(y_train_np),
    scaler_y.transform(y_test_np),
)

In [6]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.multioutput import MultiOutputRegressor, RegressorChain
from sklearn.svm import SVR

# Candidate regressors keyed by display name; insertion order is the order
# in which they are later iterated. Every estimator uses the same seed.
models = {}
models["Decision Tree"] = DecisionTreeRegressor(random_state=42)
models["Random Forest"] = RandomForestRegressor(random_state=42)
# Gradient boosting is wrapped so that it can be fitted on several targets:
# once with independent per-target regressors ...
models["Gradient Boosting"] = MultiOutputRegressor(
    GradientBoostingRegressor(random_state=42)
)
# ... and once as a chain over the 5 target columns (indices 0-4).
models["Gradient Boosting Chain"] = RegressorChain(
    GradientBoostingRegressor(random_state=42),
    order=list(range(5)),
)

In [7]:
# MLflow is used to track the experiments; the sklearn metrics and NumPy
# are needed by the evaluation code.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn

# Select the MLflow experiment under which the runs are recorded.
mlflow.set_experiment(experiment_name="mega_evolution_prediction")

<Experiment: artifact_location='file:///Users/davidjbreau/projects/PKMN.DB/ml/evolution_prediction/notebooks/mlruns/347477286032186581', creation_time=1747857136449, experiment_id='347477286032186581', last_update_time=1747857136449, lifecycle_stage='active', name='mega_evolution_prediction', tags={}>

In [8]:
from mlflow.models.signature import infer_signature

# The model signature depends only on the training arrays, so it is inferred
# once here instead of being re-imported and recomputed for every model.
# NOTE(review): the signature (and therefore the logged models) describe
# *scaled* inputs and outputs, and scaler_X / scaler_y are not logged with
# them — confirm that consumers of the logged models apply the same scaling.
signature = infer_signature(X_train_scaled, y_train_scaled)

# Ground truth in original units: use the held-out targets directly rather
# than scaling them and then inverting the scaling again.
y_true = y_test_np

# Per-model metrics: {model name: {stat name | "Average": {"RMSE", "R²"}}}.
results = {}

for name, model in models.items():
    print(f"\nEntraînement de {name}...")

    # One MLflow run per model, named after the model.
    with mlflow.start_run(run_name=name):
        # Training on the standardised features/targets.
        model.fit(X_train_scaled, y_train_scaled)

        # Predictions are produced in scaled space and mapped back to the
        # original units before being scored.
        y_pred_scaled = model.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        # Evaluation, one target column at a time.
        metrics = {}
        rmse_values = []
        r2_values = []

        for i, col in enumerate(target_cols):
            stat_name = col.replace('evolved_', '')
            rmse = np.sqrt(mean_squared_error(y_true[:, i], y_pred[:, i]))
            r2 = r2_score(y_true[:, i], y_pred[:, i])

            # Collect the values so the averages can be computed afterwards.
            rmse_values.append(rmse)
            r2_values.append(r2)

            # Record the per-stat metrics in MLflow.
            mlflow.log_metric(f"{stat_name}_RMSE", rmse)
            mlflow.log_metric(f"{stat_name}_R2", r2)

            metrics[stat_name] = {"RMSE": rmse, "R²": r2}

        # Unweighted means over the target columns.
        avg_rmse = np.mean(rmse_values)
        avg_r2 = np.mean(r2_values)

        mlflow.log_metric("average_RMSE", avg_rmse)
        mlflow.log_metric("average_R2", avg_r2)

        # Store the averages next to the per-stat metrics.
        metrics["Average"] = {"RMSE": avg_rmse, "R²": avg_r2}

        # Log the fitted model in MLflow together with its signature.
        mlflow.sklearn.log_model(model, name, signature=signature)

        # Print the results.
        print(f"Résultats pour {name}:")
        for stat, vals in metrics.items():
            print(f"  {stat}: RMSE = {vals['RMSE']:.2f}, R² = {vals['R²']:.2f}")

        # Highlight the overall metric.
        print(f"  --> Moyenne globale: RMSE = {avg_rmse:.2f}, R² = {avg_r2:.2f}")

        results[name] = metrics


Entraînement de Decision Tree...


Résultats pour Decision Tree:
  attack: RMSE = 26.82, R² = 0.46
  defense: RMSE = 31.84, R² = -0.12
  sp_attack: RMSE = 41.14, R² = 0.05
  sp_defense: RMSE = 16.83, R² = 0.19
  speed: RMSE = 45.94, R² = -0.16
  Average: RMSE = 32.51, R² = 0.08
  --> Moyenne globale: RMSE = 32.51, R² = 0.08

Entraînement de Random Forest...
Résultats pour Random Forest:
  attack: RMSE = 23.96, R² = 0.57
  defense: RMSE = 23.65, R² = 0.38
  sp_attack: RMSE = 29.65, R² = 0.51
  sp_defense: RMSE = 17.03, R² = 0.17
  speed: RMSE = 32.72, R² = 0.41
  Average: RMSE = 25.40, R² = 0.41
  --> Moyenne globale: RMSE = 25.40, R² = 0.41

Entraînement de Gradient Boosting...
Résultats pour Gradient Boosting:
  attack: RMSE = 24.38, R² = 0.55
  defense: RMSE = 22.29, R² = 0.45
  sp_attack: RMSE = 14.35, R² = 0.88
  sp_defense: RMSE = 17.86, R² = 0.09
  speed: RMSE = 18.76, R² = 0.81
  Average: RMSE = 19.53, R² = 0.56
  --> Moyenne globale: RMSE = 19.53, R² = 0.56

Entraînement de Gradient Boosting Chain...
Résultats p