In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.datasets import load_iris
import optuna

# Charger le dataset Iris
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="species")

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fonction objectif pour Optuna
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 10, 30, log=True)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 5)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )

    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    return accuracy

# Créer une étude Optuna et optimiser
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, n_jobs=-1)

# Meilleurs hyperparamètres trouvés
print("Meilleurs hyperparamètres :", study.best_params)

# Entraîner le modèle avec les meilleurs hyperparamètres
best_params = study.best_params
best_model = RandomForestClassifier(
    n_estimators=best_params['n_estimators'],
    max_depth=best_params['max_depth'],
    min_samples_split=best_params['min_samples_split'],
    min_samples_leaf=best_params['min_samples_leaf'],
    random_state=42
)
best_model.fit(X_train, y_train)

# Prédictions sur l'ensemble de test
y_pred = best_model.predict(X_test)

# Rapport de classification
print("\nRapport de classification :\n")
print(classification_report(y_test, y_pred))

# Méthode pour évaluer le modèle
def evaluate_model(model, X_test, y_test):
    accuracy = accuracy_score(y_test, model.predict(X_test))
    print(f"\nPrécision du modèle : {accuracy:.2f}")
    return accuracy

# Évaluer le modèle
evaluate_model(best_model, X_test, y_test)

[I 2024-12-12 13:15:19,719] A new study created in memory with name: no-name-a13c502b-b6dc-4901-b8c1-9a423d51354f
[I 2024-12-12 13:15:20,379] Trial 4 finished with value: 1.0 and parameters: {'n_estimators': 54, 'max_depth': 14, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 4 with value: 1.0.
[I 2024-12-12 13:15:20,532] Trial 8 finished with value: 1.0 and parameters: {'n_estimators': 81, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 4 with value: 1.0.
[I 2024-12-12 13:15:20,603] Trial 10 finished with value: 1.0 and parameters: {'n_estimators': 77, 'max_depth': 23, 'min_samples_split': 10, 'min_samples_leaf': 5}. Best is trial 4 with value: 1.0.
[I 2024-12-12 13:15:20,654] Trial 5 finished with value: 1.0 and parameters: {'n_estimators': 78, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 4 with value: 1.0.
[I 2024-12-12 13:15:20,903] Trial 11 finished with value: 1.0 and parameters: {'n_estimators': 121, 

Meilleurs hyperparamètres : {'n_estimators': 54, 'max_depth': 14, 'min_samples_split': 4, 'min_samples_leaf': 1}

Rapport de classification :

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Précision du modèle : 1.00


1.0