In [None]:
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import time

def bootstrap_confidence_interval(model, x_test, y_test, metric, n_bootstrap=1000, alpha=0.05, random_state=None):
    """
    Estimate a percentile-bootstrap confidence interval for a metric.

    The fitted model predicts the evaluation set once; the (label, prediction)
    pairs are then resampled with replacement ``n_bootstrap`` times and
    ``metric`` is evaluated on every resample.

    :param model: Fitted estimator exposing ``predict(X)``. Predictions are
        assumed to be made row by row (independent of the other rows in the
        batch), which is what makes predicting once equivalent to predicting
        each resample separately.
    :param x_test: Evaluation features (anything ``np.asarray`` accepts).
    :param y_test: Evaluation labels, same length as ``x_test``.
    :param metric: Callable ``metric(y_true, y_pred)`` returning a scalar score.
    :param n_bootstrap: Number of bootstrap resamples (must be >= 1).
    :param alpha: Significance level; the interval covers ``1 - alpha``
        (0.05 gives a 95% interval).
    :param random_state: Optional integer seed for reproducible resampling.
        With ``None`` the draws come from NumPy's global random state.
    :return: Tuple ``(lower_bound, upper_bound)`` holding the ``alpha / 2``
        and ``1 - alpha / 2`` percentiles of the bootstrap scores.
    :raises ValueError: If the evaluation set is empty, the feature and label
        lengths differ, ``n_bootstrap`` < 1, or ``alpha`` is outside (0, 1).
    """
    features = np.asarray(x_test)
    labels = np.asarray(y_test)
    n_samples = len(features)

    if n_samples == 0:
        raise ValueError("cannot bootstrap an empty evaluation set")
    if len(labels) != n_samples:
        raise ValueError(
            f"x_test and y_test must have the same length, got {n_samples} and {len(labels)}"
        )
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < alpha < 1:
        raise ValueError("alpha must lie strictly between 0 and 1")

    # The module-level np.random functions share the global state, so an
    # earlier np.random.seed(...) call keeps controlling unseeded runs.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Loop-invariant: the model is not refitted, so predict a single time and
    # resample the predictions instead of calling predict n_bootstrap times.
    predictions = np.asarray(model.predict(features))

    scores = []
    for _ in range(n_bootstrap):
        # Draw n_samples row positions uniformly, with replacement.
        indices = rng.randint(0, n_samples, size=n_samples)
        scores.append(metric(labels[indices], predictions[indices]))

    lower_bound, upper_bound = np.percentile(
        scores, [alpha / 2 * 100, (1 - alpha / 2) * 100]
    )
    return (lower_bound, upper_bound)

def evaluate_model(model, x_train, y_train, x_dev, y_dev, metric_func, additional_params=None):
    """
    Fit a model on the training split and score it on the held-out split.

    :param model: Estimator exposing ``set_params``, ``fit`` and ``predict``.
        It is configured and fitted in place.
    :param x_train: Training features.
    :param y_train: Training labels.
    :param x_dev: Held-out features used for every reported score.
    :param y_dev: Held-out labels.
    :param metric_func: Callable ``metric(y_true, y_pred)`` used for the
        bootstrap confidence interval.
    :param additional_params: Optional dict of hyperparameters applied with
        ``model.set_params`` before fitting; skipped when ``None`` or empty.
    :return: Dict with keys ``"Accuracy"``, ``"F1-Score"`` (weighted average),
        ``"Time (s)"`` (fit duration only) and ``"Confidence Interval"``.
    """
    if additional_params:
        model.set_params(**additional_params)

    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)
    x_dev = np.asarray(x_dev)
    y_dev = np.asarray(y_dev)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments while the model is fitting.
    start_time = time.perf_counter()
    model.fit(x_train, y_train)
    train_time = time.perf_counter() - start_time

    y_pred = model.predict(x_dev)
    accuracy = accuracy_score(y_dev, y_pred)
    f1 = f1_score(y_dev, y_pred, average='weighted')
    # Bootstrap on the split passed to this function (x_dev / y_dev) so the
    # interval describes the same data as the accuracy and F1 above, instead
    # of depending on notebook-level globals.
    confidence_interval = bootstrap_confidence_interval(model, x_dev, y_dev, metric_func)

    return {
        "Accuracy": accuracy,
        "F1-Score": f1,
        "Time (s)": train_time,
        "Confidence Interval": confidence_interval,
    }


In [None]:
from sklearn.utils import resample
import numpy as np

def bootstrap_confidence_interval(model, x_test, y_test, metric, n_bootstrap=1000, alpha=0.05, random_state=None):
    """
    Compute a percentile-bootstrap confidence interval for a given metric.

    NOTE: this cell redefines ``bootstrap_confidence_interval`` and therefore
    shadows the definition from the first cell once it has been executed;
    keep a single definition when tidying the notebook.

    The fitted model predicts the evaluation set once; the (label, prediction)
    pairs are then resampled with replacement ``n_bootstrap`` times and
    ``metric`` is evaluated on every resample.

    :param model: Fitted estimator exposing ``predict(X)``. Predictions are
        assumed to be made row by row (independent of the other rows in the
        batch), which is what makes predicting once equivalent to predicting
        each resample separately.
    :param x_test: Test features (list, NumPy array, DataFrame, ...); they are
        converted with ``np.asarray`` so indexing is always positional.
    :param y_test: Test labels, same length as ``x_test``.
    :param metric: Function computing the metric, called as
        ``metric(y_true, y_pred)`` (e.g. ``accuracy_score``).
    :param n_bootstrap: Number of bootstrap resamples (must be >= 1).
    :param alpha: Significance level; the interval covers ``1 - alpha``
        (the default 0.05 gives a 95% interval).
    :param random_state: Optional integer seed for reproducible resampling.
        With ``None`` the draws come from NumPy's global random state.
    :return: Tuple with the lower and upper bounds of the confidence interval.
    :raises ValueError: If the test set is empty, the feature and label
        lengths differ, ``n_bootstrap`` < 1, or ``alpha`` is outside (0, 1).
    """
    # Positional NumPy arrays: indexing a list with an index array fails, and
    # indexing a DataFrame / Series with it selects by column or label.
    features = np.asarray(x_test)
    labels = np.asarray(y_test)
    n_samples = len(features)

    if n_samples == 0:
        raise ValueError("cannot bootstrap an empty evaluation set")
    if len(labels) != n_samples:
        raise ValueError(
            f"x_test and y_test must have the same length, got {n_samples} and {len(labels)}"
        )
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < alpha < 1:
        raise ValueError("alpha must lie strictly between 0 and 1")

    # The module-level np.random functions share the global state, so an
    # earlier np.random.seed(...) call keeps controlling unseeded runs.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Loop-invariant: the model is not refitted, so predict a single time and
    # resample the predictions instead of calling predict n_bootstrap times.
    predictions = np.asarray(model.predict(features))

    scores = []
    for _ in range(n_bootstrap):
        # Generate bootstrap indices: n_samples positions, with replacement.
        indices = rng.randint(0, n_samples, size=n_samples)
        # Score this resample.
        scores.append(metric(labels[indices], predictions[indices]))

    # Percentiles of the bootstrap distribution give the interval bounds.
    lower_bound, upper_bound = np.percentile(
        scores, [alpha / 2 * 100, (1 - alpha / 2) * 100]
    )
    return (lower_bound, upper_bound)


In [None]:
# Hyperparameter grids to evaluate: one dict per configuration, applied to the
# matching estimator through set_params.
rf_params = [
    {"n_estimators": 50, "max_depth": 10},
    {"n_estimators": 100, "max_depth": 10},
    {"n_estimators": 50, "max_depth": 20},
    {"n_estimators": 100, "max_depth": 20},
    {"n_estimators": 200, "max_depth": 10},
    {"n_estimators": 200, "max_depth": 20},
]
svm_params = [
    {"kernel": "poly", "C": 1},
    {"kernel": "rbf", "C": 1},
    {"kernel": "rbf", "C": 10},
    {"kernel": "poly", "C": 10},
    {"kernel": "sigmoid", "C": 1},
    {"kernel": "sigmoid", "C": 10},
]
mlp_params = [
    {"hidden_layer_sizes": (50,), "max_iter": 300},
    {"hidden_layer_sizes": (100,), "max_iter": 500},
]
# GaussianNB has no significant hyperparameters to vary, so a single
# configuration is run with scikit-learn's default var_smoothing value.
gnb_params = [{"var_smoothing": 1e-9}]


In [None]:
# Initialiser les modèles
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Each entry pairs an estimator instance with the list of hyperparameter
# dicts to try on it; insertion order is the order the models are trained in.
models_params = dict(
    GaussianNB=(GaussianNB(), gnb_params),
    SVM=(SVC(random_state=42), svm_params),
    RandomForest=(RandomForestClassifier(random_state=42), rf_params),
    MLP=(MLPClassifier(random_state=42), mlp_params),
)

# Results: model name -> list of per-configuration result dicts.
results = {}

for model_name in models_params:
    model, param_list = models_params[model_name]
    print(f"Entraînement de {model_name}...")
    results[model_name] = []
    for params in param_list:
        # The same estimator instance is reconfigured and refitted for every
        # hyperparameter set.
        result = evaluate_model(
            model, x_train, y_train, x_test, y_test, accuracy_score, params
        )
        # Keep the configuration next to its scores.
        result["Hyperparameters"] = params
        results[model_name].append(result)


In [None]:
import pandas as pd

# Print one summary table per model, with the columns in a fixed order.
for model_name in results:
    model_results = results[model_name]
    print(f"\nRésultats pour {model_name} :\n")
    df = pd.DataFrame(model_results)
    print(
        df.loc[
            :,
            ["Hyperparameters", "Accuracy", "F1-Score", "Time (s)", "Confidence Interval"],
        ]
    )
