In [None]:
import copy
import os
import time

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

In [53]:
# Directory containing the input CSV datasets; read by load_csv_dataset().
data_dir = "data_csv"

In [54]:
def load_csv_dataset(file_name, directory=None, label_col="label"):
    """
    Load a CSV dataset and split it into a feature matrix and a label vector.

    file_name : name of the CSV file, relative to ``directory``
    directory : folder containing the file; defaults to the module-level
                ``data_dir`` when omitted
    label_col : name of the column holding the class labels (default "label");
                every other column is treated as a feature

    Returns (X, Y) as NumPy arrays: X holds all non-label columns, Y the
    label column. Labels are expected to be in {-1, 1}; this is not checked
    here.

    Raises KeyError if ``label_col`` is not a column of the file.
    """
    if directory is None:
        directory = data_dir
    full_path = os.path.join(directory, file_name)
    data = pd.read_csv(full_path)

    # Fail early with an actionable message instead of pandas' generic one.
    if label_col not in data.columns:
        raise KeyError(
            f"column {label_col!r} not found in {full_path}; "
            f"available columns: {list(data.columns)}"
        )

    X = data.drop(columns=[label_col]).to_numpy()
    Y = data[label_col].to_numpy()
    return X, Y

In [55]:
def _fresh_copy(classifier):
    """Return an independent copy of ``classifier`` for a throw-away fit.

    Objects exposing ``get_params`` (the scikit-learn estimator API) are
    duplicated with ``sklearn.base.clone``, which copies hyper-parameters
    without any fitted state. Any other fit/predict object is deep-copied.
    """
    if hasattr(classifier, "get_params"):
        return clone(classifier)
    return copy.deepcopy(classifier)


def passive_learning(X, Y, classifier=None, step=5):
    """
    Passive learning with a learning curve.

    X, Y       : full dataset (features, labels); examples are consumed in
                 the order given
    classifier : object with fit/predict (default: Perceptron with
                 max_iter=1000, tol=1e-3, random_state=42). A fresh copy of
                 it is trained for every point of the curve; the object
                 itself is only fitted once, on the full dataset.
    step       : increment of the number of training examples between two
                 points of the curve (must be >= 1)

    Returns (classifier, final_accuracy, learning_curve):
      - classifier     : the given (or default) classifier fitted on (X, Y)
      - final_accuracy : its accuracy measured on (X, Y)
      - learning_curve : list of (n_examples, accuracy) tuples for prefix
                         sizes step, 2*step, ... and always the full dataset

    All accuracies are measured on the full (X, Y), which includes the
    training examples; they are not held-out scores.

    Raises ValueError if ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError(f"step must be >= 1, got {step}")
    if classifier is None:
        classifier = Perceptron(max_iter=1000, tol=1e-3, random_state=42)

    n_samples = len(Y)

    # Prefix sizes to evaluate; make sure the curve ends on the full dataset
    # even when n_samples is not a multiple of step.
    sizes = list(range(step, n_samples + 1, step))
    if n_samples and (not sizes or sizes[-1] != n_samples):
        sizes.append(n_samples)

    learning_curve = []
    for size in sizes:
        Y_sub = Y[:size]

        # A classifier cannot be trained while only one class has been seen.
        if len(np.unique(Y_sub)) < 2:
            continue

        # Train an untouched copy so every point of the curve uses the
        # caller's model and hyper-parameters, independently of the others.
        clf = _fresh_copy(classifier)
        clf.fit(X[:size], Y_sub)
        acc = float(np.mean(clf.predict(X) == Y))
        learning_curve.append((size, acc))

    # Fit the caller's classifier on the whole dataset and score it the same
    # way as the curve points.
    classifier.fit(X, Y)
    final_accuracy = float(np.mean(classifier.predict(X) == Y))
    return classifier, final_accuracy, learning_curve


In [56]:
import pandas as pd
import os

# Directory where result CSV files are written; created up front if missing
results_dir = "results_csv"
os.makedirs(results_dir, exist_ok=True)

def save_passive_results_csv(results, file_name="passive_results.csv"):
    """
    Write passive-learning results to a CSV file inside ``results_dir``.

    results   : list of dicts, one per row, e.g.
                [{'dataset': ..., 'accuracy': ..., 'n_labels': ...}, ...]
    file_name : name of the CSV file created under ``results_dir``
    """
    full_path = os.path.join(results_dir, file_name)
    # One row per result dict; the positional index is not written out.
    pd.DataFrame(results).to_csv(full_path, index=False)
    print(f"Résultats enregistrés dans {full_path}")


In [57]:
# Accumulates one result dict per dataset run; written out by save_passive_results_csv().
results = []

In [58]:
# Passive-learning run on the "synthetique" dataset.
dataset = "synthetique"
X, Y = load_csv_dataset(f"{dataset}.csv")

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump if it is adjusted.
start_time = time.perf_counter()
clf, acc, learning_curve = passive_learning(X, Y)
end_time = time.perf_counter()
passive_time = end_time - start_time

results.append({
    "dataset": dataset,
    "accuracy": acc,
    "n_labels": len(Y),  # every example of the dataset is labelled and used
    "time": passive_time,  # elapsed seconds for the passive_learning call
    "learning_curve": str(learning_curve),  # stored as its string repr
})

In [59]:
# Passive-learning run on the "iris" dataset.
dataset = "iris"
X, Y = load_csv_dataset(f"{dataset}.csv")

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the system clock and can jump if it is adjusted.
start_time = time.perf_counter()
clf, acc, learning_curve = passive_learning(X, Y)
end_time = time.perf_counter()
passive_time = end_time - start_time

results.append({
    "dataset": dataset,
    "accuracy": acc,
    "n_labels": len(Y),  # every example of the dataset is labelled and used
    "time": passive_time,  # elapsed seconds for the passive_learning call
    "learning_curve": str(learning_curve),  # stored as its string repr
})

In [60]:
# Write the accumulated results to the CSV file
save_passive_results_csv(results)

Résultats enregistrés dans results_csv\passive_results.csv
