In [None]:
import cudf
from cuml.linear_model import LogisticRegression
from cuml.ensemble import RandomForestClassifier
from cuml.metrics import accuracy_score, roc_auc_score
from sklearn.metrics import recall_score, precision_score  # ✅ ici

from cuml.model_selection import train_test_split
import pandas as pd

# Benchmark two cuML classifiers on the credit-card dataset and export their
# metrics to CSV. The label column is "Class"; every other column is a feature.
data_path = "creditcard.csv"

df = cudf.read_csv(data_path)
features = [col for col in df.columns if col != "Class"]
X = df[features]
y = df["Class"]

# Features + label in a single frame (not used below; kept for later cells).
processed_df = X.copy()
processed_df["Class"] = y

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# recall_score / precision_score are scikit-learn (CPU) functions, and cuDF
# does not allow implicit conversion of GPU data to NumPy, so give them an
# explicit host copy of the labels.
y_test_host = y_test.to_numpy()

results = {}

# output_type="numpy" makes predict / predict_proba return NumPy arrays, so
# the ``[:, 1]`` column selection and the scikit-learn metrics both work.
lr = LogisticRegression(output_type="numpy")
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
y_proba_lr = lr.predict_proba(X_test)[:, 1]  # probability of class 1

results["RAPIDS LogisticRegression"] = {
    "auc": float(roc_auc_score(y_test, y_proba_lr)),
    "accuracy": float(accuracy_score(y_test, y_pred_lr)),
    "recall": float(recall_score(y_test_host, y_pred_lr)),
    "precision": float(precision_score(y_test_host, y_pred_lr)),
}

# Seeded like the train/test split so that reruns are comparable.
rf = RandomForestClassifier(n_estimators=20, random_state=42, output_type="numpy")
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
y_proba_rf = rf.predict_proba(X_test)[:, 1]  # probability of class 1

results["RAPIDS RandomForest"] = {
    "auc": float(roc_auc_score(y_test, y_proba_rf)),
    "accuracy": float(accuracy_score(y_test, y_pred_rf)),
    "recall": float(recall_score(y_test_host, y_pred_rf)),
    "precision": float(precision_score(y_test_host, y_pred_rf)),
}

# Export one CSV row per model: the model name followed by its metrics.
pd.DataFrame([
    {"model": model, **metrics} for model, metrics in results.items()
]).to_csv("resultats_auc_rapids.csv", index=False)



🔍 Vérification des microservices :
✅ ingestion-service OK : {'status': 'ingestion-service running'}
✅ train-service OK : {'status': 'train-service running'}
✅ predict-service OK : {'status': 'predict-service running'}
✅ compare-service OK : {'status': 'compare-service running'}
📥 Chargement des données CPU depuis : ../data/creditcard.csv

🚀 Entraînement LogisticRegression CPU...

🚀 Entraînement RandomForest CPU...

✅ Résultats CPU sauvegardés dans ../models/resultats_auc_cpu.csv


In [None]:
import cudf
from cuml.linear_model import LogisticRegression as cuLogisticRegression
from cuml.ensemble import RandomForestClassifier as cuRandomForestClassifier
from cuml.metrics import accuracy_score, recall_score, precision_score, roc_auc_score
from cuml.model_selection import train_test_split
import pandas as pd
import time
import os

def _positive_class_scores(proba):
    """Return column 1 (the positive class) of a ``predict_proba`` result."""
    # DataFrame-like results do not accept ``[:, 1]``; they need positional
    # ``iloc`` indexing. Array-like results keep the plain slice.
    if hasattr(proba, "iloc"):
        return proba.iloc[:, 1]
    return proba[:, 1]


def _fit_and_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model``, time the fit, and return its metrics on the test split."""
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    training_time = time.perf_counter() - start

    y_pred = model.predict(X_test)
    y_score = _positive_class_scores(model.predict_proba(X_test))

    # NOTE(review): recall_score and precision_score are resolved from the
    # module-level ``cuml.metrics`` import; confirm the installed cuML release
    # actually exports them.
    return {
        "auc": float(roc_auc_score(y_test, y_score)),
        "accuracy": float(accuracy_score(y_test, y_pred)),
        "recall": float(recall_score(y_test, y_pred)),
        "precision": float(precision_score(y_test, y_pred)),
        "training_time": training_time,
    }


def run_rapids_models(data_path, results_path="../models/resultats_auc_rapids.csv"):
    """Train two cuML classifiers on a CSV dataset and save their metrics.

    The CSV is loaded with cuDF; the "Class" column is the label and every
    other column is a feature. A logistic regression and a 20-tree random
    forest are fitted on an 80/20 train/test split (seed 42) and scored on
    the test part.

    Args:
        data_path: Path of the input CSV file.
        results_path: Path of the CSV file the metrics are written to. Its
            parent directory is created when missing.

    Returns:
        A dict mapping each model name to a dict with the keys "auc",
        "accuracy", "recall", "precision" and "training_time" (seconds
        spent in ``fit``).
    """
    print("📥 Chargement des données RAPIDS GPU depuis :", data_path)
    df = cudf.read_csv(data_path)

    features = [col for col in df.columns if col != "Class"]
    X = df[features]
    y = df["Class"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    results = {}

    # Logistic Regression RAPIDS
    print("\n🚀 Entraînement LogisticRegression RAPIDS GPU...")
    results["RAPIDS LogisticRegression"] = _fit_and_score(
        cuLogisticRegression(), X_train, X_test, y_train, y_test
    )

    # Random Forest RAPIDS
    print("\n🚀 Entraînement RandomForest RAPIDS GPU...")
    results["RAPIDS RandomForest"] = _fit_and_score(
        cuRandomForestClassifier(n_estimators=20, random_state=42),
        X_train, X_test, y_train, y_test,
    )

    # Save the results as a pandas-compatible CSV, one row per model.
    df_results = pd.DataFrame(
        [{"model": model, **metrics} for model, metrics in results.items()]
    )
    # dirname is "" for a bare file name, and os.makedirs("") raises, so only
    # create the directory when the path actually has one.
    results_dir = os.path.dirname(results_path)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    df_results.to_csv(results_path, index=False)
    print(f"\n✅ Résultats RAPIDS sauvegardés dans {results_path}")

    return results


# Entry point when this module is executed directly rather than imported.
if __name__ == "__main__":
    DATA_PATH = "../data/creditcard.csv"
    run_rapids_models(data_path=DATA_PATH)
