# In [None]:
import sys
import os
import pandas as pd
import time
import requests

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, recall_score, precision_score
from sklearn.preprocessing import StandardScaler

# Input dataset loaded by train_cpu_model(); its "Class" column is used as the
# target and every other column as a feature. Relative paths are resolved
# against the current working directory.
DATA_PATH = "../data/creditcard.csv"
# CSV file where train_cpu_model() writes one row of metrics per CPU model.
OUTPUT_CPU_CSV = "../models/resultats_auc_cpu.csv"
# Not referenced in the visible code; presumably the RAPIDS/GPU counterpart of
# OUTPUT_CPU_CSV -- TODO confirm against the code that writes it.
OUTPUT_RAPIDS_CSV = "../models/resultats_auc_rapids.csv"

# Base URLs of the local microservices. Each one is probed at "<url>/status"
# by the start-up check that follows these constants.
INGESTION_URL = "http://localhost:8001"
TRAIN_URL = "http://localhost:8002"
PREDICT_URL = "http://localhost:8003"
COMPARE_URL = "http://localhost:8004"

# Start-up health check: probe the "/status" endpoint of every microservice
# and print one line per service. This is best-effort reporting only; a
# failing service never aborts the script.

# Upper bound (seconds) for each probe. Without a timeout, requests.get() can
# block indefinitely when a service accepts the connection but never answers.
STATUS_TIMEOUT_SECONDS = 5

print("\n🔍 Vérification des microservices :")
for name, url in [
    ("ingestion-service", INGESTION_URL),
    ("train-service", TRAIN_URL),
    ("predict-service", PREDICT_URL),
    ("compare-service", COMPARE_URL),
]:
    try:
        res = requests.get(f"{url}/status", timeout=STATUS_TIMEOUT_SECONDS)
        # Treat 4xx/5xx replies as failures so an error page that happens to
        # carry a JSON body is not reported as a healthy service.
        res.raise_for_status()
        print(f"✅ {name} OK :", res.json())
    except Exception as e:
        # Deliberately broad: connection errors, timeouts, HTTP errors and
        # invalid JSON are all reported the same way and the loop continues.
        print(f"❌ {name} erreur :", e)


def _score_classifier(model, X_eval, y_eval, training_time):
    """Return the evaluation metrics of an already fitted classifier.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_eval: Evaluation features, preprocessed the same way as the data
            the model was fitted on.
        y_eval: True labels for ``X_eval``.
        training_time: Fit duration in seconds, stored as-is in the result.

    Returns:
        Dict with the keys ``auc``, ``accuracy``, ``recall``, ``precision``
        and ``training_time``.
    """
    y_pred = model.predict(X_eval)
    # Second column of predict_proba is used as the score for the AUC; this
    # assumes a binary target whose positive class sorts last (e.g. 0/1).
    y_proba = model.predict_proba(X_eval)[:, 1]
    return {
        "auc": roc_auc_score(y_eval, y_proba),
        "accuracy": accuracy_score(y_eval, y_pred),
        "recall": recall_score(y_eval, y_pred),
        "precision": precision_score(y_eval, y_pred),
        "training_time": training_time,
    }


def train_cpu_model():
    """Train and evaluate the CPU baseline models, then save their metrics.

    Loads the CSV at ``DATA_PATH``, uses the ``Class`` column as target and
    all other columns as features, and holds out 20% of the rows for
    evaluation (``random_state=42``). Two models are fitted:

    * ``LogisticRegression`` on standardized features;
    * ``RandomForestClassifier`` (20 trees) on the raw features.

    One row per model is written to ``OUTPUT_CPU_CSV`` with the columns
    ``model``, ``auc``, ``accuracy``, ``recall``, ``precision`` and
    ``training_time``.

    Returns:
        Dict mapping the model label to its metrics dict.
    """
    print("📥 Chargement des données CPU depuis :", DATA_PATH)
    df = pd.read_csv(DATA_PATH)

    features = [col for col in df.columns if col != "Class"]
    X = df[features]
    y = df["Class"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # The scaler is fitted on the training split only, so no statistics from
    # the evaluation split leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    results = {}

    print("\n🚀 Entraînement LogisticRegression CPU...")
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    lr = LogisticRegression(max_iter=5000, solver='lbfgs', random_state=42)
    lr.fit(X_train_scaled, y_train)
    train_time_lr = time.perf_counter() - start

    results["CPU LogisticRegression"] = _score_classifier(
        lr, X_test_scaled, y_test, train_time_lr
    )

    print("\n🚀 Entraînement RandomForest CPU...")
    start = time.perf_counter()
    rf = RandomForestClassifier(n_estimators=20, random_state=42)
    # The forest is fitted on the unscaled features, as in the original code.
    rf.fit(X_train, y_train)
    train_time_rf = time.perf_counter() - start

    results["CPU RandomForest"] = _score_classifier(rf, X_test, y_test, train_time_rf)

    # Flatten {label: metrics} into one row per model, "model" column first.
    all_metrics = [{"model": model, **metrics} for model, metrics in results.items()]

    # dirname() is "" for a bare file name, and os.makedirs("") raises, so the
    # directory is only created when the path actually has one.
    output_dir = os.path.dirname(OUTPUT_CPU_CSV)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df_results = pd.DataFrame(all_metrics)
    df_results.to_csv(OUTPUT_CPU_CSV, index=False)
    print(f"\n✅ Résultats CPU sauvegardés dans {OUTPUT_CPU_CSV}")

    return results


# Run the CPU training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train_cpu_model()
