In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os

In [None]:
# Load the dataset.

df = pd.read_csv("Dataset-IA_vs_Human.csv", encoding="utf-8")

# Drop rows with a missing text or label before casting: astype(str) would
# silently turn a missing text into the literal string "nan" (a bogus training
# sample), and astype(int) raises on a missing label. `df` itself is left
# untouched so the raw frame stays available for inspection.
labeled = df.dropna(subset=["text", "label"])
X = labeled["text"].astype(str)
y = labeled["label"].astype(int)

In [None]:
# Train/test split: hold out 20% of the samples, stratified on the label,
# with a fixed seed so the split is reproducible.

split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Turn the raw text into numeric features (TF-IDF).

vectorizer_options = {
    "lowercase": True,
    "strip_accents": "unicode",
    "ngram_range": (1, 2),  # unigrams and bigrams
    "min_df": 2,
    "max_df": 0.95,
}
tfidf = TfidfVectorizer(**vectorizer_options)

# The vectorizer is fitted on the training split only; the test split is
# merely transformed with the already-fitted vectorizer.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [None]:
# Model: logistic regression trained on the TF-IDF features.

clf = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",
)
# Kept as the last expression so the fitted estimator is still shown as the
# cell output.
clf.fit(X_train_tfidf, y_train)


In [None]:
# Pipeline: chain the already-fitted vectorizer and classifier so that raw text
# can be passed straight to a single object.

pipeline_steps = (tfidf, clf)
pipe = make_pipeline(*pipeline_steps)

In [None]:
# Evaluation on the held-out test split.

y_pred = clf.predict(X_test_tfidf)

# Each section prints its heading first and only then computes and prints the
# metric, in the same order as a plain sequence of print() calls.
sections = (
    (
        "\n Rapport de classification",
        lambda: classification_report(
            y_test, y_pred, target_names=["Humain (0)", "IA (1)"]
        ),
    ),
    (
        "\n Matrice de confusion",
        lambda: confusion_matrix(y_test, y_pred),
    ),
)
for heading, build_metric in sections:
    print(heading)
    print(build_metric())

In [None]:
from sklearn.pipeline import make_pipeline
import joblib
import os

In [None]:
# Pipeline used for the export below.
# NOTE(review): this builds `pipe` exactly like the earlier "pipeline" cell;
# one of the two cells is redundant on a top-to-bottom run.

pipe = make_pipeline(
    tfidf,  # vectorizer step
    clf,    # classifier step
)

In [None]:
# Write the pipeline to disk so the API can load it.

model_path = "model.joblib"
joblib.dump(pipe, model_path)
print(f"OK → {model_path} créé")

In [None]:
# Source of the FastAPI service, kept as a string so the notebook can generate
# app/app.py. Everything between the triple quotes is written to disk verbatim.
#
# The generated module:
#   * loads "model.joblib" from the current working directory at import time
#     and raises FileNotFoundError if it is missing, so a broken start-up is
#     reported as an error instead of a silent exit with status 0;
#   * exposes GET /health and POST /predict (JSON body: {"text": "..."}).
app_code = """
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import os

app = FastAPI(title="IA vs Humain — API (TF-IDF + LogReg)")

# Chargement du modèle
if not os.path.exists("model.joblib"):
    raise FileNotFoundError("Erreur : model.joblib non trouvé.")
pipeline = joblib.load("model.joblib")

CLASS_MAP = {0: "Humain", 1: "IA"}

class PredictIn(BaseModel):
    text: str

@app.get("/health")
def health():
    return {"status": "ok", "model_loaded": True}

@app.post("/predict")
def predict(req: PredictIn):
    proba = pipeline.predict_proba([req.text])[0]  # [P(0), P(1)]
    label = int(proba[1] >= 0.5)
    return {
        "label": label,
        "label_name": CLASS_MAP[label],
        "proba_human": float(proba[0]),
        "proba_ai": float(proba[1]),
    }
"""

# Create the 'app' directory if it does not exist yet (no error if it does).
os.makedirs("app", exist_ok=True)

# Write the file explicitly as UTF-8: the source above contains non-ASCII
# characters, and Python reads source files as UTF-8 by default, so relying on
# the platform's locale encoding could produce an app.py that fails to import.
with open("app/app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

print("OK → app.py créé")

In [None]:
pip install fastapi uvicorn

In [None]:
!uvicorn app:app --reload