In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, average_precision_score
import pandas as pd

from project_hatebert_models import (LogisticHateBERT, RandomForestHateBERT, LassoHateBERT, HateSpeechXGBClassifier, 
                            KNNHateSpeechClassifier, RidgeHateBERT, NBHateBERTClassifier)

In [None]:
# Load both splits. The training file is read with pandas' default delimiter,
# while the test file is explicitly parsed as semicolon-separated.
train_df = pd.read_csv("../data/train_data.csv")
test_df = pd.read_csv("../data/test_data.csv", sep=";")

# Training inputs/targets come from the "text" and "label" columns.
X_train, y_train = train_df["text"], train_df["label"]

# Test inputs come from "comment"; the target is derived by thresholding
# "isHate": values at or above 0.25 become 1, everything else 0.
X_test = test_df["comment"]
y_test = test_df["isHate"].ge(0.25).astype(int)

In [None]:
# Display name -> classifier class, in the order the models are evaluated.
model_classes = {
    "Lasso": LassoHateBERT,
    "Logistic": LogisticHateBERT,
    "Ridge": RidgeHateBERT,
    "KNN": KNNHateSpeechClassifier,
    "XGBoost": HateSpeechXGBClassifier,
    "Random Forest": RandomForestHateBERT,
    "Gaussian Naive Bayes": NBHateBERTClassifier,
}

# Instantiate each classifier with its default constructor arguments, keeping
# the (name, instance) pair layout that the evaluation loop iterates over.
models = [(label, cls()) for label, cls in model_classes.items()]

In [None]:
# Fit every candidate on the training split, score the test split, and overlay
# the resulting precision-recall curves on one set of axes.
fig, ax = plt.subplots(figsize=(10, 8))

for name, model in models:
    model.train(X_train, y_train)
    proba = model.predict_proba(X_test)

    # Reduce the model output to a 1-D array of scores. A Series is unwrapped
    # to its underlying array; anything else with more than one dimension is
    # presumed to be (n_samples, n_classes) with column 1 holding the positive
    # class — TODO confirm against the model wrappers.
    if not isinstance(proba, pd.Series):
        if proba.ndim > 1:
            proba = proba[:, 1]
    else:
        proba = proba.values

    precision, recall, _ = precision_recall_curve(y_test, proba)
    ap_score = average_precision_score(y_test, proba)

    # The legend entry carries the average-precision summary for the curve.
    curve_label = f"{name} (AP = {ap_score:.2f})"
    ax.plot(recall, precision, label=curve_label)

ax.set(
    xlabel="Recall",
    ylabel="Precision",
    title="Precision-Recall Curve Comparison",
)
ax.legend(loc="lower left")
ax.grid(True)
plt.show()