In [10]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import (roc_curve, auc, precision_recall_curve, average_precision_score, 
    confusion_matrix, 
    ConfusionMatrixDisplay, 
    accuracy_score,
    precision_score,
    recall_score)
from transformers import AutoTokenizer, AutoModel
import torch

In [11]:
class LassoHateBERT:
    """Hate-speech classifier: HateBERT [CLS] embeddings fed to an L1-penalized logistic regression."""

    # Metric names accepted by find_best_threshold() and evaluate().
    _SUPPORTED_METRICS = ("accuracy", "precision", "recall")

    def __init__(self):
        """
        Initialize classifier with HateBERT and L1-penalized logistic regression.

        Loads the "GroNLP/hateBERT" tokenizer and encoder via `from_pretrained`.
        """
        self.tokenizer = AutoTokenizer.from_pretrained("GroNLP/hateBERT")
        self.model_encoder = AutoModel.from_pretrained("GroNLP/hateBERT")
        # liblinear is used because the penalty is L1.
        self.model = LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000)
        # Decision threshold on the positive-class probability; overwritten by evaluate().
        self.best_threshold = 0.25

    def embed(self, texts: pd.Series, batch_size: int = 16) -> np.ndarray:
        """
        Embed text using HateBERT [CLS] token embeddings in batches.

        Args:
            texts (pd.Series): Input phrases.
            batch_size (int): Number of texts per batch. Must be >= 1.

        Returns:
            np.ndarray: Sentence embeddings, one row per input text, in input order.
                An empty input yields an array of shape (0, hidden_size).

        Raises:
            ValueError: If `batch_size` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        self.model_encoder.eval()
        # Inputs must live on the same device as the encoder weights.
        device = next(self.model_encoder.parameters()).device
        embeddings = []

        for start in range(0, len(texts), batch_size):
            # .iloc makes the positional slicing explicit regardless of the Series index.
            batch_texts = texts.iloc[start:start + batch_size].tolist()
            encoded = self.tokenizer(batch_texts, return_tensors="pt", truncation=True,
                                     padding=True, max_length=512)
            encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
            with torch.no_grad():
                output = self.model_encoder(**encoded)
                # Position 0 of the sequence axis holds the [CLS] token representation.
                cls_embeddings = output.last_hidden_state[:, 0, :]
                embeddings.append(cls_embeddings.cpu().numpy())

        if not embeddings:
            # np.vstack raises on an empty list; return a well-shaped empty matrix instead.
            hidden_size = self.model_encoder.config.hidden_size
            return np.empty((0, hidden_size), dtype=np.float32)

        return np.vstack(embeddings)

    def train(self, X: pd.Series, y: pd.Series) -> None:
        """
        Fit the logistic regression on HateBERT embeddings of the training texts.

        Args:
            X (pd.Series): Training texts.
            y (pd.Series): Training labels, one per text.
        """
        X_embed = self.embed(X)
        self.model.fit(X_embed, y)

    def predict_proba(self, X: pd.Series) -> pd.Series:
        """
        Predict the positive-class probability for each text.

        Args:
            X (pd.Series): Input texts.

        Returns:
            pd.Series: Column 1 of the classifier's `predict_proba` output,
                carrying the same index as `X`.
        """
        X_embed = self.embed(X)
        proba = self.model.predict_proba(X_embed)[:, 1]
        return pd.Series(proba, index=X.index)

    def find_best_threshold(self, y_true: pd.Series, proba: pd.Series, metric: str) -> float:
        """
        Find threshold that maximizes the chosen metric.

        Scans 101 evenly spaced thresholds in [0, 1]; a sample is predicted positive
        when its probability is >= the threshold. Ties are resolved in favour of the
        lowest threshold (first maximum). Note that for "recall" the threshold 0.0
        marks every sample positive, so it always attains the maximum.

        Args:
            y_true (pd.Series): True binary labels.
            proba (pd.Series): Predicted positive-class probabilities.
            metric (str): One of 'accuracy', 'precision', 'recall'.

        Returns:
            float: The selected threshold.

        Raises:
            ValueError: If `metric` is not supported.
        """
        # Validate once, before any scoring work is done.
        if metric not in self._SUPPORTED_METRICS:
            raise ValueError(f"Unsupported metric: {metric}")

        if metric == "accuracy":
            scorer, extra = accuracy_score, {}
        elif metric == "precision":
            scorer, extra = precision_score, {"zero_division": 0}
        else:
            scorer, extra = recall_score, {"zero_division": 0}

        thresholds = np.linspace(0.0, 1.0, 101)
        scores = [scorer(y_true, (proba >= t).astype(int), **extra) for t in thresholds]

        best_idx = int(np.argmax(scores))
        return float(thresholds[best_idx])

    def evaluate(self, X_test: pd.Series, y_test: pd.Series, metric: str = "accuracy") -> None:
        """
        Evaluate model and print classification report using optimal threshold.

        The threshold is selected on the same data the report is computed on and is
        stored in `self.best_threshold`; the printed scores are therefore optimistic.
        Pass a held-out validation split here if an unbiased estimate is needed.

        Args:
            X_test (pd.Series): Test input texts.
            y_test (pd.Series): True binary labels.
            metric (str): Metric to optimize threshold on ('accuracy', 'precision', 'recall').

        Raises:
            ValueError: If `metric` is not supported.
        """
        proba = self.predict_proba(X_test)
        self.best_threshold = self.find_best_threshold(y_test, proba, metric)

        print(f"Best threshold based on {metric}: {self.best_threshold:.2f}")
        y_pred = (proba >= self.best_threshold).astype(int)
        print(classification_report(y_test, y_pred))

In [12]:
# Cutoff used to binarize the numeric "isHate" column into 0/1 ground-truth labels.
# Deliberately NOT read from model.best_threshold: that attribute is the classifier's
# decision threshold and is overwritten by evaluate(), whereas the definition of the
# labels must stay fixed. 0.25 equals the initial value of LassoHateBERT.best_threshold,
# so the resulting labels are unchanged.
LABEL_THRESHOLD = 0.25

train_df = pd.read_csv("../data/train_data.csv")
test_df = pd.read_csv("../data/test_data.csv", sep=";")  # test file is semicolon-delimited

X_train = train_df["text"]
y_train = train_df["label"]

X_test = test_df["comment"]
y_test = (test_df["isHate"] >= LABEL_THRESHOLD).astype(int)

# Data is prepared before the model is built so a bad path or column name fails
# before the encoder is loaded and the training texts are embedded.
model = LassoHateBERT()
model.train(X_train, y_train)

# Prints the selected threshold and a classification report; also updates
# model.best_threshold as a side effect.
model.evaluate(X_test, y_test)

KeyboardInterrupt: 

In [None]:
# Score the test set once; the precision-recall and confusion-matrix cells reuse test_probs.
test_probs = model.predict_proba(X_test)

fpr, tpr, _ = roc_curve(y_test, test_probs)
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, label=f"ROC curve (AUC = {roc_auc:.2f})")
# Diagonal reference line from (0, 0) to (1, 1).
roc_ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
roc_ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve - Test Data",
)
roc_ax.legend(loc="lower right")
roc_ax.grid(True)
plt.show()

In [None]:
# Precision-recall trade-off on the test set, summarized by average precision.
precision, recall, _ = precision_recall_curve(y_test, test_probs)
avg_precision = average_precision_score(y_test, test_probs)

pr_fig, pr_ax = plt.subplots(figsize=(8, 6))
pr_ax.plot(recall, precision, label=f"Avg Precision = {avg_precision:.2f}")
pr_ax.set(
    xlabel="Recall",
    ylabel="Precision",
    title="Precision-Recall Curve - Test Data",
)
pr_ax.legend(loc="upper right")
pr_ax.grid(True)
plt.show()

In [None]:
# Threshold the probabilities with the cutoff stored on the model rather than a
# hard-coded 0.25, so the matrix uses the same threshold as the classification
# report printed by model.evaluate() (which overwrites model.best_threshold).
y_test_pred = (test_probs >= model.best_threshold).astype(int)

cm = confusion_matrix(y_test, y_test_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Not Hate", "Hate"])

disp.plot(cmap="Blues")
# Style the axes the display actually drew on instead of relying on pyplot's current axes.
disp.ax_.set_title("Confusion Matrix - Test Data")
disp.ax_.grid(False)
plt.show()