In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer
import numpy as np

In [None]:
class HateSpeechClassifier:
    """Classify hate speech with sentence embeddings and logistic regression.

    Text is encoded by a ``SentenceTransformer`` model and the embeddings are
    fed to a scikit-learn ``LogisticRegression``. A sample is labelled 1 when
    its predicted probability is strictly greater than ``threshold``.
    """

    def __init__(self, threshold: float = 0.75):
        """
        Initializes the classifier with a threshold and sentence encoder.

        Args:
            threshold (float): Probability threshold for classifying hate
                speech; a probability must exceed it to be labelled 1.
        """
        self.threshold = threshold
        self.encoder = SentenceTransformer("all-MiniLM-L6-v2")
        self.model = LogisticRegression(max_iter=1000)

    def embed(self, texts: pd.Series) -> np.ndarray:
        """
        Encode text into sentence embeddings.

        Missing values (NaN/None) are encoded as the empty string and any
        other non-string value is converted with ``str()``, so gaps in a
        CSV text column do not crash the encoder.

        Args:
            texts (pd.Series): Input text data.

        Returns:
            np.ndarray: Sentence embeddings, one row per input text.
        """
        # The encoder expects plain strings; sanitize each entry up front.
        cleaned = ["" if pd.isna(text) else str(text) for text in texts]
        return self.encoder.encode(cleaned, show_progress_bar=False)

    def train(self, X: pd.Series, y: pd.Series) -> None:
        """
        Train the logistic regression model on sentence embeddings.

        Args:
            X (pd.Series): Input text.
            y (pd.Series): Labels (0 or 1).
        """
        X_embed = self.embed(X)
        self.model.fit(X_embed, y)

    def predict_proba(self, X: pd.Series) -> pd.Series:
        """
        Predict the probability of each input being hate speech.

        Args:
            X (pd.Series): Input text.

        Returns:
            pd.Series: Probabilities of being hate speech, indexed like
                ``X``. An empty input yields an empty float Series.
        """
        if X.empty:
            # Nothing to encode; an empty embedding array would not be valid
            # 2-D input for the underlying model.
            return pd.Series(np.array([], dtype=float), index=X.index)

        X_embed = self.embed(X)
        # Column 1 holds the probability of the second class, i.e. label 1
        # when the model was trained on 0/1 labels.
        proba = self.model.predict_proba(X_embed)[:, 1]
        return pd.Series(proba, index=X.index)

    def predict(self, X: pd.Series) -> pd.Series:
        """
        Predict labels based on the probability threshold.

        Args:
            X (pd.Series): Input text.

        Returns:
            pd.Series: Predicted labels (1 where the probability is strictly
                greater than the threshold, else 0), indexed like ``X``.
        """
        proba = self.predict_proba(X)
        return (proba > self.threshold).astype(int)

    def evaluate(self, X_test: pd.Series, y_test: pd.Series) -> None:
        """
        Evaluate the model and print classification metrics.

        Args:
            X_test (pd.Series): Input text.
            y_test (pd.Series): True labels.
        """
        y_pred = self.predict(X_test)
        print(classification_report(y_test, y_pred))


def main():
    """Train the model, report validation metrics, and score the test set.

    Reads ``../data/train.csv`` (columns ``text`` and ``label``) and
    ``../data/test.csv`` (column ``text``), holds out 20% of the training
    data for validation, prints a classification report for that hold-out,
    and writes the test set with added ``probability`` and ``prediction``
    columns to ``../data/hate_speech_predictions.csv``.
    """
    train_df = pd.read_csv("../data/train.csv")
    test_df = pd.read_csv("../data/test.csv")

    model = HateSpeechClassifier(threshold=0.75)

    X_train, X_val, y_train, y_val = train_test_split(
        train_df["text"],
        train_df["label"],
        test_size=0.2,
        random_state=42,
    )

    model.train(X_train, y_train)
    model.evaluate(X_val, y_val)

    # Encode the test set only once: calling predict() as well would re-run
    # the sentence encoder over the same texts. Applying the classifier's
    # threshold to the probabilities gives the same labels.
    test_probs = model.predict_proba(test_df["text"])
    test_preds = (test_probs > model.threshold).astype(int)

    test_df["probability"] = test_probs
    test_df["prediction"] = test_preds
    test_df.to_csv("../data/hate_speech_predictions.csv", index=False)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()