In [None]:
# Switch the working directory to the parent folder: the relative paths used
# further down ("data/final/...", "models/...") and the `src.` import are resolved
# against the current directory. NOTE(review): re-running this cell moves up one
# more level each time, so run it only once per kernel session.
%cd ..

In [None]:
from sklearn.metrics import cohen_kappa_score
import numpy as np
import pandas as pd
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm.auto import tqdm
from sklearn.metrics import recall_score, precision_score, f1_score
import nltk
from src.hatespeech.attack import load_attack
# Fetch the NLTK "punkt" resource. Nothing in the visible cells calls nltk
# directly -- presumably it is needed by `load_attack`; TODO confirm.
nltk.download('punkt')
# Remove the column-width limit so displayed dataframes show full cell contents
pd.set_option('max_colwidth', None)

## Setup

In [None]:
def get_logits(text: str, tok, model) -> torch.Tensor:
    """Run `model` on a single document and return its last output logit.

    Args:
        text: The document to score.
        tok: Callable tokenizer exposing a `model_max_length` attribute. It is
            called with `return_tensors='pt'`, `truncation=True` and
            `padding=True`, and must return a mapping with `input_ids` and
            `attention_mask`.
        model: Callable accepting `input_ids` and `attention_mask` keyword
            arguments, whose first output element holds the logits.

    Returns:
        The last entry of the logits, after dropping the leading dimension
        when the logits are two-dimensional.

    Side effects:
        Sets `tok.model_max_length` to 512 if it was larger than 100,000.

    NOTE(review): callers compare the returned value against 0. For a model
    with two output logits that is not the same decision as an argmax over the
    logits -- confirm this is intended for every evaluated model.
    """
    # A very large max length is treated as "not configured" (presumably a
    # sentinel set by the tokenizer library); cap it so truncation takes effect.
    if tok.model_max_length > 100_000:
        tok.model_max_length = 512

    encoded = tok(text, return_tensors='pt', truncation=True, padding=True)

    # Inference only, so no gradients need to be tracked
    with torch.no_grad():
        outputs = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
        )

    scores = outputs[0]

    # Drop the leading dimension when the logits are two-dimensional
    if scores.ndim == 2:
        scores = scores[0]

    return scores[-1]

In [None]:
# Read the train, validation and test splits (in that order) from parquet
train_df, val_df, test_df = map(
    pd.read_parquet,
    [
        "data/final/train-off.parquet",
        "data/final/val-off.parquet",
        "data/final/test-off.parquet",
    ],
)

# Preview the validation split
val_df.head()

## Evaluate models on the agreed labels

In [None]:
# Binarise the gold labels: 1 for "Offensive", 0 for every other label
val_labels = [int(lbl == "Offensive") for lbl in val_df.label]
test_labels = [int(lbl == "Offensive") for lbl in test_df.label]

In [None]:
# (display name, model identifier) pairs to evaluate. Every identifier except the
# special value 'attack' is passed to `from_pretrained`; 'attack' is loaded through
# the project's `load_attack` helper instead.
models = [
    ("our XLMR-base model from the first iteration", 'models/xlmr-base1'),
    ("our XLMR-base model from the second iteration", 'models/xlmr-base2'),
    ("our XLMR-base model from the third iteration", 'models/xlmr-base3'),
    ("our XLMR-large model from the second iteration", 'models/xlmr-large'),
    ("our ELECTRA model from the second iteration", 'models/aelaectra'),
    ("our ELECTRA model from the third iteration", 'models/aelaectra2'),
    ("our ELECTRA model from the third iteration w/o contains_offensive_word", 'models/no_contains_offensive_word_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o contains_positive_swear_word", 'models/no_contains_positive_swear_word_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o has_been_moderated", 'models/no_has_been_moderated_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o has_positive_sentiment", 'models/no_has_positive_sentiment_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o is_all_caps", 'models/no_is_all_caps_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o is_dr_answer", 'models/no_is_dr_answer_aelaectra2'),
    ("our ELECTRA model from the third iteration w/o is_mention", 'models/no_is_mention_aelaectra2'),
    ("Guscode", 'Guscode/DKbert-hatespeech-detection'),
    ("DaNLP BERT", 'DaNLP/da-bert-hatespeech-classification'),
    ("DaNLP ELECTRA", 'DaNLP/da-electra-hatespeech-detection'),
    ("A-ttack", 'attack'),
]

with tqdm(models) as pbar:
    for name, model_id in pbar:

        # Update progress bar description
        pbar.set_description(f"Evaluating {name}")

        # Load tokenizer and model
        if model_id != "attack":
            tok = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForSequenceClassification.from_pretrained(model_id)
        else:
            tok, model = load_attack()

        for split_name, df, labels in [("val", val_df, val_labels), ("test", test_df, test_labels)]:

            # Get predictions: a document is predicted positive when the logit
            # returned by `get_logits` is greater than zero
            preds = torch.stack(
                [get_logits(doc, tok, model) for doc in tqdm(df.text, leave=False)]
            ) > 0

            # Compute scores; recall, precision and F1 refer to the positive
            # (label 1) class, the macro F1 averages the F1 of both classes
            recall = recall_score(labels, preds)
            precision = precision_score(labels, preds)
            f1 = f1_score(labels, preds)
            macro_f1 = f1_score(labels, preds, average='macro')

            # Print scores (the macro-average F1 was previously computed but
            # never reported)
            print(f'Scores for {name} on the {split_name} split:')
            print(f'\tRecall: {100 * recall:.2f}%')
            print(f'\tPrecision: {100 * precision:.2f}%')
            print(f'\tF1-score: {100 * f1:.2f}%')
            print(f'\tMacro-average F1-score: {100 * macro_f1:.2f}%')

    # Reset the description once every model has been evaluated
    pbar.set_description("Evaluating")

## Error analysis

In [None]:
# Model inspected in the error analysis
model_id = 'models/xlmr-base3'

# Load tokenizer and model; the special identifier 'attack' goes through the
# project's own loader
if model_id != 'attack':
    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
else:
    tok, model = load_attack()

# Get logits: one value per validation document, stacked into a single tensor
logits = torch.stack(
    tuple(get_logits(doc, tok, model) for doc in tqdm(val_df.text, leave=False))
)

In [None]:
# Attach the raw logits to the validation dataframe, together with the
# probabilities obtained by passing them through a sigmoid
val_df["model_logits"] = logits.tolist()
val_df["model_probs"] = logits.sigmoid().tolist()

# Preview the augmented dataframe
val_df.head()

In [None]:
# Get the positional indices of the samples on which the model was wrong, i.e.
# where the thresholded logit disagrees with the gold label
wrong_idxs = (
    torch.nonzero((logits > 0) != torch.tensor(val_labels)).squeeze(1).tolist()
)

# Get the samples on which the model was wrong. `wrong_idxs` are positions within
# `logits` (and hence row positions of `val_df`), so rows must be selected by
# position with `.iloc`. Label-based `.loc` only gives the same rows when the
# dataframe has a default 0..n-1 index, and otherwise returns the wrong rows or
# raises a KeyError.
wrong_df = val_df.iloc[wrong_idxs]

# Sort the dataframe by absolute value of logits, so the most confident mistakes
# come first
wrong_df = wrong_df.sort_values(by='model_logits', key=lambda x: abs(x), ascending=False)

wrong_df