In [1]:
import huggingface_hub
from datasets import load_dataset
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

# Fetch the NLTK data packages used by the preprocessing cells below
# (presumably 'punkt' for word_tokenize and 'stopwords' for the stop-word list).
# NOTE(review): the recorded run of this cell printed SSL certificate errors for
# both downloads and the cell evaluated to False, so confirm the resources are
# available locally before running the preprocessing cells.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Error loading punkt: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1000)>
[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1000)>


False

In [2]:
# ---------------------------------------------------------------------------
# Dataset loading: three corpora, each read from local files.
# ---------------------------------------------------------------------------

# Hate Xplain: a single CSV.
hate_xplain = pd.read_csv("hate_xplain.csv")

# Implicit Hate: tab-separated posts; the string class names are mapped to
# integer ids and stored in a new 'class_label' column.
label_map = {
    'white_grievance': 0,
    'incitement': 1,
    'inferiority': 2,
    'irony': 3,
    'stereotypical': 4,
    'threatening': 5,
    'other': 6,
}
implicit_hate = pd.read_csv('implicit-hate-corpus/implicit_hate_v1_stg2_posts.tsv', delimiter='\t')
implicit_hate['class_label'] = implicit_hate['implicit_class'].map(label_map)
# Drop the secondary class column (re-binding instead of mutating in place).
implicit_hate = implicit_hate.drop("extra_implicit_class", axis=1)

# Toxic-Spans: annotations and comments live in separate files and are joined
# on their shared 'comment_id' column.
annotations = pd.read_csv('toxic-spans/annotations.csv')
comments = pd.read_csv('toxic-spans/comments.csv')
toxic_spans = pd.merge(annotations, comments, on='comment_id')

In [3]:
# Compiled once instead of on every call.
_BOW_URL_RE = re.compile(r"http\S+|www\S+|https\S+", flags=re.MULTILINE)
_BOW_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')


def _bow_resources():
    """Return the shared ``(stop_words, stemmer)`` pair, building it on first use.

    Loading the stop-word list and constructing the stemmer for every single
    document is wasted work when the function is applied row by row, so the
    pair is created lazily once and stored on the function object.
    """
    cached = getattr(_bow_resources, "_cached", None)
    if cached is None:
        cached = (set(stopwords.words('english')), PorterStemmer())
        _bow_resources._cached = cached
    return cached


def bow_preprocess_text(text):
    """Normalize one document for bag-of-words vectorization.

    Steps: lowercase, strip URLs, drop every character that is not an ASCII
    letter or whitespace, tokenize, remove English stop words, and stem the
    remaining tokens.

    Args:
        text: The raw document. ``None`` and float NaN (what pandas uses for a
            missing text cell) are treated as an empty document.

    Returns:
        The stemmed tokens joined by single spaces (``''`` for missing input).
    """
    # Missing values would otherwise fail on ``.lower()``.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    stop_words, porter = _bow_resources()

    text = _BOW_URL_RE.sub('', text.lower())
    text = _BOW_NON_LETTER_RE.sub('', text)
    # Stop words are filtered before stemming, i.e. on the unstemmed token.
    return ' '.join(
        porter.stem(word) for word in word_tokenize(text) if word not in stop_words
    )

def bow_preprocess_data(data, label, vectorizer):
    """Clean a text column and turn it into a feature matrix.

    Args:
        data: Object exposing ``.apply`` (the notebook passes a DataFrame
            column); every element is run through ``bow_preprocess_text``.
        label: Target values; handed back unchanged.
        vectorizer: Object exposing ``fit_transform``. It is fitted on the
            cleaned texts passed in here, so any previous fit is replaced.

    Returns:
        ``(X, y)`` where ``X`` is whatever ``vectorizer.fit_transform``
        produces for the cleaned texts and ``y`` is ``label``.
    """
    cleaned_texts = data.apply(bow_preprocess_text)
    features = vectorizer.fit_transform(cleaned_texts)
    return features, label

In [4]:
# A single CountVectorizer instance is reused; bow_preprocess_data calls
# fit_transform on it each time, so every dataset gets its own fresh fit.
vectorizer = CountVectorizer()

# Identical 80/20 split settings for all three datasets.
split_options = {"test_size": 0.2, "random_state": 42}

# Hate Xplain
hx_X, hx_y = bow_preprocess_data(hate_xplain['tweet'], hate_xplain['class'], vectorizer)
hx_train_X, hx_test_X, hx_train_y, hx_test_y = train_test_split(hx_X, hx_y, **split_options)

# Implicit Hate
ih_X, ih_y = bow_preprocess_data(implicit_hate['post'], implicit_hate['class_label'], vectorizer)
ih_train_X, ih_test_X, ih_train_y, ih_test_y = train_test_split(ih_X, ih_y, **split_options)

# Toxic-Spans
ts_X, ts_y = bow_preprocess_data(toxic_spans['comment_text'], toxic_spans['all toxic'], vectorizer)
ts_train_X, ts_test_X, ts_train_y, ts_test_y = train_test_split(ts_X, ts_y, **split_options)


In [5]:
def evaluate(y_test, y_pred, zero_division=1):
    """Build a per-class precision/recall/F1 report.

    Args:
        y_test: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_test``.
        zero_division: Forwarded to ``classification_report``; the value a
            metric takes when its denominator is zero. The default of ``1``
            keeps the notebook's existing behavior, which means a class that
            is never predicted is reported with precision 1.00. Pass ``0`` to
            score such classes as 0 instead.

    Returns:
        Whatever ``classification_report`` returns for these arguments
        (the notebook prints it as text).
    """
    return classification_report(y_test, y_pred, zero_division=zero_division)

In [6]:
def run(train_X, train_y, test_X, test_y):
    """Fit a multinomial Naive Bayes baseline and score it on held-out data.

    Args:
        train_X: Training feature matrix.
        train_y: Training labels.
        test_X: Held-out feature matrix.
        test_y: Held-out labels.

    Returns:
        The report produced by ``evaluate`` for the held-out predictions.
    """
    classifier = MultinomialNB()
    classifier.fit(train_X, train_y)
    predictions = classifier.predict(test_X)
    return evaluate(test_y, predictions)

# Run the Naive Bayes baseline on each dataset, in the order
# Hate Xplain -> Implicit Hate -> Toxic-Spans.
hx_stats, ih_stats, ts_stats = [
    run(split_train_X, split_train_y, split_test_X, split_test_y)
    for split_train_X, split_train_y, split_test_X, split_test_y in (
        (hx_train_X, hx_train_y, hx_test_X, hx_test_y),
        (ih_train_X, ih_train_y, ih_test_X, ih_test_y),
        (ts_train_X, ts_train_y, ts_test_X, ts_test_y),
    )
]

In [7]:
def print_stats(stats, name: str):
    """Print a header line naming the dataset, then the report itself.

    Args:
        stats: The report to show (printed via ``print``).
        name: Dataset name used in the header line.
    """
    print(f"Stats for {name}", stats, sep="\n")

# Show the three baseline reports in a fixed order.
for dataset_stats, dataset_name in (
    (hx_stats, "Hate Xplain"),
    (ih_stats, "Implicit Hate"),
    (ts_stats, "Toxic-Spans"),
):
    print_stats(dataset_stats, dataset_name)

Stats for Hate Xplain
              precision    recall  f1-score   support

           0       0.34      0.09      0.14       290
           1       0.88      0.97      0.92      3832
           2       0.83      0.65      0.73       835

    accuracy                           0.86      4957
   macro avg       0.68      0.57      0.60      4957
weighted avg       0.84      0.86      0.84      4957

Stats for Implicit Hate
              precision    recall  f1-score   support

           0       0.51      0.71      0.59       325
           1       0.54      0.49      0.51       253
           2       0.63      0.37      0.47       167
           3       0.57      0.40      0.47       169
           4       0.49      0.47      0.48       219
           5       0.49      0.62      0.55       120
           6       1.00      0.00      0.00        17

    accuracy                           0.52      1270
   macro avg       0.60      0.44      0.44      1270
weighted avg       0.54      0.

In [17]:
# Hyper-parameters for the LSTM experiments below.
embedding_dim = 128  # embedding vector size; passed to LSTMClassifier
hidden_dim = 128  # LSTM hidden-state size; passed to LSTMClassifier
epochs = 5  # intended number of training epochs — NOTE(review): the visible train() call passes its own value
batch_size = 128  # read as a global by lstm_prepare_data when building DataLoaders
max_seq_len = 200  # presumably the maximum token-sequence length for LSTM inputs — TODO confirm where it is applied

class ToxicDataset(Dataset):
    """Dataset pairing indexable feature rows with integer class labels.

    ``data`` is stored as given and converted to a tensor one row at a time;
    ``labels`` is converted to a ``torch.long`` tensor once, up front.
    """

    def __init__(self, data, labels):
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.data = data

    def __len__(self):
        # The label tensor determines the number of samples.
        return len(self.labels)

    def __getitem__(self, idx):
        features = torch.tensor(self.data[idx], dtype=torch.long)
        target = self.labels[idx]
        return features, target

class LSTMClassifier(nn.Module):
    """Embedding -> LSTM -> linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (LSTM input size).
        hidden_dim: LSTM hidden-state size (input size of the linear head).
        output_dim: Number of output logits.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return logits computed from the LSTM's final hidden state."""
        embedded = self.embedding(x)
        _outputs, (final_hidden, _final_cell) = self.lstm(embedded)
        # final_hidden is indexed by layer first; [-1] selects the last layer.
        return self.fc(final_hidden[-1])
    
def _encode_as_sequences(texts, analyzer, vocabulary, seq_len):
    """Convert raw texts into fixed-length lists of token ids.

    Id 0 is padding, id 1 stands for any token missing from ``vocabulary``,
    and a known token maps to ``vocabulary[token] + 2``. Texts longer than
    ``seq_len`` keep their first ``seq_len`` tokens; shorter ones are padded
    on the left so the real tokens sit next to the final LSTM step.
    """
    encoded = []
    for text in texts:
        ids = [
            int(vocabulary[token]) + 2 if token in vocabulary else 1
            for token in analyzer(text)
        ][:seq_len]
        encoded.append([0] * (seq_len - len(ids)) + ids)
    return encoded


def lstm_prepare_data(data, labels, is_binary: bool, seq_len=None):
    """Build train/test DataLoaders of token-id sequences for the LSTM.

    The previous version fed dense bag-of-words count vectors to the model,
    so the embedding layer looked up word *counts* as if they were token ids
    and every sample was a sequence as long as the whole vocabulary. This
    version tokenizes each text, maps tokens to vocabulary ids, and
    pads/truncates to a fixed length.

    Args:
        data: Iterable of raw text documents.
        labels: Iterable of class labels aligned with ``data``.
        is_binary: When true the model output size is forced to 2; otherwise
            it is the number of distinct labels.
        seq_len: Length every sequence is padded/truncated to. Defaults to
            the notebook-level ``max_seq_len``.

    Returns:
        ``(train_loader, test_loader, vocab_size, output_dim)``.
        ``vocab_size`` counts the learned terms plus the two reserved ids
        (padding and unknown), so it can be used directly as the embedding
        table size.
    """
    if seq_len is None:
        seq_len = max_seq_len

    # Encode labels
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labels)
    output_dim = 2 if is_binary else len(label_encoder.classes_)

    # Split first (same 80/20 split and seed as before) so the vocabulary is
    # learned from the training texts only.
    texts_train, texts_test, y_train, y_test = train_test_split(
        list(data), y, test_size=0.2, random_state=42
    )

    # CountVectorizer is used only for its tokenizer and its capped vocabulary.
    vectorizer = CountVectorizer(max_features=10000)
    vectorizer.fit(texts_train)
    analyzer = vectorizer.build_analyzer()
    vocabulary = vectorizer.vocabulary_

    X_train = _encode_as_sequences(texts_train, analyzer, vocabulary, seq_len)
    X_test = _encode_as_sequences(texts_test, analyzer, vocabulary, seq_len)

    # Create datasets and dataloaders
    train_dataset = ToxicDataset(X_train, y_train)
    test_dataset = ToxicDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # +2 for the reserved padding (0) and unknown-token (1) ids.
    vocab_size = len(vocabulary) + 2
    return train_loader, test_loader, vocab_size, output_dim

In [18]:
# Build loaders for each dataset. Toxic-Spans is treated as a binary task;
# the other two use one output per distinct label.
hx_train_loader, hx_test_loader, hx_vocab_size, hx_output_dim = lstm_prepare_data(
    hate_xplain['tweet'], hate_xplain['class'], is_binary=False
)
ih_train_loader, ih_test_loader, ih_vocab_size, ih_output_dim = lstm_prepare_data(
    implicit_hate['post'], implicit_hate['class_label'], is_binary=False
)
ts_train_loader, ts_test_loader, ts_vocab_size, ts_output_dim = lstm_prepare_data(
    toxic_spans['comment_text'], toxic_spans['all toxic'], is_binary=True
)

# One classifier per dataset, all sharing the same embedding/hidden sizes.
hx_LSTM = LSTMClassifier(
    vocab_size=hx_vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=hx_output_dim
)
ih_LSTM = LSTMClassifier(
    vocab_size=ih_vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=ih_output_dim
)
ts_LSTM = LSTMClassifier(
    vocab_size=ts_vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=ts_output_dim
)

155

In [15]:
def train(model, train_loader, test_loader, epochs, criterion, optimizer):
    """Train ``model`` for a number of epochs, printing the mean batch loss.

    Args:
        model: Module to optimize; switched to training mode each epoch.
        train_loader: Iterable of ``(inputs, targets)`` batches; must support
            ``len()``.
        test_loader: Accepted for signature compatibility; not used here.
        epochs: Number of passes over ``train_loader``.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: Optimizer that updates the model's parameters.
    """
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

def test(model, test_loader, criterion):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for i, (data, labels) in enumerate(test_loader):
            outputs = model(data)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            all_preds.append(preds)
            all_labels.append(labels)
    print(f"Test Loss: {running_loss / len(test_loader)}")
    stats = evaluate(all_labels, all_preds)
    return stats

In [20]:
# Single-epoch training run of the Hate Xplain LSTM with cross-entropy loss
# and Adam at its default settings.
hx_criterion = nn.CrossEntropyLoss()
hx_optimizer = optim.Adam(hx_LSTM.parameters())
train(hx_LSTM, hx_train_loader, hx_test_loader, 1, hx_criterion, hx_optimizer)

KeyboardInterrupt: 

In [None]:
# Number of batches in the Hate Xplain training loader.
len(hx_train_loader)

In [None]:
# Number of batches in the Hate Xplain test loader.
len(hx_test_loader)

In [None]:
# Scratch inspection: display the Implicit Hate frame as the cell's output.
implicit_hate

In [None]:
# Scratch inspection: display the merged Toxic-Spans frame as the cell's output.
toxic_spans