### Import Libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import warnings

warnings.filterwarnings('ignore')

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize



# Connect to drive

Extract and inspect the dataset.


In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive
drive.mount('/content/drive')

# List the challenge folder to confirm the zipped CSV archives are present
!ls '/content/drive/MyDrive/jigsaw-toxic-comment-classification-challenge'

import zipfile
import pandas as pd




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
sample_submission.csv.zip  test.csv.zip  test_labels.csv.zip  train.csv.zip


In [None]:
# Folder in Drive that holds the zipped challenge files (trailing slash required,
# because archive names are appended by plain string concatenation below)
zip_dir = '/content/drive/MyDrive/jigsaw-toxic-comment-classification-challenge/'

# Unzip train.csv.zip into /content/
with zipfile.ZipFile(zip_dir + 'train.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# Unzip test.csv.zip into /content/
with zipfile.ZipFile(zip_dir + 'test.csv.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# Load the extracted CSVs into DataFrames
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')

In [None]:
# Load data and inspect data
# NOTE(review): these relative paths resolve against the current working directory;
# presumably that is the folder the archives were extracted to - confirm before
# running outside that environment.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

print("Training dataset shape:", train_df.shape)
print("Test dataset shape:", test_df.shape)

print("\nFirst 5 rows of the training data:")
print(train_df.head())

print("\nTraining dataset info:")
# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
train_df.info()

print("\nMissing values in the training data:")
print(train_df.isnull().sum())



Training dataset shape: (159571, 8)
Test dataset shape: (153164, 2)

First 5 rows of the training data:
                 id                                       comment_text  toxic  \
0  0000997932d777bf  Explanation\nWhy the edits made under my usern...      0   
1  000103f0d9cfb60f  D'aww! He matches this background colour I'm s...      0   
2  000113f07ec002fd  Hey man, I'm really not trying to edit war. It...      0   
3  0001b41b1c6bb37e  "\nMore\nI can't make any real suggestions on ...      0   
4  0001d958c54c6e35  You, sir, are my hero. Any chance you remember...      0   

   severe_toxic  obscene  threat  insult  identity_hate  
0             0        0       0       0              0  
1             0        0       0       0              0  
2             0        0       0       0              0  
3             0        0       0       0              0  
4             0        0       0       0              0  

Training dataset info:
<class 'pandas.core.frame.DataFrame'>

In [None]:
# The six binary target columns; summing each column gives its number of positive rows
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
print(train_df[label_cols].sum())


toxic            15294
severe_toxic      1595
obscene           8449
threat             478
insult            7877
identity_hate     1405
dtype: int64


# Text Cleaning


In [None]:
def clean_text(text):
  """Normalise a raw comment into lowercase, letters-only, single-spaced text.

  Steps, in order: lowercase, strip URLs, strip e-mail addresses, drop every
  character that is not a-z or whitespace, then collapse runs of whitespace.

  Args:
    text: The raw comment. Missing values (None or float NaN) are treated as
      an empty comment; any other non-string value is converted with str().

  Returns:
    The cleaned string (possibly empty).
  """
  if not isinstance(text, str):
    # A NaN/None cell would otherwise crash on .lower(); NaN is the only
    # float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
      return ''
    text = str(text)

  text = text.lower()

  # Remove URLs ('http\S+' already covers https, so no separate alternative is needed)
  text = re.sub(r'http\S+|www\S+', '', text)

  # Remove email addresses
  text = re.sub(r'\S+@\S+', '', text)

  # Remove special characters and numbers
  text = re.sub(r'[^a-z\s]', '', text)

  # Remove extra spacing
  text = re.sub(r'\s+', ' ', text).strip()

  return text


In [None]:
# Clean every comment and keep the result in a new column next to the raw text
train_df['clean_comment'] = train_df['comment_text'].apply(clean_text)
# Show raw vs. cleaned text for the first few rows
print(train_df[['comment_text', 'clean_comment']].head())


                                        comment_text  \
0  Explanation\nWhy the edits made under my usern...   
1  D'aww! He matches this background colour I'm s...   
2  Hey man, I'm really not trying to edit war. It...   
3  "\nMore\nI can't make any real suggestions on ...   
4  You, sir, are my hero. Any chance you remember...   

                                       clean_comment  
0  explanation why the edits made under my userna...  
1  daww he matches this background colour im seem...  
2  hey man im really not trying to edit war its j...  
3  more i cant make any real suggestions on impro...  
4  you sir are my hero any chance you remember wh...  


# Tokenization

Remove stop words and rare words.

In [None]:
import nltk
# Fetch the NLTK resources used below: the punkt tokenizer data and the stop-word lists
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')


# English stop words held in a set for constant-time membership tests
stop_words = set(stopwords.words('english'))

def tokenize_data(text):
  """Split `text` into word tokens and discard every token found in `stop_words`.

  Returns the surviving tokens as a list, in their original order.
  """
  kept = []
  for token in word_tokenize(text):
    if token in stop_words:
      continue
    kept.append(token)
  return kept

# Tokenize the cleaned comments (stop words removed) into a new list-valued column
train_df['tokens'] = train_df['clean_comment'].apply(tokenize_data)
print(train_df[['comment_text', 'tokens']].head())

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                                        comment_text  \
0  Explanation\nWhy the edits made under my usern...   
1  D'aww! He matches this background colour I'm s...   
2  Hey man, I'm really not trying to edit war. It...   
3  "\nMore\nI can't make any real suggestions on ...   
4  You, sir, are my hero. Any chance you remember...   

                                              tokens  
0  [explanation, edits, made, username, hardcore,...  
1  [daww, matches, background, colour, im, seemin...  
2  [hey, man, im, really, trying, edit, war, guy,...  
3  [cant, make, real, suggestions, improvement, w...  
4         [sir, hero, chance, remember, page, thats]  


In [None]:


from collections import Counter

def build_vocab(token_lists, min_freq=5):
  """Build a token -> integer-id mapping from an iterable of token lists.

  Ids 0 and 1 are reserved for '<PAD>' and '<UNK>'. Every token that occurs at
  least `min_freq` times across all lists gets the next free id (starting at
  2), assigned in order of first appearance.

  Args:
    token_lists: Iterable of token lists.
    min_freq: Minimum total count a token needs to be kept.

  Returns:
    dict mapping token -> id.
  """
  freq = Counter()
  for tokens in token_lists:
    freq.update(tokens)

  vocab = {'<PAD>': 0, '<UNK>': 1}

  # Tokens below the frequency cut-off are left out; callers map them to '<UNK>'.
  frequent_tokens = (token for token, count in freq.items() if count >= min_freq)
  for index, token in enumerate(frequent_tokens, start=2):
    vocab[token] = index

  return vocab


# NOTE: no subsetting happens here - train_subset is another name for the same
# DataFrame object as train_df, so the columns added below also appear on train_df.
train_subset = train_df



# Build the vocabulary; min_freq=3 overrides the function's default of 5
vocab = build_vocab(train_subset['tokens'], min_freq= 3)

print(f"Vocabulary size: {len(vocab)}")

# Map each token to its id; tokens missing from the vocabulary fall back to the '<UNK>' id
train_subset['token_ids'] = train_subset['tokens'].apply(lambda tokens: [vocab.get(token, vocab['<UNK>']) for token in tokens])

print(train_subset[['tokens', 'token_ids']].head())







Vocabulary size: 62476
                                              tokens  \
0  [explanation, edits, made, username, hardcore,...   
1  [daww, matches, background, colour, im, seemin...   
2  [hey, man, im, really, trying, edit, war, guy,...   
3  [cant, make, real, suggestions, improvement, w...   
4         [sir, hero, chance, remember, page, thats]   

                                           token_ids  
0  [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...  
1        [1, 28, 29, 30, 26, 31, 32, 33, 23, 34, 35]  
2  [36, 37, 26, 38, 39, 40, 41, 42, 43, 44, 45, 4...  
3  [54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6...  
4                           [95, 96, 97, 98, 24, 99]  


# Padding and Truncation

In [None]:
def pad_and_truncate(sequence, max_length, pad_value=0):
  """Return a copy of `sequence` that is exactly `max_length` items long.

  Longer sequences are cut off at the end; shorter ones are right-padded with
  `pad_value`. The slice creates a new list, so the caller's list is not modified.
  """
  clipped = sequence[:max_length]
  missing = max_length - len(clipped)
  clipped += [pad_value] * missing
  return clipped

# Fixed sequence length every comment is padded or truncated to
MAX_LEN = 120
train_subset['padded_ids'] = train_subset['token_ids'].apply(lambda seq: pad_and_truncate(seq, max_length=MAX_LEN, pad_value=vocab['<PAD>']))
print(train_subset[['token_ids', 'padded_ids']].head())

# Sanity check: after padding, every sequence should have the same length (MAX_LEN)
lengths = train_subset['padded_ids'].apply(len)
print("Unique sequence lengths after padding:", lengths.unique())



                                           token_ids  \
0  [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...   
1        [1, 28, 29, 30, 26, 31, 32, 33, 23, 34, 35]   
2  [36, 37, 26, 38, 39, 40, 41, 42, 43, 44, 45, 4...   
3  [54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6...   
4                           [95, 96, 97, 98, 24, 99]   

                                          padded_ids  
0  [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...  
1  [1, 28, 29, 30, 26, 31, 32, 33, 23, 34, 35, 0,...  
2  [36, 37, 26, 38, 39, 40, 41, 42, 43, 44, 45, 4...  
3  [54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 6...  
4  [95, 96, 97, 98, 24, 99, 0, 0, 0, 0, 0, 0, 0, ...  
Unique sequence lengths after padding: [120]


# Embedding


In [None]:
import torch
import torch.nn as nn

# Set size of each word vector
embedding_dim = 100

# One embedding row per vocabulary entry (including '<PAD>' and '<UNK>')
vocab_size = len(vocab)

# Get the index reserved for padding
pad_idx = vocab['<PAD>']

# Create embedding layer that maps word indices to embedding vectors
embedding_layer = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

# Select sample sequence from the processed training data
sample_sequence = torch.LongTensor(train_subset['padded_ids'].iloc[0])

# Pass sample sequence through the embedding layer to get its embeddings
sample_embeddings = embedding_layer(sample_sequence)

# Shape check only: this standalone layer is not the one used inside the model
print("Sample embeddings shape:", sample_embeddings.shape)
print("Embedding matrix shape:", embedding_layer.weight.shape)



Sample embeddings shape: torch.Size([120, 100])
Embedding matrix shape: torch.Size([62476, 100])


# Feed Forward Layer with LSTM and Dropout

In [None]:
from typing_extensions import final
import torch
import torch.nn as nn

class FeedForward(nn.Module):
  """Embedding -> bidirectional LSTM -> dropout -> linear multi-label classifier.

  The model returns raw logits (no sigmoid is applied), one per output label.

  Args:
    vocab_size: Number of rows in the embedding table.
    embedding_dim: Size of each token embedding.
    hidden_dim: Hidden size of the LSTM (per direction).
    output_dim: Number of output logits.
    pad_idx: Token id used for padding; passed to nn.Embedding as padding_idx.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, pad_idx):
    super().__init__()

    # Map word indices to dense vectors
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

    # Single-layer bidirectional LSTM over the embedded sequence
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)

    # Dropout for regularization - zeroes 50% of the features during training
    self.dropout = nn.Dropout(0.5)

    # The classifier consumes a single direction's final state (hidden_dim
    # features), so its input width is hidden_dim rather than 2 * hidden_dim.
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, text):
    """Compute logits for a batch of token-id sequences.

    Args:
      text: LongTensor of token ids, batch-first (the LSTM is built with
        batch_first=True).

    Returns:
      Tensor of logits with one row per sequence and output_dim columns.
    """
    # Pass word indices through embedding layer
    embedded = self.embedding(text)

    # Only the final hidden states are needed, not the per-step outputs
    _, (hidden, _) = self.lstm(embedded)

    # hidden stacks the final states of both directions; the last entry is the
    # backward direction's final state.
    # NOTE(review): the forward direction's final state (hidden[-2]) is unused;
    # concatenating both would require widening self.fc and retraining.
    f_hidden = hidden[-1]

    # Apply dropout and feed the *dropped* features to the classifier.
    # (Previously the dropout result was discarded and the un-dropped state
    # was classified, so dropout had no effect.)
    dropped = self.dropout(f_hidden)

    # Return logits for each class
    return self.fc(dropped)




In [None]:
# Params
vocab_size = len(vocab)
embedding_dim = 100
hidden_dim = 64
# One output logit per label column
output_dim = 6
pad_idx = vocab['<PAD>']

# Create instance of the FeedForward model
model = FeedForward(vocab_size, embedding_dim, hidden_dim, output_dim, pad_idx)

print(model)

# Retrieve first padded sequence, convert to torch LongTensor, add an extra dimension at the start to represent the batch
sample_sequence = torch.LongTensor(train_subset['padded_ids'].iloc[0]).unsqueeze(0)

# Pass sample sequence through the model to generate output logits
output_tensor = model(sample_sequence)

# Shape check for a batch of one sequence
print("Output tensor shape:", output_tensor.shape)


FeedForward(
  (embedding): Embedding(62476, 100, padding_idx=0)
  (lstm): LSTM(100, 64, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=64, out_features=6, bias=True)
)
Output tensor shape: torch.Size([1, 6])


# Training

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class ToxicDataset(Dataset):
  """Dataset that serves (token-id tensor, label tensor) pairs from a DataFrame.

  Args:
    dataFrame: DataFrame holding one example per row.
    text_col: Column containing the padded list of token ids.
    label_cols: Names of the label columns, in output order. Any iterable of
      column names is accepted; it is copied into a fresh list per instance.
  """

  def __init__(self, dataFrame, text_col='padded_ids', label_cols=('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')):
    self.dataFrame = dataFrame
    self.text_col = text_col
    # The default is an immutable tuple and each instance keeps its own list,
    # so instances never share (or accidentally mutate) one default object.
    # A list is also required for row[...] to select several columns.
    self.label_cols = list(label_cols)

  def __len__(self):
    """Number of rows (examples) in the underlying DataFrame."""
    return len(self.dataFrame)

  def __getitem__(self, idx):
    """Return (input_ids, labels) for the row at positional index `idx`."""
    # Get a row from the DataFrame at given index
    row = self.dataFrame.iloc[idx]

    # Convert the padded text (list of token IDs) into a torch LongTensor
    input_ids = torch.LongTensor(row[self.text_col])

    # Convert the label columns to a float32 tensor
    labels = torch.tensor(row[self.label_cols].astype('float32').values, dtype=torch.float32)

    return input_ids, labels



In [None]:
# Create custom dataset from the training subset using the ToxicDataset class
train_dataset = ToxicDataset(train_subset)

# Create a DataLoader to iterate over the training set
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Instantiate FeedForward model using defined parameters
model = FeedForward(vocab_size, embedding_dim, hidden_dim, output_dim, pad_idx)

# Define the loss function. The model returns raw logits (it applies no sigmoid),
# so the loss must be BCEWithLogitsLoss; plain BCELoss expects probabilities in [0, 1].
criterion = nn.BCEWithLogitsLoss()

# Initialize Adam optimizer with learning rate of 1e-3 and weight decay of 1e-5 (Help prevent overfitting)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



FeedForward(
  (embedding): Embedding(62476, 100, padding_idx=0)
  (lstm): LSTM(100, 64, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=64, out_features=6, bias=True)
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible
train_df, val_df = train_test_split(train_subset, random_state=42, test_size=0.2)

# Wrap each split in its own dataset object
train_dataset, val_dataset = ToxicDataset(train_df), ToxicDataset(val_df)

# Batch both splits; only the training loader shuffles
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
val_dataloader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32)


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score
import math, copy

# Define labels, thresholds, and class counts
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
# Hard-coded number of positive rows per label; these values match the label sums
# printed earlier in the notebook for the full training frame.
class_counts = {
    "toxic": 15294,
    "severe_toxic": 1595,
    "obscene": 8449,
    "threat": 478,
    "insult": 7877,
    "identity_hate": 1405
}

# Thresholds for converting predicted probabilities into binary labels
# NOTE(review): how these values were obtained is not shown here - presumably from a
# separate threshold search; confirm they were not tuned on the data they are scored on.
optimized_thresholds = {
    "toxic": 0.5082,
    "severe_toxic": 0.5408,
    "obscene": 0.6551,
    "threat": 0.1,
    "insult": 0.5082,
    "identity_hate": 0.1
}

# Compute weights using the square root ratio relative to the most frequent class
max_count = max(class_counts.values())
weights = [math.sqrt(max_count / class_counts[lbl]) for lbl in label_cols]
print("Computed weights:", {lbl: round(w, 2) for lbl, w in zip(label_cols, weights)})

# Convert the optimized thresholds into a numpy array
optimized_thresh_array = np.array([optimized_thresholds[lbl] for lbl in label_cols])
print("Optimized thresholds array:", optimized_thresh_array)

# Create a tensor for the weights, and use it in BCEWithLogitsLoss for multi-label classification
# (this rebinds `criterion`, replacing any loss object assigned to that name earlier)
pos_weight = torch.tensor(weights, device=device)  # tensor of shape (num_classes,)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Evaluation Function
def evaluate(model, dataloader, criterion, device, thresh=0.5):
    """
    Score `model` on every batch of `dataloader` without tracking gradients.

    Args:
      model: Module that maps a batch of inputs to logits.
      dataloader: Yields (inputs, labels) batches; must expose `.dataset`.
      criterion: Loss called as criterion(logits, labels).
      device: Device the batches are moved to before the forward pass.
      thresh: Scalar cut-off, or an array-like with one cut-off per label.

    Returns a 5-tuple:
      - average loss per example
      - list of per-class F1 scores
      - macro-averaged F1 (mean of the per-class scores)
      - all predicted probabilities as one array
      - all ground-truth labels as one array
    """
    model.eval()  # Evaluation mode (the caller switches back with model.train())
    total_loss = 0.0
    prob_batches, label_batches = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            batch_logits = model(batch_inputs)
            batch_loss = criterion(batch_logits, batch_labels)

            # Weight the batch-mean loss by batch size so the final average is per example
            total_loss += batch_loss.item() * batch_inputs.size(0)

            # Sigmoid turns logits into independent per-label probabilities
            prob_batches.append(torch.sigmoid(batch_logits).cpu().numpy())
            label_batches.append(batch_labels.cpu().numpy())

    val_loss = total_loss / len(dataloader.dataset)

    # Stack the per-batch arrays into single arrays
    all_probs = np.concatenate(prob_batches, axis=0)
    all_labels = np.concatenate(label_batches, axis=0)

    # A scalar cut-off is used as is; anything else becomes an array that
    # broadcasts across the label columns
    cutoff = thresh if np.isscalar(thresh) else np.asarray(thresh)
    bin_preds = (all_probs >= cutoff).astype(int)

    # One F1 score per label column
    num_labels = all_labels.shape[1]
    f1_per_class = [
        f1_score(all_labels[:, col], bin_preds[:, col], zero_division=0)
        for col in range(num_labels)
    ]
    f1_macro = np.mean(f1_per_class)

    return val_loss, f1_per_class, f1_macro, all_probs, all_labels

# Main Training Loop
num_epochs      = 8
patience        = 2  # Stop if no improvement for 2 consecutive epochs
checkpoint_path = "best_macroF1_weighted.pt"
delta           = 1e-4  # Min. improvement threshold

best_macro_f1     = 0.0
epochs_no_improve = 0

# Loop over each epoch
for epoch in range(1, num_epochs + 1):
    model.train() # Set model to training mode (evaluate() leaves it in eval mode)
    running_loss = 0.0  # Accumulate loss over batches

    # Loop over training data
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad() # Clear previous gradients

        logits = model(inputs)  # Get model outputs
        loss = criterion(logits, labels)  # Compute loss
        loss.backward() # Backpropagate
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Clip gradients
        optimizer.step()  # Update model parameters

        # Accumulate the loss weighted by batch size.
        # (A stray backtick on this line used to make the whole cell a SyntaxError.)
        running_loss += loss.item() * inputs.size(0)

    # Compute average training loss per example
    train_loss = running_loss / len(train_dataset)

    # Evaluate model on validation set and capture predictions
    val_loss, f1_per_class, f1_macro, all_probs, all_labels = evaluate(model, val_dataloader, criterion, device, thresh=optimized_thresh_array)

    # Save the best model, based on macro F1 - Use early stopping
    if f1_macro > best_macro_f1 + delta:
        best_macro_f1 = f1_macro
        epochs_no_improve = 0
        torch.save(model.state_dict(), checkpoint_path)
        print("New best model saved (macro‑F1)")
    else:
        epochs_no_improve += 1
        print(f"No macro‑F1 improvement for {epochs_no_improve} epoch(s)")

    print(f"Epoch {epoch}/{num_epochs}")
    print(f"  Train Loss  : {train_loss:.4f}")
    print(f"  Val   Loss  : {val_loss:.4f}")
    print(f"  F1 per class: {np.round(f1_per_class, 4).tolist()}")
    print(f"  F1 macro    : {f1_macro:.4f}")

    if epochs_no_improve >= patience:
        print("Early stopping on macro‑F1.")
        break

# If the final epoch was not the best one, the live weights and the captured
# all_probs / all_labels belong to a worse model than the saved checkpoint.
# Reload the best weights and re-score the validation set so that `model`,
# `all_probs` and `all_labels` all describe the best epoch.
# best_macro_f1 > 0 holds only if a checkpoint was written during this run.
if best_macro_f1 > 0.0 and epochs_no_improve > 0:
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    val_loss, f1_per_class, f1_macro, all_probs, all_labels = evaluate(model, val_dataloader, criterion, device, thresh=optimized_thresh_array)
    print(f"Restored best checkpoint (F1 macro {f1_macro:.4f})")

Computed weights: {'toxic': 1.0, 'severe_toxic': 3.1, 'obscene': 1.35, 'threat': 5.66, 'insult': 1.39, 'identity_hate': 3.3}
Optimized thresholds array: [0.5082 0.5408 0.6551 0.1    0.5082 0.1   ]
New best model saved (macro‑F1)
Epoch 1/8
  Train Loss  : 0.1077
  Val   Loss  : 0.0772
  F1 per class: [0.7638, 0.4088, 0.7504, 0.0855, 0.6864, 0.1962]
  F1 macro    : 0.4818
New best model saved (macro‑F1)
Epoch 2/8
  Train Loss  : 0.0703
  Val   Loss  : 0.0680
  F1 per class: [0.7629, 0.4989, 0.7951, 0.1592, 0.6795, 0.2573]
  F1 macro    : 0.5255
New best model saved (macro‑F1)
Epoch 3/8
  Train Loss  : 0.0622
  Val   Loss  : 0.0651
  F1 per class: [0.7787, 0.5105, 0.7885, 0.1176, 0.7128, 0.2696]
  F1 macro    : 0.5296
New best model saved (macro‑F1)
Epoch 4/8
  Train Loss  : 0.0576
  Val   Loss  : 0.0637
  F1 per class: [0.7847, 0.4866, 0.7724, 0.2197, 0.7099, 0.3587]
  F1 macro    : 0.5554
New best model saved (macro‑F1)
Epoch 5/8
  Train Loss  : 0.0539
  Val   Loss  : 0.0629
  F1 per cl

In [None]:
import numpy as np
# Persist the validation probabilities and labels captured by the training cell
np.save("all_probs.npy", all_probs)
np.save("all_labels.npy", all_labels)

In [None]:
# Reload the saved arrays from disk
all_p = np.load("all_probs.npy")
all_l = np.load("all_labels.npy")

# Inspect the *reloaded* arrays so this cell actually verifies the save/load round trip
# (previously it printed the in-memory all_probs / all_labels and never looked at the files).
print("Shape of all_probs:", all_p.shape)
print("Shape of all_labels:", all_l.shape)
print("Preview of all_probs:")
print(all_p[:5])
print("Preview of all_labels:")
print(all_l[:5])

Shape of all_probs: (31915, 6)
Shape of all_labels: (31915, 6)
Preview of all_probs:
[[2.9861578e-01 1.2028109e-03 4.2501405e-02 1.2003662e-03 2.0981105e-01
  2.7093254e-02]
 [1.7251348e-03 4.9220725e-05 3.2845960e-04 1.2406283e-04 2.6598977e-04
  1.5332771e-04]
 [3.4004752e-02 4.9237802e-04 7.1031009e-03 1.5335761e-03 9.2546688e-03
  2.8343578e-03]
 [1.3090563e-03 4.3905198e-05 3.5629544e-04 1.2484952e-04 1.2880859e-04
  7.3169460e-05]
 [1.4373098e-03 1.8635601e-05 2.3001932e-04 6.0563147e-05 1.7523204e-04
  2.3720022e-04]]
Preview of all_labels:
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


In [None]:
import numpy as np
import torch
from sklearn.metrics import (
    precision_recall_fscore_support,
    accuracy_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    f1_score,
)
from pathlib import Path

# Define labels and thresholds
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
optimized_thresholds = {
    "toxic": 0.5082,
    "severe_toxic": 0.5408,
    "obscene": 0.6551,
    "threat": 0.1,
    "insult": 0.5082,
    "identity_hate": 0.1
}
optimized_thresh_array = np.array([optimized_thresholds[lbl] for lbl in label_cols])
print("Optimized thresholds array:", optimized_thresh_array)

# Binarize predictions using the per-label thresholds (broadcast across rows)
bin_preds = (all_probs >= optimized_thresh_array).astype(int)

# Compute per-label Precision, Recall, and F1; macro-F1 is their unweighted mean
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, bin_preds, average=None, zero_division=0)
macro_f1 = np.mean(f1)

# Micro averaged metrics: pool true/false positives and false negatives over all
# labels. The 2-D indicator matrices must be passed as they are - flattening them
# first makes sklearn treat the data as one binary problem whose "micro" average
# over both classes is just element-wise accuracy, dominated by true negatives.
micro_p, micro_r, micro_f1, _ = precision_recall_fscore_support(
    all_labels, bin_preds, average="micro", zero_division=0
)

# Subset accuracy: a row counts as correct only if all of its labels match
subset_acc = accuracy_score(all_labels.tolist(), bin_preds.tolist())
roc_auc_macro = roc_auc_score(all_labels, all_probs, average="macro")
pr_auc_macro = average_precision_score(all_labels, all_probs, average="macro")

# Compute all label Confusion Matrices
conf_matrices = {}
jacc_per_label = []
for i, lbl in enumerate(label_cols):
    # labels=[0, 1] keeps every matrix 2x2 even if a label never occurs in the data
    cm = confusion_matrix(all_labels[:, i], bin_preds[:, i], labels=[0, 1])
    conf_matrices[lbl] = cm
    print(f"Confusion matrix for {lbl}:")
    print(cm)
    print()


print("\n===== Evaluation Metrics =====")
for i, lbl in enumerate(label_cols):
    print(f"{lbl:15s}  Precision: {prec[i]:.3f}  Recall: {rec[i]:.3f}  F1: {f1[i]:.3f}")
print("-------------------------------------------------")
print(f"Macro‑F1           : {macro_f1:.4f}")
print(f"Micro‑F1           : {micro_f1:.4f}")
print(f"Subset accuracy    : {subset_acc:.4f}")
print(f"ROC‑AUC (macro)    : {roc_auc_macro:.4f}")
print(f"PR‑AUC  (macro)    : {pr_auc_macro:.4f}")
print("=================================================\n")

Optimized thresholds array: [0.5082 0.5408 0.6551 0.1    0.5082 0.1   ]
Confusion matrix for toxic:
[[28512   347]
 [  896  2160]]

Confusion matrix for severe_toxic:
[[31399   195]
 [  145   176]]

Confusion matrix for obscene:
[[30027   173]
 [  461  1254]]

Confusion matrix for threat:
[[31583   258]
 [   17    57]]

Confusion matrix for insult:
[[29794   507]
 [  431  1183]]

Confusion matrix for identity_hate:
[[31057   564]
 [   84   210]]


===== Evaluation Metrics =====
toxic            Precision: 0.862  Recall: 0.707  F1: 0.777
severe_toxic     Precision: 0.474  Recall: 0.548  F1: 0.509
obscene          Precision: 0.879  Recall: 0.731  F1: 0.798
threat           Precision: 0.181  Recall: 0.770  F1: 0.293
insult           Precision: 0.700  Recall: 0.733  F1: 0.716
identity_hate    Precision: 0.271  Recall: 0.714  F1: 0.393
-------------------------------------------------
Macro‑F1           : 0.5810
Micro‑F1           : 0.9787
Subset accuracy    : 0.9104
ROC‑AUC (macro)    : 0.

# Use the learned word embeddings to find similar words

In [None]:
import numpy as np

# Extract the embedding weights from the model's embedding layer
# (moved to the CPU and converted to a NumPy array)
model.eval()
embedding_weights = model.embedding.weight.data.cpu().numpy()

# Build an inverse vocabulary (mapping index to word).
inv_vocab = {idx: word for word, idx in vocab.items()}

def cosine_similarity(a, b):
    """Compute the cosine similarity between two vectors.

    Returns 0.0 when either vector has zero length, where the cosine is
    undefined; a plain division would yield NaN (for example for an all-zero
    embedding row) and NaN values break any later sort.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def find_similar_words(query_word, embedding_weights, vocab, inv_vocab, top_n=10):
    """Find the top_n words most similar to the query_word.

    Similarity is the cosine between embedding rows. The query word itself is
    excluded from the result.

    Args:
        query_word: Word to look up.
        embedding_weights: 2-D array with one embedding row per vocabulary id.
        vocab: Mapping word -> row index.
        inv_vocab: Mapping row index -> word.
        top_n: Maximum number of neighbours to return.

    Returns:
        List of (word, similarity) tuples, most similar first. Empty if the
        query word is unknown (a message is printed) or top_n is not positive.
    """
    if query_word not in vocab:
        print(f"'{query_word}' not found in the vocabulary.")
        return []
    if top_n <= 0:
        return []

    query_idx = vocab[query_word]
    weights = np.asarray(embedding_weights)
    query_vec = weights[query_idx]

    # Cosine similarity against every row at once. Rows with zero norm (such as
    # an all-zero padding embedding) get similarity 0 instead of NaN.
    dots = weights @ query_vec
    norms = np.linalg.norm(weights, axis=1) * np.linalg.norm(query_vec)
    nonzero = norms > 0
    sims = np.where(nonzero, dots / np.where(nonzero, norms, 1.0), 0.0)

    # Walk the rows from most to least similar, skipping the query itself, and
    # stop after exactly top_n neighbours. (The previous [1:top_n] slice dropped
    # the best match and returned only top_n - 1 words.)
    similar_words = []
    for idx in np.argsort(-sims, kind="stable"):
        idx = int(idx)
        if idx == query_idx or idx not in inv_vocab:
            continue
        similar_words.append((inv_vocab[idx], float(sims[idx])))
        if len(similar_words) == top_n:
            break
    return similar_words

# Find words similar to "sucker"
similar_words = find_similar_words("sucker", embedding_weights, vocab, inv_vocab, top_n=20)
print("Words similar to 'sucker':")
# One "word: similarity" line per neighbour, similarity shown to 4 decimals
for word, sim in similar_words:
    print(f"{word}: {sim:.4f}")


Words similar to 'sucker':
badaga: 0.3560
ramped: 0.3459
provocateur: 0.3375
chicken: 0.3367
stupid: 0.3361
marathonios: 0.3318
sizzling: 0.3305
vicepresidential: 0.3296
infactual: 0.3289
twenties: 0.3268
ayoubmalouk: 0.3264
nao: 0.3208
moron: 0.3207
thong: 0.3205
dogshit: 0.3185
vcard: 0.3181
transitioned: 0.3150
ewhc: 0.3130
tormenting: 0.3130
