### Import Libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import warnings

warnings.filterwarnings('ignore')

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize



# Connect to drive

Extract and inspect the dataset.


In [None]:
# Mount Google Drive at /content/drive so the zipped dataset files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

# List the dataset folder to confirm which archives are available.
!ls '/content/drive/MyDrive/jigsaw-toxic-comment-classification-challenge'

import zipfile
import pandas as pd




Mounted at /content/drive
sample_submission.csv.zip  test.csv.zip  test_labels.csv.zip  train.csv.zip


In [None]:
# Folder on the mounted Drive that holds the zipped CSV files
zip_dir = '/content/drive/MyDrive/jigsaw-toxic-comment-classification-challenge/'

# Extract the train and test archives (in that order) into /content/
for archive_name in ('train.csv.zip', 'test.csv.zip'):
    with zipfile.ZipFile(zip_dir + archive_name, 'r') as zip_ref:
        zip_ref.extractall('/content/')

# Read the extracted CSVs into DataFrames
train_df, test_df = pd.read_csv('/content/train.csv'), pd.read_csv('/content/test.csv')

In [None]:
# Load data and inspect data
# NOTE: these relative paths resolve against the current working directory, so this
# cell only finds the files when the notebook runs from the folder they were extracted to.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

print("Training dataset shape:", train_df.shape)
print("Test dataset shape:", test_df.shape)

print("\nFirst 5 rows of the training data:")
print(train_df.head())


print("\nTraining dataset info:")
# DataFrame.info() writes its report itself and returns None; wrapping it in
# print() would add a stray "None" line after the report.
train_df.info()


print("\nMissing values in the training data:")
print(train_df.isnull().sum())



Training dataset shape: (159571, 8)
Test dataset shape: (153164, 2)

First 5 rows of the training data:
                 id                                       comment_text  toxic  \
0  0000997932d777bf  Explanation\nWhy the edits made under my usern...      0   
1  000103f0d9cfb60f  D'aww! He matches this background colour I'm s...      0   
2  000113f07ec002fd  Hey man, I'm really not trying to edit war. It...      0   
3  0001b41b1c6bb37e  "\nMore\nI can't make any real suggestions on ...      0   
4  0001d958c54c6e35  You, sir, are my hero. Any chance you remember...      0   

   severe_toxic  obscene  threat  insult  identity_hate  
0             0        0       0       0              0  
1             0        0       0       0              0  
2             0        0       0       0              0  
3             0        0       0       0              0  
4             0        0       0       0              0  

Training dataset info:
<class 'pandas.core.frame.DataFrame'>

In [None]:
# The six binary target columns; show how many comments carry each label
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
label_totals = train_df[label_cols].sum()
print(label_totals)


toxic            15294
severe_toxic      1595
obscene           8449
threat             478
insult            7877
identity_hate     1405
dtype: int64


# Text Cleaning


In [None]:
def clean_text(text):
  """Normalise a raw comment into lowercase, letters-only, single-spaced text.

  Steps, in order: lowercase, drop URLs, drop e-mail addresses, drop every
  character that is not a-z or whitespace, collapse whitespace runs.

  Args:
    text: the raw comment. None and NaN are treated as an empty comment;
      any other non-string value is converted with str() first.

  Returns:
    The cleaned string (possibly empty).
  """
  # Missing values would otherwise fail on .lower(); NaN is the only float
  # that is not equal to itself.
  if text is None or (isinstance(text, float) and text != text):
    return ''

  text = str(text).lower()

  # Remove URLs ('http\S+' already covers 'https...', so no separate branch is needed)
  text = re.sub(r'http\S+|www\S+', '', text)

  # Remove email addresses
  text = re.sub(r'\S+@\S+', '', text)

  # Remove special characters and numbers (deleted, not replaced by a space,
  # so "i'm" becomes "im")
  text = re.sub(r'[^a-z\s]', '', text)

  # Collapse runs of whitespace (including newlines) and trim the ends
  text = re.sub(r'\s+', ' ', text).strip()

  return text


In [None]:
# Store a cleaned copy of every comment next to the raw text, then compare the first rows
train_df['clean_comment'] = train_df['comment_text'].map(clean_text)
preview_cols = ['comment_text', 'clean_comment']
print(train_df[preview_cols].head())


                                        comment_text  \
0  Explanation\nWhy the edits made under my usern...   
1  D'aww! He matches this background colour I'm s...   
2  Hey man, I'm really not trying to edit war. It...   
3  "\nMore\nI can't make any real suggestions on ...   
4  You, sir, are my hero. Any chance you remember...   

                                       clean_comment  
0  explanation why the edits made under my userna...  
1  daww he matches this background colour im seem...  
2  hey man im really not trying to edit war its j...  
3  more i cant make any real suggestions on impro...  
4  you sir are my hero any chance you remember wh...  


# Tokenization

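Load the pretrained DistilBERT tokenizer. Note that no explicit stop-word or rare-word removal is performed; the cleaned text is passed to the tokenizer as-is.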

In [None]:
# Install the `transformers` package into the runtime, then import the DistilBERT model class
# that CommentClassifier uses as its encoder.
!pip install transformers
from transformers import DistilBertModel



In [None]:
from transformers import DistilBertTokenizer

# Name of the pretrained checkpoint whose tokenizer is loaded
checkpoint_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint_name)

# Report how many entries the tokenizer's vocabulary has
print("Vocabulary size:", tokenizer.vocab_size)



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Vocabulary size: 30522


# DistilBERT + feedforward


In [None]:

import torch.nn as nn

class CommentClassifier(nn.Module):
  """DistilBERT encoder followed by one linear layer that emits a logit per label.

  Args:
    num_labels: number of output logits. Defaults to 6, the number of label
      columns used in this notebook.
    model_name: pretrained checkpoint passed to DistilBertModel.from_pretrained.
  """

  def __init__(self, num_labels=6, model_name='distilbert-base-uncased'):
    super().__init__()
    self.distilbert = DistilBertModel.from_pretrained(model_name)
    # The head maps the encoder's hidden size to one raw score per label
    self.classifier = nn.Linear(self.distilbert.config.hidden_size, num_labels)

  def forward(self, input_ids, attention_mask):
    """Return raw (pre-sigmoid) logits, one row per input sequence.

    Args:
      input_ids: token ids, batched along the first dimension.
      attention_mask: mask matching input_ids, forwarded to the encoder.
    """
    outputs = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
    # Use the hidden state at sequence position 0 as the summary of each sequence
    cls_output = outputs.last_hidden_state[:, 0]
    return self.classifier(cls_output)



# Embedding


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class ToxicDataset(Dataset):
  """Dataset that tokenizes one comment per item and pairs it with its label vector.

  Args:
    dataFrame: DataFrame holding the text column and the label columns.
    text_col: name of the column whose text is tokenized.
    label_cols: iterable of label column names; when omitted, the six
      toxicity labels in DEFAULT_LABEL_COLS are used.
    max_length: every sequence is padded / truncated to this many tokens.
    tokenizer: tokenizer callable. When omitted, the notebook-level
      `tokenizer` variable is used, which is what the class relied on before.

  Raises:
    ValueError: if no tokenizer is passed and none exists at notebook level.
  """

  # Immutable default, so instances never share (and cannot mutate) one list
  DEFAULT_LABEL_COLS = ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')

  def __init__(self, dataFrame, text_col='clean_comment', label_cols=None, max_length=120, tokenizer=None):
    if tokenizer is None:
      # The parameter shadows the global name, so look the global up explicitly
      tokenizer = globals().get('tokenizer')
      if tokenizer is None:
        raise ValueError("No tokenizer given and no global `tokenizer` is defined")

    self.dataFrame = dataFrame
    self.tokenizer = tokenizer
    self.text_col = text_col
    # Always a fresh list: a list (not a tuple) is what selects several entries of a row
    self.label_cols = list(self.DEFAULT_LABEL_COLS if label_cols is None else label_cols)
    self.max_length = max_length

  def __len__(self):
    """Number of rows in the wrapped DataFrame."""
    return len(self.dataFrame)

  def __getitem__(self, idx):
    """Return a dict with 'input_ids', 'attention_mask' and 'labels' for row `idx`."""
    # Positional lookup, independent of the DataFrame's index labels
    row = self.dataFrame.iloc[idx]
    text = row[self.text_col]

    # Tokenize to a fixed length; the tokenizer returns tensors with a leading batch axis
    encoding = self.tokenizer(text, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt')

    # Drop that leading axis so the DataLoader can stack items into a batch
    input_ids = encoding['input_ids'].squeeze(0)
    attention_mask = encoding['attention_mask'].squeeze(0)

    # Label columns as a float32 tensor
    labels = torch.tensor(row[self.label_cols].astype('float32').values, dtype=torch.float32)

    return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': labels}



In [None]:
import torch
import torch.nn as nn


# Smoke test: run a single example through a freshly built classifier
dataset = ToxicDataset(train_df)
sample = dataset[0]

for caption, key in (("Input IDs shape:", 'input_ids'), ("Attention mask shape:", 'attention_mask')):
  print(caption, sample[key].shape)
print("Labels:", sample['labels'])

# Add a leading batch axis of size 1 before calling the model
input_ids, attention_mask = (sample[key].unsqueeze(0) for key in ('input_ids', 'attention_mask'))

model = CommentClassifier()

# Evaluation mode and no gradient tracking: only the output shape is of interest
model.eval()
with torch.no_grad():
  logits = model(input_ids, attention_mask)

print("Logits shape:", logits.shape)


Input IDs shape: torch.Size([120])
Attention mask shape: torch.Size([120])
Labels: tensor([0., 0., 0., 0., 0., 0.])


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Logits shape: torch.Size([1, 6])


# Training

In [None]:
# Dataset and loader over the full training frame.
# NOTE: both are rebuilt from the train/validation split in a later cell before training starts.
train_dataset = ToxicDataset(train_df)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Build the DistilBERT classifier and freeze the encoder so that only the
# linear head is trainable at first
model = CommentClassifier()
for param in model.distilbert.parameters():
    param.requires_grad = False

# Adam with learning rate 1e-3 and weight decay 1e-3 (regularisation against overfitting).
# Only parameters that still require gradients are handed over; frozen ones would never be updated.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-3, weight_decay=1e-3)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Module.to() returns the module itself; assigning it keeps the cell from
# echoing the full model repr as its output
model = model.to(device)



CommentClassifier(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation (fixed seed for repeatability)
train_df_split, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Keep a random half of each partition to reduce the training size
train_df_split, val_df = (part.sample(frac=0.5, random_state=42) for part in (train_df_split, val_df))

# Wrap each partition in a dataset object
train_dataset, val_dataset = ToxicDataset(train_df_split), ToxicDataset(val_df)

# Batch loaders; only the training loader shuffles
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score
import math, copy

# Define labels, thresholds, and class counts
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Number of positive examples per label, counted on the full training frame.
# Counting here instead of hard-coding the numbers keeps the weights in sync with the data.
class_counts = {lbl: int(train_df[lbl].sum()) for lbl in label_cols}

# Thresholds for converting predicted probabilities into binary labels.
# An earlier set of values, kept for reference only:
#   toxic 0.5082, severe_toxic 0.5408, obscene 0.6551,
#   threat 0.1, insult 0.5082, identity_hate 0.1
optimized_thresholds = {
    "toxic": 0.2,
    "severe_toxic": 0.2,
    "obscene": 0.2,
    "threat": 0.05,
    "insult": 0.2,
    "identity_hate": 0.05
}

# Compute weights using the square root ratio relative to the most frequent class,
# so the most frequent label gets weight 1.0 and rarer labels get larger weights
max_count = max(class_counts.values())
weights = [math.sqrt(max_count / class_counts[lbl]) for lbl in label_cols]
print("Computed weights:", {lbl: round(w, 2) for lbl, w in zip(label_cols, weights)})

# Convert the optimized thresholds into a numpy array ordered like label_cols
optimized_thresh_array = np.array([optimized_thresholds[lbl] for lbl in label_cols])
print("Optimized thresholds array:", optimized_thresh_array)

# Create a tensor for the weights, and use it in BCEWithLogitsLoss for multi-label classification
pos_weight = torch.tensor(weights, device=device)  # one weight per label
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Evaluation Function
def evaluate(model, dataloader, criterion, device, thresh=0.5):
    """Run the model over `dataloader` without gradients and score its predictions.

    Args:
        model: called as model(input_ids=..., attention_mask=...) and expected to return logits.
        dataloader: yields dicts with 'input_ids', 'attention_mask' and 'labels'.
        criterion: loss called as criterion(logits, labels).
        device: device the batches are moved to.
        thresh: probability cut-off; either one scalar for all labels or an
            array-like with one value per label.

    Returns:
        Tuple (val_loss, f1_per_class, f1_macro, all_probs, all_labels):
          - val_loss: loss averaged over all examples
          - f1_per_class: list with one F1 score per label
          - f1_macro: mean of f1_per_class
          - all_probs: numpy array of sigmoid probabilities, one row per example
          - all_labels: numpy array of the true labels, one row per example

    Raises:
        ValueError: if the dataloader's dataset is empty.
    """
    num_examples = len(dataloader.dataset)
    if num_examples == 0:
        raise ValueError("evaluate() needs a non-empty dataset")

    model.eval()  # Set model to evaluation mode
    val_loss, all_probs, all_labels = 0.0, [], []

    # No gradients needed for evaluation
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(logits, labels)

            # Weight the batch loss by the batch size so a smaller last batch
            # does not skew the average
            val_loss += loss.item() * input_ids.size(0)

            probs = torch.sigmoid(logits)

            all_probs.append(probs.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    val_loss /= num_examples  # Average loss over the entire validation set

    # Concatenate all batch outputs into single arrays
    all_probs = np.concatenate(all_probs, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    # Convert probabilities to binary predictions. np.asarray handles a scalar
    # and a per-label array alike; broadcasting applies either to every row.
    thresh = np.asarray(thresh)
    bin_preds = (all_probs >= thresh).astype(int)
    print("Predicted positives per class:", bin_preds.sum(axis=0))

    # Compute F1 scores for each label (0 when a label has no positives at all)
    f1_per_class = [f1_score(all_labels[:, i], bin_preds[:, i], zero_division=0)
                    for i in range(all_labels.shape[1])]
    f1_macro = np.mean(f1_per_class)

    return val_loss, f1_per_class, f1_macro, all_probs, all_labels

# Main Training Loop
num_epochs      = 8
patience        = 3  # Stop after 3 consecutive epochs without macro-F1 improvement
checkpoint_path = "best_macroF1_weighted.pt"
delta           = 1e-4  # Min. improvement threshold

best_macro_f1     = 0.0
epochs_no_improve = 0
# Validation outputs of the best epoch, so the arrays left behind after
# training match the checkpoint that is restored at the end
best_probs, best_labels = None, None

# Loop over each epoch
for epoch in range(1, num_epochs + 1):
    model.train() # Set model to training mode
    running_loss = 0.0  # Accumulate loss over batches

    # Loop over training data
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(logits, labels)

        loss.backward()
        # Cap the global gradient norm at 1.0 before the update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Weight by batch size so the epoch average is per example
        running_loss += loss.item() * input_ids.size(0)
    # Compute average training loss
    train_loss = running_loss / len(train_dataset)

    # Evaluate model on validation set and capture predictions
    val_loss, f1_per_class, f1_macro, all_probs, all_labels = evaluate(model, val_dataloader, criterion, device, thresh=optimized_thresh_array)


    # Save the best model, based on macro F1 - Use early stopping
    if f1_macro > best_macro_f1 + delta:
        best_macro_f1 = f1_macro
        epochs_no_improve = 0
        torch.save(model.state_dict(), checkpoint_path)
        best_probs, best_labels = all_probs, all_labels
        print("New best model saved (macro‑F1)")
    else:
        epochs_no_improve += 1
        print(f"No macro‑F1 improvement for {epochs_no_improve} epoch(s)")

    print(f"Epoch {epoch}/{num_epochs}")
    print(f"  Train Loss  : {train_loss:.4f}")
    print(f"  Val   Loss  : {val_loss:.4f}")
    print(f"  F1 per class: {np.round(f1_per_class, 4).tolist()}")
    print(f"  F1 macro    : {f1_macro:.4f}")

    # After the first epoch, unfreeze the encoder and switch to a much smaller
    # learning rate for fine-tuning the whole model
    if epoch == 1:
        for param in model.distilbert.parameters():
            param.requires_grad = True
        print(" Unfroze BERT for full fine-tuning")
        optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)


    if epochs_no_improve >= patience:
        print("Early stopping on macro‑F1.")
        break

# Training ends on the weights of the LAST epoch, which may be worse than the
# saved best. Restore the best checkpoint and its validation outputs so that
# `model`, `all_probs` and `all_labels` all describe the same (best) epoch.
if best_probs is not None:
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    all_probs, all_labels = best_probs, best_labels
    print(f"Restored best checkpoint (macro F1 {best_macro_f1:.4f})")

Computed weights: {'toxic': 1.0, 'severe_toxic': 3.1, 'obscene': 1.35, 'threat': 5.66, 'insult': 1.39, 'identity_hate': 3.3}
Optimized thresholds array: [0.2  0.2  0.2  0.05 0.2  0.05]
Predicted positives per class: [1607  229  937   47  884  259]
New best model saved (macro‑F1)
Epoch 1/8
  Train Loss  : 0.0296
  Val   Loss  : 0.0070
  F1 per class: [0.9703, 0.8286, 0.9526, 0.881, 0.9527, 0.6952]
  F1 macro    : 0.8801
 Unfroze BERT for full fine-tuning
Predicted positives per class: [1552  209  917   54  825  251]
New best model saved (macro‑F1)
Epoch 2/8
  Train Loss  : 0.0137
  Val   Loss  : 0.0048
  F1 per class: [0.9863, 0.8733, 0.9645, 0.8132, 0.9822, 0.7095]
  F1 macro    : 0.8882
Predicted positives per class: [1530  199  903   48  849  150]
New best model saved (macro‑F1)
Epoch 3/8
  Train Loss  : 0.0073
  Val   Loss  : 0.0047
  F1 per class: [0.9882, 0.8975, 0.9676, 0.8706, 0.9704, 0.9514]
  F1 macro    : 0.9410
Predicted positives per class: [1556  179  867   50  831  179]
N

In [None]:
import numpy as np
# Write the validation probabilities and labels to .npy files in the working directory
for file_name, array in (("all_probs.npy", all_probs), ("all_labels.npy", all_labels)):
    np.save(file_name, array)

In [None]:
# Reload the saved arrays into the names the following cells actually use, so
# this cell also works on a fresh kernel where training has not been run.
all_probs = np.load("all_probs.npy")
all_labels = np.load("all_labels.npy")

# Short aliases defined by the original version of this cell
all_p, all_l = all_probs, all_labels

print("Shape of all_probs:", all_probs.shape)
print("Shape of all_labels:", all_labels.shape)
print("Preview of all_probs:")
print(all_probs[:5])
print("Preview of all_labels:")
print(all_labels[:5])

Shape of all_probs: (15958, 6)
Shape of all_labels: (15958, 6)
Preview of all_probs:
[[5.0580157e-10 3.5802760e-08 1.8598004e-09 8.4338815e-08 2.3970670e-09
  4.3016200e-08]
 [1.0224387e-10 4.5792232e-09 1.0565295e-10 5.0473421e-08 2.3714219e-10
  8.0597369e-09]
 [9.8463595e-01 6.1674213e-01 8.4560370e-01 1.1519099e-04 9.9499351e-01
  9.9997485e-01]
 [3.1222231e-09 4.2464880e-08 1.6876607e-09 1.0156429e-06 1.5791761e-08
  1.8919565e-07]
 [1.8589895e-06 2.6440705e-06 1.1236641e-05 1.7324023e-06 2.7202836e-06
  7.8526983e-07]]
Preview of all_labels:
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [1. 1. 0. 0. 1. 1.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


In [None]:
import numpy as np
import torch
from sklearn.metrics import (
    precision_recall_fscore_support,
    accuracy_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    f1_score,
)
from pathlib import Path

# Define labels and thresholds
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
optimized_thresholds = {
    "toxic": 0.2,
    "severe_toxic": 0.2,
    "obscene": 0.2,
    "threat": 0.05,
    "insult": 0.2,
    "identity_hate": 0.05
}
optimized_thresh_array = np.array([optimized_thresholds[lbl] for lbl in label_cols])
print("Optimized thresholds array:", optimized_thresh_array)

# Binarize predictions using the per-label thresholds (broadcast across rows)
bin_preds = (all_probs >= optimized_thresh_array).astype(int)
# Integer copy of the labels so truth and predictions share one dtype
true_labels = all_labels.astype(int)

# Compute Per-label Precision, Recall, and F1 metrics
prec, rec, f1, _ = precision_recall_fscore_support(true_labels, bin_preds, average=None, zero_division=0)
macro_f1 = np.mean(f1)

# Micro averaged metrics.
# The 2-D indicator matrices are passed as they are. Flattening them first would
# make this a binary problem in which micro-averaging also counts the negative
# class, and the resulting "F1" would simply be element-wise accuracy.
micro_p, micro_r, micro_f1, _ = precision_recall_fscore_support(
    true_labels, bin_preds, average="micro", zero_division=0
)

# Subset accuracy: a row counts as correct only if every label matches
subset_acc = accuracy_score(true_labels, bin_preds)
roc_auc_macro = roc_auc_score(true_labels, all_probs, average="macro")
pr_auc_macro = average_precision_score(true_labels, all_probs, average="macro")

# Compute all label Confusion Matrices
conf_matrices = {}
for i, lbl in enumerate(label_cols):
    # labels=[0, 1] fixes the 2x2 layout even if one class is absent from the data
    cm = confusion_matrix(true_labels[:, i], bin_preds[:, i], labels=[0, 1])
    conf_matrices[lbl] = cm
    print(f"Confusion matrix for {lbl}:")
    print(cm)
    print()


print("\n===== Evaluation Metrics =====")
for i, lbl in enumerate(label_cols):
    print(f"{lbl:15s}  Precision: {prec[i]:.3f}  Recall: {rec[i]:.3f}  F1: {f1[i]:.3f}")
print("-------------------------------------------------")
print(f"Macro‑F1           : {macro_f1:.4f}")
print(f"Micro‑F1           : {micro_f1:.4f}")
print(f"Subset accuracy    : {subset_acc:.4f}")
print(f"ROC‑AUC (macro)    : {roc_auc_macro:.4f}")
print(f"PR‑AUC  (macro)    : {pr_auc_macro:.4f}")
print("=================================================\n")

Optimized thresholds array: [0.2  0.2  0.2  0.05 0.2  0.05]
Confusion matrix for toxic:
[[14391    47]
 [   25  1495]]

Confusion matrix for severe_toxic:
[[15767    29]
 [    3   159]]

Confusion matrix for obscene:
[[15038    64]
 [    4   852]]

Confusion matrix for threat:
[[15905    16]
 [    0    37]]

Confusion matrix for insult:
[[15081    69]
 [   10   798]]

Confusion matrix for identity_hate:
[[15794    26]
 [    0   138]]


===== Evaluation Metrics =====
toxic            Precision: 0.970  Recall: 0.984  F1: 0.976
severe_toxic     Precision: 0.846  Recall: 0.981  F1: 0.909
obscene          Precision: 0.930  Recall: 0.995  F1: 0.962
threat           Precision: 0.698  Recall: 1.000  F1: 0.822
insult           Precision: 0.920  Recall: 0.988  F1: 0.953
identity_hate    Precision: 0.841  Recall: 1.000  F1: 0.914
-------------------------------------------------
Macro‑F1           : 0.9226
Micro‑F1           : 0.9969
Subset accuracy    : 0.9833
ROC‑AUC (macro)    : 0.9998
PR‑AUC 

# Generate word embeddings to show similar words

In [None]:
import numpy as np

# Extract the token embedding matrix from the DistilBERT encoder inside the classifier.
# CommentClassifier has no `embedding` attribute; the encoder's input embedding
# layer is reached through get_input_embeddings().
model.eval()
embedding_weights = model.distilbert.get_input_embeddings().weight.detach().cpu().numpy()

# Token -> id mapping taken from the tokenizer, plus its inverse (id -> token)
vocab = tokenizer.get_vocab()
inv_vocab = {idx: word for word, idx in vocab.items()}

def cosine_similarity(a, b):
    """Compute the cosine similarity between two vectors.

    Returns 0.0 when either vector has zero length, where the cosine is
    undefined and the plain formula would divide by zero.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def find_similar_words(query_word, embedding_weights, vocab, inv_vocab, top_n=10):
    """Find the top_n words most similar to the query_word.

    Args:
        query_word: word to look up.
        embedding_weights: 2-D array with one embedding row per vocabulary index.
        vocab: mapping word -> row index.
        inv_vocab: mapping row index -> word.
        top_n: how many neighbours to return.

    Returns:
        A list of at most top_n (word, cosine_similarity) tuples, most similar
        first, never containing query_word itself. Empty when query_word is not
        in vocab (a message is printed) or top_n is not positive.
    """
    if query_word not in vocab:
        print(f"'{query_word}' not found in the vocabulary.")
        return []
    if top_n <= 0:
        return []

    matrix = np.asarray(embedding_weights)
    query_vec = matrix[vocab[query_word]]

    # Cosine similarity of the query against every row in one vectorised pass
    dots = matrix @ query_vec
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_vec)
    sims = np.zeros(len(matrix), dtype=np.float64)
    nonzero = norms != 0  # zero-length rows keep similarity 0 instead of NaN
    sims[nonzero] = dots[nonzero] / norms[nonzero]

    # Highest similarity first; a stable sort keeps index order among ties
    order = np.argsort(-sims, kind="stable")

    similar_words = []
    for idx in order:
        word = inv_vocab[int(idx)]
        if word == query_word:
            continue  # skip the query itself; nothing else is dropped
        similar_words.append((word, float(sims[idx])))
        if len(similar_words) == top_n:
            break
    return similar_words

# Look up the 20 nearest neighbours of "sucker" and list each with its similarity score
similar_words = find_similar_words("sucker", embedding_weights, vocab, inv_vocab, top_n=20)
print("Words similar to 'sucker':")
for neighbour, score in similar_words:
    print(f"{neighbour}: {score:.4f}")


Words similar to 'sucker':
badaga: 0.3560
ramped: 0.3459
provocateur: 0.3375
chicken: 0.3367
stupid: 0.3361
marathonios: 0.3318
sizzling: 0.3305
vicepresidential: 0.3296
infactual: 0.3289
twenties: 0.3268
ayoubmalouk: 0.3264
nao: 0.3208
moron: 0.3207
thong: 0.3205
dogshit: 0.3185
vcard: 0.3181
transitioned: 0.3150
ewhc: 0.3130
tormenting: 0.3130
