## Training a BERT Model

Importing the required libraries

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from transformers import TrainingArguments, Trainer
from sklearn.model_selection import train_test_split

In [None]:
import json
import pandas as pd

# Location of the JSON-lines training file
file_path = "z639_assignment1_training.json"

# Decode every line of the file as its own JSON record
with open(file_path, "r", encoding="utf-8") as file:
    data = list(map(json.loads, file))

# Wrap the decoded records in a Pandas DataFrame
df = pd.DataFrame(data)

# Preview the first rows to inspect the dataset structure
df.head()

Extracting the label with majority voting

In [None]:
# Majority vote over the per-annotator toxicity flags of a single comment
def determine_toxicity(composite_toxic):
    """Return True when strictly more than half of the votes are toxic.

    Args:
        composite_toxic: Iterable of entries whose first element is the
            vote (truthy/1 for toxic, falsy/0 for non-toxic).

    Returns:
        bool: True only on a strict majority; ties and empty input give False.
    """
    toxic_votes = 0
    total_votes = 0
    for entry in composite_toxic:
        toxic_votes += entry[0]
        total_votes += 1
    return toxic_votes > (total_votes / 2)

# Label every comment by running the majority vote over its annotations
df["is_toxic"] = df["composite_toxic"].map(determine_toxicity)

# Keep just the columns used downstream, then preview them
df_cleaned = df[["text", "platform_id", "is_toxic"]]
df_cleaned.head()

In [None]:
from huggingface_hub import notebook_login

# Prompts for a Hugging Face Hub access token inside the notebook.
# NOTE(review): no cell visible here pushes to the Hub or loads a private repo — confirm this login is still needed.
notebook_login()

## Tokenization

In [None]:
from transformers import BertTokenizer

# Load the tokenizer that matches the "bert-base-uncased" checkpoint used for the model
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize every comment in one call; the result holds one row per comment
encoded_data = tokenizer(
    df_cleaned["text"].tolist(),   # All comments as a plain Python list
    padding="max_length",          # Pad every sequence up to max_length
    truncation=True,               # Cut sequences longer than max_length
    max_length=128,                # Fixed sequence length in tokens
    return_tensors="pt"            # Return PyTorch tensors
)

# Pull out the two tensors the model consumes
input_ids = encoded_data["input_ids"]
attention_mask = encoded_data["attention_mask"]

# Print the shapes of the full tensors as a sanity check
print(input_ids.shape)
print(attention_mask.shape)

## Converting Tokenized Data into PyTorch Dataset

In [None]:
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

# PyTorch Dataset wrapper around the tokenized comments
class ToxicityDataset(Dataset):
    """Index-aligned view over token ids, attention masks and labels.

    The three containers are stored as given; item ``idx`` is assembled
    by indexing each of them with the same ``idx``.
    """

    def __init__(self, input_ids, attention_mask, labels):
        self.input_ids, self.attention_mask, self.labels = (
            input_ids,
            attention_mask,
            labels,
        )

    def __len__(self):
        # The label container defines how many samples exist
        sample_count = len(self.labels)
        return sample_count

    def __getitem__(self, idx):
        # Keys follow the names used by the model/Trainer in this notebook
        sample = {}
        sample["input_ids"] = self.input_ids[idx]
        sample["attention_mask"] = self.attention_mask[idx]
        sample["labels"] = self.labels[idx]
        return sample

# Convert the boolean is_toxic column to an integer tensor (dtype long)
labels = torch.tensor(df_cleaned["is_toxic"].values, dtype=torch.long)

# Split ids, masks and labels together so rows stay aligned: 80% train / 20% validation.
# random_state fixes the shuffle; no `stratify` argument is passed, so the class ratio
# of the two parts is left to chance.
train_ids, val_ids, train_mask, val_mask, train_labels, val_labels = train_test_split(
    input_ids, attention_mask, labels, test_size=0.2, random_state=42
)


# Sanity check on the size of each part
print("Train Labels Shape:", train_labels.shape)
print("Validation Labels Shape:", val_labels.shape)

# Wrap each part in the Dataset class defined above
train_dataset = ToxicityDataset(train_ids, train_mask, train_labels)
val_dataset = ToxicityDataset(val_ids, val_mask, val_labels)

## Creating DataLoaders

In [None]:
from torch.utils.data import DataLoader

# Batch size shared by both loaders (same value as the per-device batch sizes in TrainingArguments)
batch_size = 16

# Training loader, reshuffled on every pass.
# Note: the Trainer below is handed train_dataset directly, not this loader.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation loader in fixed order; used by the manual evaluation loop
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Pull one batch and print the shape of its input_ids as a sanity check
batch = next(iter(train_dataloader))
print(batch["input_ids"].shape)

## Loading the pre-trained BERT model

In [None]:
from transformers import BertConfig, BertForSequenceClassification

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from the "bert-base-uncased" configuration, set a 2-class head and raise both
# dropout rates to 0.4 as regularisation.
# NOTE(review): 0.4 is far above the usual BERT default (0.1) — confirm it was tuned deliberately.
config = BertConfig.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    hidden_dropout_prob=0.4,
    attention_probs_dropout_prob=0.4
)

# Load the checkpoint weights with the modified config and move the model to `device`
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", config=config)
model.to(device)

## Defining Training arguments

In [None]:
from transformers import TrainingArguments

# Fine-tuning configuration.
# Evaluation and checkpointing both happen once per epoch, which is what lets
# load_best_model_at_end pick the checkpoint with the lowest eval_loss.
# The former `save_steps=100` was dropped: it is only read when
# save_strategy="steps", so it had no effect here and suggested otherwise.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # NOTE(review): 500 warmup steps may cover a large share of a 4-epoch run
    # on a small dataset — confirm against the total number of training steps.
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    report_to="none",
    learning_rate=1e-5,  # Low learning rate for fine-tuning
    lr_scheduler_type="cosine",  # Cosine decay of the learning rate
    load_best_model_at_end=True,  # Reload the best checkpoint when training ends
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # Lower eval_loss is better
    save_total_limit=2,  # Caps how many checkpoints are kept on disk (older ones are deleted)
)

In [None]:
import torch.nn as nn

# Inverse-frequency class weights: index 0 = non-toxic, index 1 = toxic.
# Counts are taken over all of df_cleaned (training and validation rows together).
toxic_count = df_cleaned["is_toxic"].sum()
non_toxic_count = len(df_cleaned) - toxic_count
class_weights = torch.tensor([1.0 / non_toxic_count, 1.0 / toxic_count], dtype=torch.float32).to(device)

# Weighted cross-entropy, used by the custom Trainer's compute_loss
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
from transformers import Trainer, EarlyStoppingCallback

# Trainer subclass that swaps in the class-weighted loss
class CustomTrainer(Trainer):
    """Trainer whose loss is the notebook-level weighted ``loss_fn``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy for one batch.

        ``labels`` is removed from ``inputs`` before the forward pass, so
        the model only produces logits. ``num_items_in_batch`` is accepted
        but not used.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        targets = inputs.pop("labels").to(torch.long)  # loss_fn needs long targets
        outputs = model(**inputs)
        loss = loss_fn(outputs.logits, targets)
        if return_outputs:
            return loss, outputs
        return loss

# Build the trainer on the train/validation datasets.
# Early stopping ends training after 2 consecutive evaluations without improvement
# in the monitored metric (eval_loss, evaluated once per epoch per training_args).
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]  # Early stopping if no improvement
)


In [None]:
# Run fine-tuning; with load_best_model_at_end=True the best checkpoint is loaded back into `model` afterwards
trainer.train()

The training loss decreases substantially over the run, which indicates that the model is fitting the training data.

## Model Evaluation

In [None]:
import torch
from torch.utils.data import DataLoader
import numpy as np

# Fixed probability cut-off for the toxic class; defined once because it does
# not change from batch to batch
threshold = 0.55

# Storing predictions and actual labels
all_preds = []
all_labels = []

# Evaluation mode (switches dropout off)
model.eval()

with torch.no_grad():
    for batch in val_dataloader:
        # Batch-local names on purpose: rebinding `input_ids`, `attention_mask`
        # or `labels` here would overwrite the full-dataset tensors built by the
        # tokenization and split cells, breaking any re-run of those cells.
        batch_input_ids = batch["input_ids"].to(device)
        batch_attention_mask = batch["attention_mask"].to(device)

        # Get model output
        outputs = model(batch_input_ids, attention_mask=batch_attention_mask)

        # Predict toxic when P(toxic) exceeds the threshold
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        preds = (probs[:, 1] > threshold).long()

        # Store results; labels are only compared on the CPU, so they are
        # never moved to the device
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch["labels"].cpu().numpy())

# Convert results to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Computing Metrics
from sklearn.metrics import accuracy_score, classification_report

print("Model Evaluation:")
print("Model Accuracy:", accuracy_score(all_labels, all_preds))
print(classification_report(all_labels, all_preds))




The overall model accuracy is almost 75%, which is decent. The classification report shows that the model is better at catching toxic comments than at avoiding false alarms. High precision for the Non-Toxic class means that few toxic comments are mislabelled as non-toxic.
High recall for the Toxic class means that most toxic comments are detected correctly.

Visualizing the confusion Matrix

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# confusion_matrix was not imported anywhere above this cell, so running the
# notebook top to bottom raised NameError here
from sklearn.metrics import confusion_matrix

# Rows are actual classes, columns are predicted classes
conf_matrix = confusion_matrix(all_labels, all_preds)

# Plot Confusion Matrix
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=["Non-Toxic", "Toxic"], yticklabels=["Non-Toxic", "Toxic"])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

440 correct non-toxic classifications (True Negatives)

159 correctly detected toxic comments (True Positives)

150 false positives (non-toxic comments wrongly marked toxic)

51 false negatives (missed toxic comments)



In [None]:
# Write the fine-tuned model and its tokenizer into the same directory so they can be reloaded together
model.save_pretrained("bert_toxic_classifier")
tokenizer.save_pretrained("bert_toxic_classifier")

print("Model saved successfully!")

## Predictions on Test Dataset

In [None]:
import json
import pandas as pd

# Location of the JSON-lines test file
test_file_path = "z639_assignment1_test.json"

# Decode every line of the file as its own JSON record
with open(test_file_path, "r", encoding="utf-8") as file:
    test_data = list(map(json.loads, file))

# Tabulate the records and pull the raw comment texts out as a list
df_test = pd.DataFrame(test_data)
test_texts = df_test["text"].tolist()

## Tokenization of Test dataset

In [None]:
# Tokenize the test comments with the same settings as the training data
# (pad/truncate to 128 tokens, PyTorch tensors)
encoded_test = tokenizer(
    test_texts,
    padding="max_length",
    truncation=True,
    max_length=128,
    return_tensors="pt"
)

# Tensors consumed by the prediction function
test_input_ids = encoded_test["input_ids"]
test_attention_mask = encoded_test["attention_mask"]

In [None]:
def predict_toxicity(input_ids, attention_mask, threshold=None):
    """Classify a single tokenized comment as toxic (True) or not (False).

    Args:
        input_ids: Token-id tensor for one comment, with a leading batch
            dimension of size 1.
        attention_mask: Matching attention-mask tensor.
        threshold: Optional cut-off on the toxic-class probability. When
            None (the default) the more probable class wins, as before.
            Pass the value used during validation (0.55 in the evaluation
            cell) to apply the same decision rule to new data.

    Returns:
        bool: True when the comment is predicted toxic.
    """
    inputs = {
        "input_ids": input_ids.to(device),
        "attention_mask": attention_mask.to(device)
    }
    # Force inference mode: with dropout left on (e.g. when called right after
    # training) predictions would be random from call to call
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    if threshold is None:
        return bool(torch.argmax(probs) == 1)
    # Index 1 of the flattened probabilities is the toxic class of the one sample
    return bool(probs.flatten()[1] > threshold)

# Score each test comment on its own, feeding the model a batch of size one
df_test["prediction"] = [
    predict_toxicity(ids_row.unsqueeze(0), mask_row.unsqueeze(0))
    for ids_row, mask_row in zip(test_input_ids, test_attention_mask)
]

# Render the boolean predictions as the lowercase strings "true" / "false"
df_test["prediction"] = df_test["prediction"].map(lambda flag: str(flag).lower())

# Show the first five predictions
print(df_test[["platform_id", "prediction"]].head(5))

In [None]:
# Write the id / prediction pairs to Prediction.csv without the DataFrame index
Prediction = df_test[["platform_id", "prediction"]]
Prediction.to_csv("Prediction.csv", index=False)

## Training an SVM Model

Importing libraries

In [None]:
import pandas as pd
import numpy as np
import json
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Fetch the NLTK stopword corpus, then build the English stopword set once for fast membership tests
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

In [None]:
# Loading training dataset (JSON-lines: one record per line).
# The encoding is given explicitly, like every other file read in this
# notebook; without it the platform default is used, which is not UTF-8
# everywhere and can fail or garble non-ASCII comments.
train_file_path = "z639_assignment1_training.json"
with open(train_file_path, "r", encoding="utf-8") as file:
    train_data = [json.loads(line) for line in file]

# Converting to DataFrame
df_train = pd.DataFrame(train_data)

# Checking the structure
df_train.head()

## Preprocessing the data

In [None]:
import re
import pandas as pd
from nltk.corpus import stopwords

# Function to clean text
def clean_text(text):
    """Normalise one raw comment before TF-IDF vectorization.

    The text is lowercased, URLs are stripped, every character that is not
    a letter or whitespace is removed, and English stopwords are dropped.

    Args:
        text: The raw comment as a string.

    Returns:
        str: The remaining words joined by single spaces.
    """
    text = text.lower()  # Convert to lowercase
    text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)  # Remove URLs
    text = re.sub(r"[^a-zA-Z\s]", "", text)  # Remove special characters and numbers
    # Test membership against the `stop_words` set built once in the stopwords
    # cell. Calling stopwords.words("english") for every token rebuilt the whole
    # list each time and made cleaning needlessly slow.
    return " ".join(word for word in text.split() if word not in stop_words)

# Clean every comment; missing texts are replaced by "" first so clean_text always receives a string
df_train["clean_text"] = df_train["text"].fillna("").apply(clean_text)

# Majority vote over the annotations of one comment
def get_majority_label(toxic_list):
    """Return True when strictly more than half of the votes are toxic.

    The first element of each entry in ``toxic_list`` is taken as the
    vote. Ties and an empty list give False.
    """
    toxic_votes = [label[0] for label in toxic_list]
    half_of_votes = len(toxic_list) / 2
    return sum(toxic_votes) > half_of_votes

# Derive the boolean label by majority vote, then keep only the cleaned text and the label
df_train["is_toxic"] = df_train["composite_toxic"].apply(get_majority_label).astype(bool)
df_train = df_train[["clean_text", "is_toxic"]]
print(df_train.head())


## Special Treatment

In [None]:
# TF-IDF features over unigrams and bigrams, capped at the 5000 highest-ranked terms.
# The vectorizer is fitted on every training row before the train/validation split
# below, so its vocabulary and IDF weights also reflect the validation rows.
vectorizer = TfidfVectorizer(max_features=5000, stop_words="english", ngram_range=(1,2))
X_train_tfidf = vectorizer.fit_transform(df_train["clean_text"])
y_train = df_train["is_toxic"]

# Densify the sparse TF-IDF matrix into a regular NumPy array
X_train = X_train_tfidf.toarray()

In [None]:
# Hold out 20% of the rows for validation; random_state fixes the shuffle
X_train_final, X_val, y_train_final, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

## Training the model

In [None]:
# Linear-kernel SVM with otherwise default settings
svm_model = SVC(kernel="linear")

# 5-fold cross-validated accuracy, computed on the training part only
cv_scores = cross_val_score(svm_model, X_train_final, y_train_final, cv=5, scoring="accuracy")

# Per-fold scores and their mean
print(f"Cross-Validation Accuracy Scores: {cv_scores}")
print(f"Mean CV Accuracy: {cv_scores.mean():.4f}")

# Fit the final model on the whole training part
svm_model.fit(X_train_final, y_train_final)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score the model on the held-out validation split. Scoring it on the rows it
# was fitted on only measures how well it memorised them and overstates the
# accuracy to expect on new comments.
# (The TF-IDF vectorizer was fitted before the split, so these numbers can
# still be slightly optimistic.)
y_val_pred = svm_model.predict(X_val)

# Training-set predictions are kept so the two scores can be compared
y_train_pred = svm_model.predict(X_train_final)
train_accuracy = accuracy_score(y_train_final, y_train_pred)

# Model evaluation on the validation split
accuracy = accuracy_score(y_val, y_val_pred)
classification_rep = classification_report(y_val, y_val_pred)
conf_matrix = confusion_matrix(y_val, y_val_pred)

print("Model Evaluation")
print(f"Model Accuracy: {accuracy:.4f}")
print(f"Training Accuracy (for reference): {train_accuracy:.4f}")
print("Classification Report:\n", classification_rep)
print("Confusion Matrix:\n", conf_matrix)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Compute the confusion matrix on the held-out validation split; a matrix built
# from the training rows would only show how well the model memorised them
y_val_pred = svm_model.predict(X_val)
conf_matrix = confusion_matrix(y_val, y_val_pred)

# Plot Confusion Matrix (rows: true class, columns: predicted class)
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=["Non-Toxic", "Toxic"], yticklabels=["Non-Toxic", "Toxic"])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
import json
import pandas as pd

# Location of the JSON-lines test file
test_file_path = "z639_assignment1_test.json"

# Decode every line of the file as its own JSON record
with open(test_file_path, 'r', encoding='utf-8') as f:
    test_data = list(map(json.loads, f))

# Tabulate the decoded records
df_test = pd.DataFrame(test_data)

In [None]:
# Clean the test comments with the same function used for training; missing texts become ""
df_test["clean_text"] = df_test["text"].fillna("").apply(clean_text)

print("Test dataset loaded and preprocessed.")
print(df_test.head())

In [None]:
# Project the test text onto the already-fitted TF-IDF vocabulary (transform only, no refit)
X_test_tfidf = vectorizer.transform(df_test["clean_text"])

print("Test data transformed using TF-IDF.")

In [None]:
# Predict with the trained SVM; the sparse matrix is densified to match the dense training input
test_predictions = svm_model.predict(X_test_tfidf.toarray())

# Store the raw predictions in a new column.
# NOTE(review): the BERT section writes lowercase "true"/"false" strings to a column
# named "prediction" — confirm which format the consumer of these files expects.
df_test["predicted_toxicity"] = test_predictions

In [None]:
# Write the id / prediction pairs to svm_test_predictions.csv without the DataFrame index
df_test[["platform_id", "predicted_toxicity"]].to_csv("svm_test_predictions.csv", index=False)

# Print the first 20 predictions as a quick check
print(df_test[["platform_id", "predicted_toxicity"]].head(20))