In [None]:
# Upgrade pip itself, then install/upgrade the core libraries used in this notebook.
# NOTE(review): versions are not pinned, so each run picks up whatever is latest.
!pip install --upgrade pip
!pip install --upgrade transformers datasets torch scikit-learn

In [None]:
# Hugging Face `evaluate` is imported in the imports cell of this notebook.
!pip install evaluate

In [None]:
# NOTE(review): wandb is installed here, but reporting is switched off later
# (report_to="none" and WANDB_DISABLED="true"), so this install may be unnecessary.
!pip install wandb

In [None]:
import inspect

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer


In [None]:
# Read the raw training data from /content/train.csv and show it.
df = pd.read_csv(filepath_or_buffer='/content/train.csv')
df

In [None]:
# Collapse the six per-category flags into one target column: the row-wise
# maximum, i.e. 1 as soon as any category is flagged (assuming 0/1 flags).
toxicity_flags = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
df = df.assign(toxic_label=df[toxicity_flags].max(axis=1))[["comment_text", "toxic_label"]]


In [None]:
# Convert to Hugging Face dataset.
# preserve_index=False keeps the pandas index out of the dataset, so a
# non-default index cannot show up as an extra "__index_level_0__" column.
dataset = Dataset.from_pandas(df, preserve_index=False)
dataset

In [None]:
# Load the tokenizer for the same checkpoint that is fine-tuned below
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def tokenize_function(examples):
    """Tokenize a batch of comments.

    Args:
        examples: batch dict from ``Dataset.map(batched=True)``; only the
            "comment_text" entry is read.

    Returns:
        The tokenizer output for the batch, truncated and padded to the
        tokenizer's maximum length.
    """
    # NOTE(review): padding="max_length" pads every example to the maximum length;
    # dynamic per-batch padding would be cheaper, but is left unchanged here.
    return tokenizer(examples["comment_text"], padding="max_length", truncation=True)


# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Drop the raw text and expose the target under the name "labels"
tokenized_datasets = tokenized_datasets.remove_columns(["comment_text"])
tokenized_datasets = tokenized_datasets.rename_column("toxic_label", "labels")
tokenized_datasets.set_format("torch")

# Split ONCE with a fixed seed. Calling train_test_split twice produces two
# independent random shuffles, so the "test" part of the second call overlaps
# the "train" part of the first and validation metrics are inflated by leakage.
split_datasets = tokenized_datasets.train_test_split(test_size=0.1, seed=42)
train_dataset = split_datasets["train"]
val_dataset = split_datasets["test"]

print("Dataset is ready!")

In [None]:
# Load DistilBERT with a two-label sequence-classification head
checkpoint = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

In [None]:
# Training configuration: one epoch, evaluated and checkpointed at the end of the epoch.
training_args = TrainingArguments(
    output_dir="./results",
    # `evaluation_strategy` was renamed to `eval_strategy`; the old keyword is
    # rejected by the current transformers release that the first cell installs.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.001,
    logging_dir="./logs",
    logging_steps=1000,
    load_best_model_at_end=True,
    # Mixed precision needs a CUDA device; fall back to fp32 on CPU-only runtimes
    # instead of failing when the arguments are constructed.
    fp16=torch.cuda.is_available(),
    report_to="none",
)


In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for a binary classifier.

    Args:
        eval_pred: a ``(logits, labels)`` pair, as passed by ``Trainer`` to
            ``compute_metrics``. The predicted class is the argmax over the
            last axis of ``logits``.

    Returns:
        dict with float values under the keys "accuracy", "precision",
        "recall" and "f1". Precision/recall/F1 are for the positive class
        (label 1); they are reported as 0.0 when undefined.
    """
    logits, labels = eval_pred
    # Stay in numpy: no need to round-trip the logits through a torch tensor.
    predictions = np.argmax(np.asarray(logits), axis=-1)

    # Use the sklearn metrics already imported at the top of the notebook
    # instead of re-loading `evaluate` metrics on every evaluation call
    # (the original also referenced an undefined `metric` object for accuracy).
    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="binary", zero_division=0
    )
    return {
        "accuracy": float(accuracy),
        "precision": float(precision),
        "recall": float(recall),
        "f1": float(f1),
    }

In [None]:
# Switch off Weights & Biases via its environment flag
import os

os.environ.update({"WANDB_DISABLED": "true"})


In [None]:
# Initialize Trainer.
# Recent transformers releases take the tokenizer through `processing_class`
# and deprecate the `tokenizer` keyword. Probe the constructor signature so
# this cell works with whichever keyword the installed version accepts.
tokenizer_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer},
)

# Train the model
trainer.train()

In [None]:
# Score the trained model on the validation split and print the metric dict
test_results = trainer.evaluate(eval_dataset=val_dataset)
print("Evaluation Results:", test_results)

In [None]:
# Inference function with proper device handling
def classify_text(text):
    """Classify one piece of text as "Safe" or "Unsafe".

    Uses the notebook-level ``model`` and ``tokenizer``. The model is moved to
    the GPU when one is available (otherwise the CPU) and switched to eval mode.

    Args:
        text: the string to classify.

    Returns:
        "Unsafe" if the predicted class index is 1, otherwise "Safe".
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # Ensure the model is on the correct device
    model.eval()  # Inference mode: do not depend on whatever mode training left behind

    # Move inputs to the same device as the model
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)

    # No gradients are needed for prediction; skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    prediction = torch.argmax(outputs.logits, dim=-1).item()
    return "Unsafe" if prediction == 1 else "Safe"


In [None]:
# Smoke-test the classifier on one friendly and one hostile sentence
test_texts = [
    "I appreciate your help, thank you!",
    "You are so stupid and annoying!",
]

for sample in test_texts:
    verdict = classify_text(sample)
    print(f"Text: {sample} -> Classification: {verdict}")

# Documentation & Deployment
# Readme/Notebook Documentation:
# - Approach: Fine-tuned DistilBERT for binary classification of safe vs. unsafe text.
# - Hyperparameters: Optimized for Colab with reduced batch size and minimal epochs.
# - Trade-offs: Training for a single epoch keeps runtime short at the cost of some potential accuracy.

# Potential Extensions:
# - Could be deployed in a real-time system using a FastAPI or Flask API.
# - Integrated with moderation tools to filter out unsafe content before generation.
# - Further fine-tuning on a larger dataset could improve performance in real-world applications.