In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, hamming_loss, f1_score
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer, EarlyStoppingCallback,
    DataCollatorWithPadding
)
import warnings
warnings.filterwarnings('ignore')
import gradio as gr
import re
import ftfy
import io
import os
os.environ["WANDB_DISABLED"] = "true"
import random

**IMPORT THE DATASET**

In [None]:
# Mount Google Drive at /content/drive; the training CSV is read from there and
# the fine-tuned models are written there later in the notebook.
# NOTE(review): force_remount=True presumably refreshes an already-mounted Drive — confirm.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# Load the training CSV from the mounted Drive folder and preview the first 25 rows.
df = pd.read_csv('/content/drive/MyDrive/toxic_comment_detector/train.csv')
df.head(25)


In [None]:
# Print the frame's summary (columns, dtypes, non-null counts).
df.info()

**CLEANING THE DATASET**

In [None]:
# Count missing values in each column
df.isna().sum()

In [None]:
# Replace each newline, tab and carriage return with a space.
# The vectorized .str accessor is used instead of a per-row re.sub lambda so
# missing comments stay NaN rather than raising a TypeError.
df['comment_text'] = df['comment_text'].str.replace(r'[\n\r\t]', ' ', regex=True)

In [None]:
# Strip leading and trailing whitespace.
# .str.strip() leaves missing comments as NaN instead of failing on them.
df['comment_text'] = df['comment_text'].str.strip()

In [None]:
# Collapse every run of whitespace into a single space
_whitespace_run = re.compile(r'\s+')
df['comment_text'] = df['comment_text'].apply(lambda comment: _whitespace_run.sub(' ', comment))


In [None]:
# Repair mojibake / encoding artifacts (like â€™, Ã©) in every comment
df['comment_text'] = df['comment_text'].apply(ftfy.fix_text)

In [None]:
# Print the distinct values of every column except the id and the raw text,
# to confirm what values the label columns actually contain.
excluded_cols = ['id', 'comment_text']

for col in (name for name in df.columns if name not in excluded_cols):
    print(f"\nColumn: {col}")
    print(df[col].unique())

In [None]:
# Preview the first 30 rows after the cleaning steps above.
df.head(30)

**TRAIN THE MODELS**

In [None]:
# Seed Python, NumPy and PyTorch with one shared value for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

class ToxicDataset(Dataset):
    """Torch dataset that tokenizes one comment per item for multi-label training.

    Args:
        texts: Indexable collection of comments; each item is passed through ``str``.
        labels: Indexable collection of label rows, one row per comment.
        tokenizer: Object exposing ``encode_plus`` (e.g. a Hugging Face tokenizer).
        max_len: Length every sequence is padded or truncated to.
        model_name: Model identifier; compared case-insensitively against
            ``'distilbert'`` to decide whether token type ids are requested.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128, model_name=""):
        self.texts, self.labels = texts, labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.model_name = model_name.lower()

    def __len__(self):
        """Return the number of comments in the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize comment ``idx`` and return its tensors together with its labels.

        Returns:
            dict with ``input_ids``, ``attention_mask`` and ``labels`` (float
            tensor), plus ``token_type_ids`` when the tokenizer produced them.
        """
        encode_args = dict(
            text=str(self.texts[idx]),
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # DistilBERT takes no token_type_ids, so only request them for other models.
        if 'distilbert' not in self.model_name:
            encode_args['return_token_type_ids'] = True

        encoded = self.tokenizer.encode_plus(**encode_args)

        # The tokenizer returns a batch of one; flatten drops that batch axis.
        item = {key: encoded[key].flatten() for key in ('input_ids', 'attention_mask')}
        item['labels'] = torch.FloatTensor(self.labels[idx])

        if 'token_type_ids' in encoded:
            item['token_type_ids'] = encoded['token_type_ids'].flatten()

        return item

In [None]:
def compute_metrics(eval_pred):
    """Compute macro-averaged ROC-AUC and F1 over the label columns.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds raw
            logits and ``labels`` is a 2-D array with one column per label.

    Returns:
        dict with plain-float ``'auc'`` and ``'f1'``, each the mean over the
        label columns that contain both classes. If no column qualifies,
        both are ``0.0`` rather than NaN.
    """
    logits, labels = eval_pred
    probabilities = torch.sigmoid(torch.tensor(logits)).numpy()

    # Threshold the probabilities at 0.5 to get hard predictions for F1.
    binary_predictions = (probabilities > 0.5).astype(int)

    auc_scores = []
    f1_scores = []

    for i in range(labels.shape[1]):
        column = labels[:, i]
        # A column with only one class is skipped so it does not enter the average.
        if len(np.unique(column)) > 1:
            auc_scores.append(roc_auc_score(column, probabilities[:, i]))
            f1_scores.append(f1_score(column, binary_predictions[:, i]))

    # np.mean([]) would give NaN (plus a RuntimeWarning); NaN cannot be ranked
    # when the metric is used to select the best checkpoint, so report 0.0.
    return {
        'auc': float(np.mean(auc_scores)) if auc_scores else 0.0,
        'f1': float(np.mean(f1_scores)) if f1_scores else 0.0,
    }

In [None]:
class ToxicCommentDetector:
    """Train, evaluate and query several transformer models for multi-label toxicity detection.

    Trained (or reloaded) models and their tokenizers are kept in ``models`` and
    ``tokenizers``, keyed by the names used in ``model_configs``.
    """

    # Sequence length used for a model that has no entry in ``model_configs``.
    DEFAULT_MAX_LEN = 128

    def __init__(self):
        # Models and tokenizers, filled by train_model() or by external loading code.
        self.models = {}
        self.tokenizers = {}
        # Target columns; prediction scores are reported in this order.
        self.label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

        # Model configurations optimized for Colab
        self.model_configs = {
            'DistilBERT': {
                'name': 'distilbert-base-uncased',
                'max_len': 128,
                'batch_size': 16,
                'epochs': 3,
                'lr': 2e-5
            },
            'RoBERTa': {
                'name': 'roberta-base',
                'max_len': 128,
                'batch_size': 8,
                'epochs': 3,
                'lr': 1e-5
            },
            'ALBERT': {
                'name': 'albert-base-v2',
                'max_len': 128,
                'batch_size': 16,
                'epochs': 3,
                'lr': 3e-5
            },
            'electra-small': {
                'name': 'google/electra-small-discriminator',
                'max_len': 128,
                'batch_size': 16,
                'epochs': 3,
                'lr': 2e-5
            }
        }

    def _max_len_for(self, model_name):
        """Return the configured sequence length for ``model_name`` (default 128 if unknown)."""
        return self.model_configs.get(model_name, {}).get('max_len', self.DEFAULT_MAX_LEN)

    def load_and_preprocess_data(self, df, sample_size=50000):
        """Summarize the dataset, optionally subsample it, and split it 70/15/15.

        Args:
            df: DataFrame with a ``comment_text`` column and every column in
                ``self.label_columns``.
            sample_size: Maximum number of rows to keep. Larger frames are
                randomly subsampled (seed 42); pass ``None`` to keep all rows.

        Returns:
            ``(X_train, X_val, X_test, y_train, y_val, y_test)`` as arrays.
        """
        print("📊 Dataset Overview:")
        print(f"Total samples: {len(df)}")
        print(f"Columns: {df.columns.tolist()}")

        # Check label distribution
        print("\n📈 Label Distribution:")
        for col in self.label_columns:
            positive_ratio = df[col].mean()
            print(f"{col}: {positive_ratio:.3f} ({positive_ratio*100:.1f}% positive)")

        # Sample down for faster training. This is plain random sampling; only
        # the train/val/test splits below are stratified.
        if sample_size is not None and len(df) > sample_size:
            print(f"\n🎯 Sampling {sample_size} examples for faster training...")
            df_sampled = df.sample(n=sample_size, random_state=42)
        else:
            df_sampled = df.copy()

        X = df_sampled['comment_text'].values
        y = df_sampled[self.label_columns].values

        # First split: 70% train, 30% held out. Stratified on the first label
        # column ('toxic') only.
        X_train, X_temp, y_train, y_temp = train_test_split(
            X, y, test_size=0.3, random_state=42, stratify=y[:, 0]
        )

        # Second split: the held-out 30% is halved into validation and test.
        X_val, X_test, y_val, y_test = train_test_split(
            X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp[:, 0]
        )

        print("\n📋 Data Split:")
        print(f"Training: {len(X_train)} samples")
        print(f"Validation: {len(X_val)} samples")
        print(f"Testing: {len(X_test)} samples")

        return X_train, X_val, X_test, y_train, y_val, y_test

    def train_model(self, model_name, X_train, X_val, y_train, y_val):
        """Fine-tune one configured model, save it to Drive, and register it.

        Args:
            model_name: Key into ``self.model_configs``.
            X_train, X_val: Training / validation comments.
            y_train, y_val: Matching label rows.

        Returns:
            The metrics dict returned by ``trainer.evaluate()`` on the validation set.

        Raises:
            KeyError: If ``model_name`` is not in ``self.model_configs``.
        """
        print(f"\n🚀 Training {model_name}...")

        config = self.model_configs[model_name]

        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(config['name'])
        model = AutoModelForSequenceClassification.from_pretrained(
            config['name'],
            num_labels=len(self.label_columns),
            problem_type="multi_label_classification"
        )

        # Create datasets
        train_dataset = ToxicDataset(X_train, y_train, tokenizer, config['max_len'], model_name)
        val_dataset = ToxicDataset(X_val, y_val, tokenizer, config['max_len'], model_name)

        # Evaluation and checkpointing share the same 500-step cadence, and the
        # best checkpoint by validation AUC is reloaded at the end.
        training_args = TrainingArguments(
            output_dir=f'./results_{model_name.lower()}',
            num_train_epochs=config['epochs'],
            per_device_train_batch_size=config['batch_size'],
            per_device_eval_batch_size=config['batch_size'],
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir=f'./logs_{model_name.lower()}',
            logging_steps=100,
            eval_strategy="steps",
            eval_steps=500,
            save_strategy="steps",
            save_steps=500,
            load_best_model_at_end=True,
            metric_for_best_model="auc",
            greater_is_better=True,
            learning_rate=config['lr'],
            adam_epsilon=1e-8,
            max_grad_norm=1.0,
            fp16=torch.cuda.is_available(),
            dataloader_num_workers=0,
            save_total_limit=1,
        )

        # Initialize trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
        )

        # Train the model
        trainer.train()

        # Save model and tokenizer
        model_dir = f"/content/drive/MyDrive/toxic comment detector/{model_name}"
        model.save_pretrained(model_dir)
        tokenizer.save_pretrained(model_dir)
        print(f"📦 Model and tokenizer saved to {model_dir}")

        # Save references for later use
        self.models[model_name] = model
        self.tokenizers[model_name] = tokenizer

        # Evaluate on validation set
        eval_results = trainer.evaluate()
        print(f"✅ {model_name} - Validation AUC: {eval_results['eval_auc']:.4f}, F1: {eval_results['eval_f1']:.4f}")

        return eval_results

    def predict(self, text, model_name):
        """Score one comment with a registered model.

        Args:
            text: The comment to score.
            model_name: Key into ``self.models`` / ``self.tokenizers``.

        Returns:
            dict mapping each name in ``self.label_columns`` to a float
            probability (sigmoid of the model's logit for that label).

        Raises:
            ValueError: If ``model_name`` has not been trained or loaded.
        """
        if model_name not in self.models:
            raise ValueError(f"Model {model_name} not trained yet!")

        model = self.models[model_name]
        tokenizer = self.tokenizers[model_name]

        # Inputs are moved to the model's device below; the model itself is not moved.
        device = next(model.parameters()).device

        # Use the same sequence length the model was trained with.
        tokenizer_kwargs = {
            'text': text,
            'add_special_tokens': True,
            'max_length': self._max_len_for(model_name),
            'padding': 'max_length',
            'truncation': True,
            'return_attention_mask': True,
            'return_tensors': 'pt'
        }

        # Only add token_type_ids for models that support it (not DistilBERT)
        if 'distilbert' not in model_name.lower():
            tokenizer_kwargs['return_token_type_ids'] = True

        inputs = tokenizer.encode_plus(**tokenizer_kwargs)

        # Move inputs to the same device as model
        for key in inputs:
            inputs[key] = inputs[key].to(device)

        # Make prediction
        model.eval()
        with torch.no_grad():
            outputs = model(**inputs)
            # The batch holds one comment, so take row 0.
            predictions = torch.sigmoid(outputs.logits).cpu().numpy()[0]

        return {label: float(predictions[i]) for i, label in enumerate(self.label_columns)}

    def evaluate_all_models(self, X_test, y_test):
        """Evaluate every registered model on the test set.

        Args:
            X_test: Test comments.
            y_test: Matching label rows.

        Returns:
            dict mapping model name to ``{'auc': ..., 'f1': ...}``.
        """
        results = {}

        for model_name, model in self.models.items():
            print(f"\n🔍 Evaluating {model_name} on test set...")

            tokenizer = self.tokenizers[model_name]

            # Tokenize with the sequence length the model was trained with.
            test_dataset = ToxicDataset(
                X_test, y_test, tokenizer, self._max_len_for(model_name), model_name
            )

            # A fresh Trainer with default arguments is used only to run evaluation.
            trainer = Trainer(
                model=model,
                compute_metrics=compute_metrics,
            )

            # Evaluate
            eval_results = trainer.evaluate(test_dataset)
            results[model_name] = {
                'auc': eval_results['eval_auc'],
                'f1': eval_results['eval_f1']
            }

            print(f"📊 {model_name} - Test AUC: {eval_results['eval_auc']:.4f}, F1: {eval_results['eval_f1']:.4f}")

        return results

In [None]:
# Main training pipeline
def train_toxic_detector(df):
    """Run the full pipeline: split the data, train each model, test, and summarize.

    Args:
        df: DataFrame handed to ``ToxicCommentDetector.load_and_preprocess_data``.

    Returns:
        ``(detector, results_df)`` where ``results_df`` lists each evaluated
        model with its test AUC and F1, sorted by AUC descending.
    """
    detector = ToxicCommentDetector()

    X_train, X_val, X_test, y_train, y_val, y_test = detector.load_and_preprocess_data(df)

    # A failure in one model is reported and the remaining models still train.
    validation_results = {}
    for model_name in detector.model_configs:
        try:
            validation_results[model_name] = detector.train_model(
                model_name, X_train, X_val, y_train, y_val
            )
        except Exception as e:
            print(f"❌ Error training {model_name}: {str(e)}")

    test_results = detector.evaluate_all_models(X_test, y_test)

    banner = "=" * 50
    print("\n" + banner)
    print("📋 FINAL RESULTS SUMMARY")
    print(banner)

    summary_columns = {
        'Model': list(test_results.keys()),
        'Test_AUC': [metrics['auc'] for metrics in test_results.values()],
        'Test_F1': [metrics['f1'] for metrics in test_results.values()],
    }
    results_df = pd.DataFrame(summary_columns).sort_values('Test_AUC', ascending=False)
    print(results_df.to_string(index=False))

    return detector, results_df

In [None]:
# Gradio Interface
def create_gradio_interface(detector):
    """Build the Gradio Blocks app for single-model and all-model toxicity scoring.

    Args:
        detector: Object exposing ``models`` (dict keyed by model name),
            ``label_columns`` and ``predict(text, model_name)``.

    Returns:
        The constructed ``gr.Blocks`` interface (not yet launched).
    """

    def _is_blank(text):
        """True for None, empty, or whitespace-only input."""
        return not text or not text.strip()

    def _flag(score):
        """Marker shown next to a score: alarm above 0.5, check mark otherwise."""
        return "🚨" if score > 0.5 else "✅"

    def predict_toxicity(text, model_name):
        """Predict toxicity for given text with one model; returns markdown text."""
        if _is_blank(text):
            return "Please enter some text to analyze."

        try:
            results = detector.predict(text, model_name)

            # Format results
            output = f"🔍 **Analysis Results using {model_name}:**\n\n"
            for label, score in results.items():
                output += f"{_flag(score)} **{label.replace('_', ' ').title()}**: {score:.3f} ({score*100:.1f}%)\n"

            return output

        except Exception as e:
            return f"Error: {str(e)}"

    def compare_models(text):
        """Score the text with every loaded model; returns (markdown text, figure or None)."""
        if _is_blank(text):
            return "Please enter some text to analyze.", None

        # Without models there is nothing to plot, and the bar width below
        # would divide by zero.
        if not detector.models:
            return "No models are loaded.", None

        fig = None
        try:
            all_results = {
                model_name: detector.predict(text, model_name)
                for model_name in detector.models.keys()
            }

            # Create comparison chart
            fig, ax = plt.subplots(figsize=(12, 8))

            models = list(all_results.keys())
            labels = detector.label_columns
            x = np.arange(len(labels))
            # Each group of bars occupies 80% of its slot, whatever the number
            # of models, so neighbouring groups never touch.
            width = 0.8 / len(models)

            for i, model in enumerate(models):
                scores = [all_results[model][label] for label in labels]
                ax.bar(x + i*width, scores, width, label=model, alpha=0.8)

            # Add the ellipsis only when the text was actually truncated, and
            # escape '$' so user text is never parsed as mathtext.
            preview = text[:50] + ("..." if len(text) > 50 else "")
            preview = preview.replace('$', r'\$')

            ax.set_xlabel('Toxicity Categories')
            ax.set_ylabel('Probability Score')
            ax.set_title(f'Model Comparison for: "{preview}"')
            # Centre each tick under its group of bars.
            ax.set_xticks(x + width * (len(models) - 1) / 2)
            ax.set_xticklabels([label.replace('_', ' ').title() for label in labels], rotation=45)
            ax.legend()
            ax.grid(True, alpha=0.3)
            fig.tight_layout()

            # Format text results
            comparison_text = "📊 **Model Comparison Results:**\n\n"
            for model_name, results in all_results.items():
                comparison_text += f"**{model_name}:**\n"
                for label, score in results.items():
                    comparison_text += f"  {_flag(score)} {label.replace('_', ' ').title()}: {score:.3f}\n"
                comparison_text += "\n"

            return comparison_text, fig

        except Exception as e:
            return f"Error: {str(e)}", None

        finally:
            # Unregister the figure from pyplot so repeated comparisons do not
            # accumulate open figures; the returned Figure object stays usable.
            if fig is not None:
                plt.close(fig)

    model_names = list(detector.models.keys())

    # Create Gradio interface
    with gr.Blocks(title="🛡️ Toxic Comment Detector", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🛡️ Toxic Comment Detector

        This app uses four different pre-trained models to detect toxicity in comments.
        Enter your text below and choose a model to get predictions, or compare all models at once!
        """)

        with gr.Tab("Single Model Prediction"):
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Enter comment to analyze",
                        placeholder="Type your comment here...",
                        lines=3
                    )
                    model_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="Select Model",
                        value=model_names[0] if model_names else None
                    )
                    predict_btn = gr.Button("🔍 Analyze Toxicity", variant="primary")

                with gr.Column():
                    single_output = gr.Markdown(label="Results")

            predict_btn.click(
                predict_toxicity,
                inputs=[text_input, model_dropdown],
                outputs=single_output
            )

        with gr.Tab("Compare All Models"):
            with gr.Row():
                with gr.Column():
                    compare_text_input = gr.Textbox(
                        label="Enter comment to analyze",
                        placeholder="Type your comment here...",
                        lines=3
                    )
                    compare_btn = gr.Button("📊 Compare All Models", variant="primary")

                with gr.Column():
                    compare_output = gr.Markdown(label="Comparison Results")

            compare_plot = gr.Plot(label="Visual Comparison")

            compare_btn.click(
                compare_models,
                inputs=compare_text_input,
                outputs=[compare_output, compare_plot]
            )

        gr.Markdown("""
        ---
        ### 📝 Model Information:
        - **DistilBERT**: Lightweight and fast, good for real-time applications
        - **RoBERTa**: Robust and accurate, optimized training approach
        - **ALBERT**: Parameter-efficient, good balance of speed and accuracy
        - **ELECTRA-Small**: Very lightweight and fast, pre-trained with a novel discriminator approach

        ### 🏷️ Labels Explained:
        - **Toxic**: General toxicity
        - **Severe Toxic**: Extremely toxic content
        - **Obscene**: Obscene language
        - **Threat**: Threatening language
        - **Insult**: Insulting content
        - **Identity Hate**: Hate speech targeting identity groups
        """)

    return interface

In [None]:
# Train every configured model and keep the detector plus the test-set summary table
detector, results = train_toxic_detector(df)

In [None]:
# Launch the Gradio interface for the freshly trained detector
interface = create_gradio_interface(detector)
interface.launch()

In [None]:
# Reload the models saved on Drive so the app can be launched without retraining.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Rebuild detector instance
detector = ToxicCommentDetector()

# Folder the training step saved each model into (one sub-folder per model name).
SAVED_MODELS_DIR = "/content/drive/MyDrive/toxic comment detector"

# Take the names from the detector's own configuration so this list cannot
# drift from the models that are actually trained and saved.
model_names = list(detector.model_configs.keys())

# Load models from Drive; a model that fails to load is reported and skipped.
for model_name in model_names:
    try:
        model_path = f"{SAVED_MODELS_DIR}/{model_name}"
        model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=len(detector.label_columns))
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        detector.models[model_name] = model
        detector.tokenizers[model_name] = tokenizer
        print(f"✅ Loaded {model_name}")
    except Exception as e:
        print(f"❌ Failed to load {model_name}: {e}")

# Launch Gradio UI only when at least one model is available; an interface
# without models could not answer any request.
if detector.models:
    gr_interface = create_gradio_interface(detector)
    gr_interface.launch()
else:
    print("⚠️ No saved models could be loaded; skipping the Gradio launch.")