In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_auc_score, roc_curve, auc
import os

# Model Names
MODEL_NAMES = ["electra", "deberta", "roberta", "hatebert"]

# Load Test Dataset
df = pd.read_csv("D:/Project Phase 1/Toxic-Comment-Classification-Challenge-master/shuffled_dataset.csv")  # Update with actual path

# Map text labels to numerical values
label_mapping = {
    "non_toxic": 2,
    "moderately_toxic": 1,
    "severely_toxic": 0
}
df["label"] = df["label"].map(label_mapping)

# Series.map() turns every label that is missing from label_mapping into NaN.
# Fail here with a clear message instead of letting the metrics choke on NaN later.
unmapped_rows = df["label"].isna()
if unmapped_rows.any():
    raise ValueError(f"{int(unmapped_rows.sum())} rows have a label that is not in label_mapping")
df["label"] = df["label"].astype(int)

comments = df["comment_text"].tolist()
true_labels = df["label"].values  # Numerical labels (0, 1, 2)

# Display names ordered by class index: column i of every prediction matrix is
# compared against label i below (argmax vs. true_labels), so index 0 is
# "severely_toxic" and index 2 is "non_toxic".
CLASS_DISPLAY_NAMES = ["Severely Toxic", "Moderately Toxic", "Non-Toxic"]
expected_shape = (len(comments), len(CLASS_DISPLAY_NAMES))

# Directory containing saved predictions
SAVE_DIR = "D:/Project Phase 1/predictions"

# Load predictions from saved .npy files
ensemble_preds = np.zeros(expected_shape)

for model_name in MODEL_NAMES:
    save_path = os.path.join(SAVE_DIR, f"{model_name}_predictions.npy")

    if not os.path.exists(save_path):
        raise FileNotFoundError(f"Prediction file not found: {save_path}")

    preds = np.load(save_path)
    # A shape mismatch means the predictions were generated from a different dataset/split
    if preds.shape != expected_shape:
        raise ValueError(f"Mismatch in predictions shape for {model_name}. Expected {expected_shape}, got {preds.shape}")

    ensemble_preds += preds / len(MODEL_NAMES)  # Uniform weighting
    print(f"Loaded predictions from {save_path}")

# Save ensemble predictions using pandas.
# Column names follow class-index order so each header matches the probability stored under it.
df_preds = pd.DataFrame(ensemble_preds, columns=CLASS_DISPLAY_NAMES)
df_preds.to_csv("ensemble_predictions.csv", index=False)

# Final Prediction based on Averaging
final_predictions = np.argmax(ensemble_preds, axis=1)

# Evaluate Model
accuracy = accuracy_score(true_labels, final_predictions)
f1 = f1_score(true_labels, final_predictions, average="macro")
conf_matrix = confusion_matrix(true_labels, final_predictions)
class_report = classification_report(true_labels, final_predictions)
auc_score = roc_auc_score(true_labels, ensemble_preds, multi_class="ovr")

# Print Results
print(f"Ensemble Model Accuracy: {accuracy:.4f}")
print(f"Ensemble Model F1-Score: {f1:.4f}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)
print(f"Ensemble Model AUC: {auc_score:.4f}")

# Function to Save Evaluation Report
def save_evaluation_report(accuracy, f1, conf_matrix, class_report, auc_score, true_labels, ensemble_preds, label_mapping):
    """Render a 2x2 evaluation figure and save it as 'evaluation_report.png'.

    Panels: confusion-matrix heatmap, one-vs-rest ROC curve per class,
    a text box with accuracy / F1 / AUC, and the classification report text.

    Args:
        accuracy: Accuracy value shown in the metrics panel.
        f1: F1 value shown in the metrics panel.
        conf_matrix: Confusion matrix drawn as a heatmap.
        class_report: Classification report text.
        auc_score: AUC value shown in the metrics panel.
        true_labels: Array of true class indices.
        ensemble_preds: Array indexed as [sample, class index] holding class scores.
        label_mapping: Dict mapping class name -> class index.
    """
    # Pair every class name with its index and order by index. Iterating
    # label_mapping.keys() directly follows insertion order, which need not match
    # the class-index order used by the matrix rows and the probability columns.
    classes_by_index = sorted(label_mapping.items(), key=lambda item: item[1])
    class_names = [name for name, _ in classes_by_index]

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))  # 2x2 grid

    # Plot Confusion Matrix
    # NOTE(review): assumes conf_matrix rows/columns are in ascending class-index order — confirm at the call site
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names, ax=axes[0, 0])
    axes[0, 0].set_xlabel("Predicted Label")
    axes[0, 0].set_ylabel("True Label")
    axes[0, 0].set_title("Confusion Matrix")

    # Plot ROC Curves (one-vs-rest: class `class_index` against all other classes)
    for label, class_index in classes_by_index:
        fpr, tpr, _ = roc_curve(true_labels == class_index, ensemble_preds[:, class_index])
        axes[0, 1].plot(fpr, tpr, label=f"Class {label} (AUC: {auc(fpr, tpr):.4f})")

    axes[0, 1].plot([0, 1], [0, 1], "k--")  # chance diagonal
    axes[0, 1].set_xlabel("False Positive Rate")
    axes[0, 1].set_ylabel("True Positive Rate")
    axes[0, 1].set_title("ROC Curves for Each Class")
    axes[0, 1].legend()

    # Accuracy, F1, AUC Score
    metrics_text = f"""
    Accuracy: {accuracy:.4f}
    F1 Score: {f1:.4f}
    AUC Score: {auc_score:.4f}
    """
    axes[1, 0].text(0.5, 0.5, metrics_text, fontsize=12, ha="center", va="center", bbox=dict(boxstyle="round", facecolor="lightgray", alpha=0.5))
    axes[1, 0].set_axis_off()
    axes[1, 0].set_title("Evaluation Metrics")

    # Classification Report (monospace keeps the report's columns aligned)
    axes[1, 1].text(0.5, 0.5, class_report, fontsize=10, family="monospace", ha="center", va="center", bbox=dict(boxstyle="round", facecolor="lightgray", alpha=0.5))
    axes[1, 1].set_axis_off()
    axes[1, 1].set_title("Classification Report")

    plt.tight_layout()

    # Save Report
    plt.savefig("evaluation_report.png", dpi=300)
    print("Evaluation report saved as 'evaluation_report.png'")

# Call function to save evaluation report
# Passes the metrics and ensemble scores computed in this cell; the function writes 'evaluation_report.png'.
save_evaluation_report(accuracy, f1, conf_matrix, class_report, auc_score, true_labels, ensemble_preds, label_mapping)


ValueError: Mismatch in predictions shape for electra. Expected (156638, 3), got (63751, 3)

# ***OUTPUT***

In [3]:
Loading saved predictions for electra
Loading saved predictions for deberta
Processing roberta:   0%|          | 0/127936 [00:00<?, ?comments/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Processing roberta: 100%|██████████| 127936/127936 [17:39<00:00, 120.69comments/s]
Saved predictions for roberta at model_predictions/roberta_predictions.npy
Processing hatebert: 100%|██████████| 127936/127936 [17:29<00:00, 121.86comments/s]
Saved predictions for hatebert at model_predictions/hatebert_predictions.npy
Ensemble Model Accuracy: 0.9767
Ensemble Model F1-Score: 0.9767
Confusion Matrix:
 [[41683   839    39]
 [  180 41200  1164]
 [    3   757 42071]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99     42561
           1       0.96      0.97      0.97     42544
           2       0.97      0.98      0.98     42831

    accuracy                           0.98    127936
   macro avg       0.98      0.98      0.98    127936
weighted avg       0.98      0.98      0.98    127936

Ensemble Model AUC: 0.9981

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 17)

# ***USER INPUT***

In [10]:


import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import os

# Checkpoint directory of each fine-tuned ensemble member (adjust to your machine)
MODEL_PATHS = dict(
    electra="D:/Project Phase 1/Fine_Tuned Models/best_electra_model (1)",
    deberta="D:/Project Phase 1/Fine_Tuned Models/best_deberta",
    roberta="D:/Project Phase 1/Fine_Tuned Models/updated_twitter_roberta",
    hatebert="D:/Project Phase 1/Fine_Tuned Models/best_hatebert",
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One tokenizer and one classifier per ensemble member, keyed by the same name
models, tokenizers = {}, {}
for name in MODEL_PATHS:
    path = MODEL_PATHS[name]
    tokenizers[name] = AutoTokenizer.from_pretrained(path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(path)
    # Move to the chosen device and switch to evaluation mode before storing
    models[name] = loaded_model.to(device).eval()

# Class index -> display name
label_mapping = dict(enumerate(["Severely Toxic", "Moderately Toxic", "Non-Toxic"]))

def classify_comment(comment, max_length=512):
    """Classifies a given user input comment using the ensemble model.

    Every loaded model scores the comment, the softmax probabilities are
    averaged with uniform weights, and the class with the highest average wins.

    Args:
        comment: Text to classify.
        max_length: Token limit used when truncating the input. The default of
            512 is an assumption about the checkpoints' limit — confirm for
            your models.

    Returns:
        The label_mapping display name of the predicted class.
    """
    ensemble_preds = np.zeros(3)  # Assuming 3 classes: 0, 1, 2

    for model_name, model in models.items():
        tokenizer = tokenizers[model_name]

        # truncation=True without max_length falls back to "no truncation" for
        # tokenizers that have no predefined maximum length, so pass the limit explicitly.
        inputs = tokenizer(comment, padding=True, truncation=True, max_length=max_length, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Single comment -> batch of one; take row 0 of the probability matrix
        preds = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]

        ensemble_preds += preds / len(models)  # Uniform weighting

    predicted_label = int(np.argmax(ensemble_preds))
    return label_mapping[predicted_label]

# User Input Loop
# "\n" (backslash) is the newline escape; "/n" would be printed literally.
print("\n🔥 Abusive Comment Detection System 🔥\nEnter text to classify (type 'exit' to quit):")
while True:
    user_input = input("\nEnter comment: ")
    # Ignore surrounding whitespace so " exit " also quits
    if user_input.strip().lower() == "exit":
        print("Exiting...👋")
        break
    if not user_input.strip():
        continue  # nothing to classify; prompt again
    result = classify_comment(user_input)
    print(f"🚀 Prediction: {result}")




/n🔥 Abusive Comment Detection System 🔥/nEnter text to classify (type 'exit' to quit):


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


🚀 Prediction: Moderately Toxic
🚀 Prediction: Moderately Toxic
🚀 Prediction: Non-Toxic
🚀 Prediction: Non-Toxic
Exiting...👋


In [16]:

import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import os

# Checkpoint directory of each fine-tuned ensemble member (adjust to your machine)
MODEL_PATHS = dict(
    electra="D:/Project Phase 1/Fine_Tuned Models/best_electra_model (1)",
    deberta="D:/Project Phase 1/Fine_Tuned Models/best_deberta",
    roberta="D:/Project Phase 1/Fine_Tuned Models/updated_twitter_roberta",
    hatebert="D:/Project Phase 1/Fine_Tuned Models/best_hatebert",
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device_kind = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_kind)

# One tokenizer and one classifier per ensemble member, keyed by the same name
models, tokenizers = {}, {}
for name in MODEL_PATHS:
    path = MODEL_PATHS[name]
    tokenizers[name] = AutoTokenizer.from_pretrained(path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(path)
    # Move to the chosen device and switch to evaluation mode before storing
    models[name] = loaded_model.to(device).eval()

# Class index -> display name
label_mapping = dict(enumerate(["Severely Toxic", "Moderately Toxic", "Non-Toxic"]))

def classify_comment(comment, max_length=512):
    """Classifies a given user input comment using the ensemble model and displays individual model probabilities.

    Every loaded model scores the comment, the softmax probabilities are
    averaged with uniform weights, and both the per-model and the averaged
    probabilities are printed.

    Args:
        comment: Text to classify.
        max_length: Token limit used when truncating the input. The default of
            512 is an assumption about the checkpoints' limit — confirm for
            your models.

    Returns:
        The label_mapping display name of the predicted class.
    """
    ensemble_preds = np.zeros(3)  # Assuming 3 classes: 0, 1, 2
    model_probs = {}

    for model_name, model in models.items():
        tokenizer = tokenizers[model_name]

        # truncation=True without max_length falls back to "no truncation" for
        # tokenizers that have no predefined maximum length, so pass the limit explicitly.
        inputs = tokenizer(comment, padding=True, truncation=True, max_length=max_length, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Single comment -> batch of one; take row 0 of the probability matrix
        probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]

        model_probs[model_name] = probs
        ensemble_preds += probs / len(models)  # Uniform weighting

    predicted_label = int(np.argmax(ensemble_preds))

    # Index 2 / 1 / 0 correspond to Non-Toxic / Moderately Toxic / Severely Toxic in label_mapping
    print("\n🔍 Model Probabilities:")
    for model_name, probs in model_probs.items():
        print(f"{model_name}: Non-Toxic: {probs[2]:.4f}, Moderately Toxic: {probs[1]:.4f}, Severely Toxic: {probs[0]:.4f}")

    print("\n🎯 Ensemble Probabilities:")
    print(f"Non-Toxic: {ensemble_preds[2]:.4f}, Moderately Toxic: {ensemble_preds[1]:.4f}, Severely Toxic: {ensemble_preds[0]:.4f}")

    return label_mapping[predicted_label]

# User Input Loop
# "\n" (backslash) is the newline escape; "/n" would be printed literally.
print("\n🔥 Abusive Comment Detection System 🔥\nEnter text to classify (type 'exit' to quit):")
while True:
    user_input = input("\nEnter comment: ")
    # Ignore surrounding whitespace so " exit " also quits
    if user_input.strip().lower() == "exit":
        print("Exiting...👋")
        break
    if not user_input.strip():
        continue  # nothing to classify; prompt again
    result = classify_comment(user_input)
    print(f"🚀 Prediction: {result}")


/n🔥 Abusive Comment Detection System 🔥/nEnter text to classify (type 'exit' to quit):


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


/n🔍 Model Probabilities:
electra: Non-Toxic: 0.9720, Moderately Toxic: 0.0279, Severely Toxic: 0.0001
deberta: Non-Toxic: 0.1366, Moderately Toxic: 0.8629, Severely Toxic: 0.0005
roberta: Non-Toxic: 0.7953, Moderately Toxic: 0.2047, Severely Toxic: 0.0000
hatebert: Non-Toxic: 0.0006, Moderately Toxic: 0.9993, Severely Toxic: 0.0001
/n🎯 Ensemble Probabilities:
Non-Toxic: 0.4761, Moderately Toxic: 0.5237, Severely Toxic: 0.0002
🚀 Prediction: Moderately Toxic
/n🔍 Model Probabilities:
electra: Non-Toxic: 0.0027, Moderately Toxic: 0.9848, Severely Toxic: 0.0125
deberta: Non-Toxic: 0.0009, Moderately Toxic: 0.9907, Severely Toxic: 0.0085
roberta: Non-Toxic: 0.0000, Moderately Toxic: 0.9993, Severely Toxic: 0.0007
hatebert: Non-Toxic: 0.0001, Moderately Toxic: 0.9988, Severely Toxic: 0.0011
/n🎯 Ensemble Probabilities:
Non-Toxic: 0.0009, Moderately Toxic: 0.9934, Severely Toxic: 0.0057
🚀 Prediction: Moderately Toxic
/n🔍 Model Probabilities:
electra: Non-Toxic: 0.0009, Moderately Toxic: 0.9966,

In [15]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import re
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from lime.lime_text import LimeTextExplainer

# Checkpoint directory of each fine-tuned ensemble member (adjust to your machine)
MODEL_PATHS = dict(
    electra="D:/Project Phase 1/Fine_Tuned Models/best_electra_model (1)",
    deberta="D:/Project Phase 1/Fine_Tuned Models/best_deberta",
    roberta="D:/Project Phase 1/Fine_Tuned Models/updated_twitter_roberta",
    hatebert="D:/Project Phase 1/Fine_Tuned Models/best_hatebert",
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device_kind = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_kind)

# One tokenizer and one classifier per ensemble member, keyed by the same name
models, tokenizers = {}, {}
for name in MODEL_PATHS:
    path = MODEL_PATHS[name]
    tokenizers[name] = AutoTokenizer.from_pretrained(path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(path)
    # Move to the chosen device and switch to evaluation mode before storing
    models[name] = loaded_model.to(device).eval()

# Class index -> display name
label_mapping = dict(enumerate(["Severely Toxic", "Moderately Toxic", "Non-Toxic"]))

def classify_comment(comment, lime_mode=False, max_length=512):
    """Classifies a given comment and returns either probabilities or full classification.

    Every loaded model scores the comment and the softmax probabilities are
    averaged with uniform weights.

    Args:
        comment: Text to classify.
        lime_mode: When True, skip all printing and return only the averaged
            probability vector (the shape LIME's classifier function needs).
        max_length: Token limit used when truncating the input. The default of
            512 is an assumption about the checkpoints' limit — confirm for
            your models.

    Returns:
        If lime_mode is True: the averaged probability array (indexed by class).
        Otherwise: a tuple (predicted class display name, averaged probability array).
    """
    ensemble_preds = np.zeros(3)  # Assuming 3 classes: 0, 1, 2
    model_probs = {}

    for model_name, model in models.items():
        tokenizer = tokenizers[model_name]
        # truncation=True without max_length falls back to "no truncation" for
        # tokenizers that have no predefined maximum length, so pass the limit explicitly.
        inputs = tokenizer(comment, padding=True, truncation=True, max_length=max_length, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Single comment -> batch of one; take row 0 of the probability matrix
        probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()[0]

        model_probs[model_name] = probs
        ensemble_preds += probs / len(models)  # Uniform weighting

    if lime_mode:
        return ensemble_preds  # Return probabilities for LIME

    predicted_label = int(np.argmax(ensemble_preds))

    # Index 2 / 1 / 0 correspond to Non-Toxic / Moderately Toxic / Severely Toxic in label_mapping
    print("\n🔍 Model Probabilities:")
    for model_name, probs in model_probs.items():
        print(f"{model_name}: Non-Toxic: {probs[2]:.4f}, Moderately Toxic: {probs[1]:.4f}, Severely Toxic: {probs[0]:.4f}")

    print("\n🎯 Ensemble Probabilities:")
    print(f"Non-Toxic: {ensemble_preds[2]:.4f}, Moderately Toxic: {ensemble_preds[1]:.4f}, Severely Toxic: {ensemble_preds[0]:.4f}")

    return label_mapping[predicted_label], ensemble_preds

# LIME Explanation Setup
# Names are listed in class-index order (0, 1, 2), matching the index order of
# the probability vector classify_comment returns in lime_mode.
lime_class_names = ["Severely Toxic", "Moderately Toxic", "Non-Toxic"]
explainer = LimeTextExplainer(class_names=lime_class_names)

def clean_text(text):
    """Removes '*' characters without breaking words.

    Args:
        text: Input string.

    Returns:
        The input with every asterisk deleted; all other characters are kept.
    """
    # The asterisk must be escaped: an unescaped '*' is a quantifier, so r'/*'
    # would match runs of '/' (and empty strings) instead of literal asterisks.
    return re.sub(r'\*', '', text)

def lime_explain(comment, num_features=10, num_samples=5000):
    """Generates a LIME explanation and saves a PNG plot of word importance.

    The explanation targets the class the ensemble ranks highest for the
    comment, so the plotted weights describe the prediction actually made.

    Args:
        comment: Text to explain; it is passed through clean_text first.
        num_features: Maximum number of words shown in the explanation.
        num_samples: Number of perturbed texts LIME generates. Each one is
            scored by every model in the ensemble, so lowering this value
            shortens the run proportionally.

    Returns:
        None. The plot is written to 'lime_explanation.png' and shown.
    """

    def model_predict(texts):
        """Returns one ensemble probability vector per text, as LIME's classifier function."""
        return np.array([classify_comment(clean_text(text), lime_mode=True) for text in texts])

    cleaned_comment = clean_text(comment)  # Preprocess input text
    if not cleaned_comment.strip():
        print("Nothing to explain: the comment is empty after cleaning.")
        return

    # Without top_labels LIME explains label index 1 only; top_labels=1 makes it
    # explain whichever class has the highest predicted probability.
    exp = explainer.explain_instance(
        cleaned_comment,
        model_predict,
        num_features=num_features,
        num_samples=num_samples,
        top_labels=1,
    )
    explained_label = exp.available_labels()[0]
    explained_class = exp.class_names[explained_label]

    # Extract words and scores for the explained class
    word_weights = exp.as_list(label=explained_label)
    if not word_weights:
        # zip(*[]) cannot be unpacked into two names; bail out with a message instead
        print("LIME returned no word weights for this comment.")
        return
    words, scores = zip(*word_weights)

    # Plot
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.barh(words, scores, color="blue")
    ax.set_xlabel(f"LIME weight toward '{explained_class}'")
    ax.set_ylabel("Word")
    ax.set_title(f"LIME Word Importance for '{explained_class}'")
    ax.invert_yaxis()  # first list entry at the top

    # Save as PNG
    fig.savefig("lime_explanation.png")
    plt.show()
    # This function runs once per user comment; release the figure each time
    plt.close(fig)

# User Input Loop
# "\n" (backslash) is the newline escape; "/n" would be printed literally.
print("\n🔥 Abusive Comment Detection System 🔥\nEnter text to classify (type 'exit' to quit):")
while True:
    user_input = input("\nEnter comment: ")
    # Ignore surrounding whitespace so " exit " also quits
    if user_input.strip().lower() == "exit":
        print("Exiting...👋")
        break
    if not user_input.strip():
        continue  # nothing to classify or explain; prompt again
    result, _ = classify_comment(user_input)
    print(f"🚀 Prediction: {result}")
    lime_explain(user_input)


/n🔥 Abusive Comment Detection System 🔥/nEnter text to classify (type 'exit' to quit):


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


/n🔍 Model Probabilities:
electra: Non-Toxic: 0.0256, Moderately Toxic: 0.5384, Severely Toxic: 0.4360
deberta: Non-Toxic: 0.0022, Moderately Toxic: 0.9658, Severely Toxic: 0.0320
roberta: Non-Toxic: 0.0031, Moderately Toxic: 0.8440, Severely Toxic: 0.1530
hatebert: Non-Toxic: 0.0001, Moderately Toxic: 0.9996, Severely Toxic: 0.0003
/n🎯 Ensemble Probabilities:
Non-Toxic: 0.0077, Moderately Toxic: 0.8370, Severely Toxic: 0.1553
🚀 Prediction: Moderately Toxic


KeyboardInterrupt: 