In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import math
import numpy as np
import os

In [None]:
# Singlish toxic terms: a plain list of lowercase words and phrases.
# flag_singlish_toxic() tests each entry as a substring of the lower-cased text,
# so entries must stay lowercase. Each term is listed exactly once.
singlish_toxic_dict = [
    'ahbeng', 'ahlian', 'baka', 'bloody hell', 'bloody idiot', 'bodoh', 'bo liao',
    'buay pai seh', 'buay tahan', 'cb', 'cb kia', 'cb knn', 'cb lao jia', 'cb lao knn',
    'cb lor', 'cb sia', 'cb sia kia', 'ccb', 'chbye kia', 'chao chbye', 'chao chee bye',
    'chow cibai', 'chow kar', 'chow tu lan', 'cibai', 'dumb ass', 'dumb', 'fuck', 'fuck you',
    'fking', 'fucker', 'fucker sia', 'gila babi', 'gundu', 'hao lian kia', 'hopeless', 'idiot',
    'ji bai', 'jiat lat', 'jialat kia', 'jibai', 'joker', 'kan', 'kan ni na', 'kena sai',
    'kia si lang', 'knn', 'knn cb kia', 'knnccb', 'knnbccb', 'kns', 'kns cb', 'lampa', 'lan pa',
    'lanjiao', 'lanjiao kia', 'lj', 'loser', 'nabei', 'no use kia', 'noob', 'pok gai', 'pui',
    'sabo kia', 'sibei jialat', 'sibei sian', 'si gina', 'siol', 'slut', 'siao lang', 'stupid',
    'suck', 'sua gu', 'tmd', 'tiok knn', 'tiok tiam', 'useless', 'what knn', 'what the fuck',
    'wtf', 'wu liao kia', 'you die ah', 'you die',
]

In [None]:
# Candidate models as (hub id, include-in-report) pairs.
# Every model listed here is evaluated; only those flagged True are written to the Excel report.
models_config = [
    ('unitary/toxic-bert', True),  # Include this one
    ('unitary/unbiased-toxic-roberta', True),  # Include this one
    ('pykeio/lite-toxic-comment-classification', True),  # Include this one
    ('martin-ha/toxic-comment-model', False),
    ('JungleLee/bert-toxic-comment-classification', False),
    ('ZiruiXiong/bert-base-finetuned-toxic-comment-classification', False),
    ('longluu/distilbert-toxic-comment-classifier', False),
    ('prabhaskenche/toxic-comment-classification-using-RoBERTa', False),
]

# Hub ids of every model to run
model_names = [hub_id for hub_id, _include in models_config]

# Hub ids of the models whose metrics go into the report
models_to_output = [hub_id for hub_id, include in models_config if include]

In [None]:
def convert_manual_label(label):
    """Map a manual annotation to its label string.

    Returns "toxic" when ``label`` equals 1 and "non-toxic" for every other value.
    """
    if label == 1:
        return "toxic"
    return "non-toxic"

def flag_singlish_toxic(text, toxic_dict):
    """Flag ``text`` as toxic when it contains any term from ``toxic_dict``.

    The text is lower-cased before matching; the terms are compared as given,
    so they must already be lowercase. Matching is a plain substring test.

    Args:
        text: Value to inspect. Anything that is not a ``str`` is never flagged.
        toxic_dict: Iterable of lowercase words/phrases to look for.

    Returns:
        "toxic" if at least one term occurs in the text, otherwise ``None``.
    """
    if not isinstance(text, str):
        return None

    # Lower-case once rather than once per dictionary entry.
    lowered = text.lower()

    # NOTE(review): substring matching means short terms such as 'cb' or 'kan'
    # also match inside longer words (e.g. 'cbd') - confirm this is intended.
    if any(term in lowered for term in toxic_dict):
        return "toxic"
    return None

def classify_toxicity_by_dynamic_chunks(text, tokenizer, classifier, max_length=512,
                                        toxic_labels=("toxic", "toxicity")):
    """Score ``text`` for toxicity, splitting long inputs into evenly sized token chunks.

    The text is tokenised without special tokens and cut into the smallest number of
    near-equal chunks that fit the model. Each chunk is decoded back to text, classified,
    and the per-chunk toxicity scores are averaged.

    Args:
        text: Text to score.
        tokenizer: Hugging Face tokenizer matching ``classifier``.
        classifier: Text-classification pipeline (or any callable with the same
            call/return convention).
        max_length: Maximum number of tokens, special tokens included, per chunk.
        toxic_labels: Label names (case-insensitive) whose score is the toxicity
            score. If the model emits none of them (e.g. ``LABEL_0``/``LABEL_1``),
            the highest label score is used; pass the right name to override.

    Returns:
        Mean toxicity score over all chunks, or ``0.0`` if anything fails
        (a warning is printed in that case).
    """
    try:
        # Count content tokens only; the pipeline re-adds special tokens per chunk.
        inputs = tokenizer(text, return_tensors="pt", truncation=False, add_special_tokens=False)
        input_ids = inputs['input_ids'][0]
        total_tokens = len(input_ids)

        # Reserve room for the special tokens so a full chunk still fits max_length.
        count_special = getattr(tokenizer, "num_special_tokens_to_add", None)
        reserved = count_special() if callable(count_special) else 0
        budget = max(1, max_length - reserved)

        # Fewest chunks that fit, then spread the tokens evenly across them.
        num_chunks = max(1, math.ceil(total_tokens / budget))
        chunk_size = math.ceil(total_tokens / num_chunks)

        if isinstance(toxic_labels, str):
            toxic_labels = (toxic_labels,)
        wanted = {str(label).lower() for label in toxic_labels}

        toxicity_scores = []
        for i in range(num_chunks):
            start = i * chunk_size
            end = min(start + chunk_size, total_tokens)
            chunk = tokenizer.decode(input_ids[start:end], skip_special_tokens=True)
            # top_k=None asks for every label's score: the default top-1 result may be
            # a non-toxic label, whose score is not a toxicity score.
            # truncation/max_length guard against decode/re-encode drift in token count.
            # NOTE(review): needs a transformers version whose pipeline accepts top_k.
            result = classifier(chunk, top_k=None, truncation=True, max_length=max_length)
            toxicity_scores.append(_select_toxicity_score(result, wanted))

        return sum(toxicity_scores) / len(toxicity_scores)
    except Exception as e:
        # Best effort: a failing text is reported and scored as non-toxic.
        print(f"Warning: Error processing text chunk: {str(e)}")
        return 0.0


def _select_toxicity_score(result, toxic_labels):
    """Pick the toxicity score out of one text-classification pipeline result."""
    # Depending on the pipeline version the per-label list may be wrapped in an outer list.
    entries = result[0] if result and isinstance(result[0], list) else result
    if isinstance(entries, dict):
        entries = [entries]
    for entry in entries:
        if str(entry['label']).lower() in toxic_labels:
            return entry['score']
    # No recognised toxic label: fall back to the top label's score.
    return max(entry['score'] for entry in entries)

def calculate_metrics(actual_labels, scores, dict_predictions, threshold):
    """Build confusion counts, precision and recall for one decision threshold.

    A sample is predicted "toxic" when the dictionary already flagged it
    (``dict_predictions[i] == "toxic"``) or when its score is strictly above
    ``threshold``; otherwise it is predicted "non-toxic".

    Args:
        actual_labels: Ground-truth labels, "toxic" or "non-toxic".
        scores: Model scores, one per sample.
        dict_predictions: Dictionary-based flags, indexed like ``scores``.
        threshold: Score cut-off.

    Returns:
        Dict with keys 'threshold', 'tp', 'fp', 'fn', 'tn', 'precision', 'recall'.
        Precision/recall are 0 when their denominator is zero.
    """
    predicted_labels = [
        "toxic" if dict_predictions[idx] == "toxic" or score > threshold else "non-toxic"
        for idx, score in enumerate(scores)
    ]

    # Single pass over the (actual, predicted) pairs; predictions are always one of the two labels.
    tp = fp = fn = tn = 0
    for actual, predicted in zip(actual_labels, predicted_labels):
        flagged = predicted == 'toxic'
        if actual == 'toxic':
            if flagged:
                tp += 1
            else:
                fn += 1
        elif actual == 'non-toxic':
            if flagged:
                fp += 1
            else:
                tn += 1

    predicted_positive = tp + fp
    actual_positive = tp + fn

    if predicted_positive > 0:
        precision = tp / predicted_positive
    else:
        precision = 0

    if actual_positive > 0:
        recall = tp / actual_positive
    else:
        recall = 0

    return {
        'threshold': threshold,
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
        'precision': precision,
        'recall': recall
    }

In [None]:
# Main execution
# Locate the folder holding this code. `__file__` only exists when the code runs as a
# script; inside a notebook kernel it is undefined, so fall back to the working directory.
try:
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_dir = os.getcwd()
# Go up one level to parent directory and then into data folder
data_dir = os.path.join(os.path.dirname(current_dir), 'data')
file_path_sample = os.path.join(data_dir, 'manual_label_sample.xlsx')
excel_output_path = os.path.join(data_dir, 'model_metrics.xlsx')

df_sample = pd.read_excel(file_path_sample)

# Thresholds 0.1, 0.2, ..., 0.9. Built from integers so the values are the clean
# decimals (a float-step arange drifts to values like 0.30000000000000004).
thresholds = np.arange(1, 10) / 10

# Convert manual labels to toxic/non-toxic
actual_labels = df_sample['manual'].apply(convert_manual_label).tolist()

# First, apply dictionary-based flagging
dict_predictions = [flag_singlish_toxic(text, singlish_toxic_dict) for text in df_sample['text']]

# Metrics per model: model name -> list of per-threshold metric dicts
all_results = {}

# Process all models (including those not written to the report)
for model_name in model_names:
    print(f"Processing model: {model_name}")

    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

    # Dictionary hits get the maximum score without running the model;
    # everything else is scored by the classifier.
    scores = [
        1.0 if dict_label == "toxic"
        else classify_toxicity_by_dynamic_chunks(text, tokenizer, classifier)
        for text, dict_label in zip(df_sample['text'], dict_predictions)
    ]

    # One metrics dict per threshold
    metrics_list = [
        calculate_metrics(actual_labels, scores, dict_predictions, threshold)
        for threshold in thresholds
    ]

    # Highest recall first (stable sort keeps threshold order among ties)
    metrics_list.sort(key=lambda x: x['recall'], reverse=True)
    all_results[model_name] = metrics_list

In [None]:
# Write the per-threshold metrics of the selected models to one 'Metrics' worksheet.
# Layout per model: a merged title row, a column-header row, one row per threshold,
# then a blank spacer row.
with pd.ExcelWriter(excel_output_path, engine='xlsxwriter') as writer:
    workbook = writer.book
    worksheet = workbook.add_worksheet('Metrics')

    # Cell styles: model title, column headers, data cells
    header_format = workbook.add_format({
        'bold': True,
        'font_size': 12,
        'bg_color': '#4F81BD',
        'font_color': 'white',
        'border': 1
    })
    subheader_format = workbook.add_format({
        'bold': True,
        'font_size': 11,
        'bg_color': '#D3D3D3',
        'border': 1
    })
    cell_format = workbook.add_format({'border': 1})

    # Column titles and the metric key feeding each column, in matching order
    headers = ['Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall']
    metric_keys = ['threshold', 'tp', 'fp', 'fn', 'tn', 'precision', 'recall']

    # Next free worksheet row
    current_row = 0

    for model_name in models_to_output:
        # Title row spanning all seven columns
        worksheet.merge_range(current_row, 0, current_row, 6, f"Model: {model_name}", header_format)
        current_row += 1

        # Column headers
        for col, header in enumerate(headers):
            worksheet.write(current_row, col, header, subheader_format)
        current_row += 1

        # One row per threshold
        for metrics in all_results[model_name]:
            for col, key in enumerate(metric_keys):
                worksheet.write(current_row, col, metrics[key], cell_format)
            current_row += 1

        # Blank spacer row before the next model
        current_row += 1

    # Column widths
    worksheet.set_column(0, 0, 12)  # Threshold
    worksheet.set_column(1, 4, 8)  # TP, FP, FN, TN
    worksheet.set_column(5, 6, 12)  # Precision, Recall

print(f"Results have been saved to {excel_output_path}")