In [None]:
# Mount Google Drive at /content/drive; the model checkpoints and the CSV output
# used by later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Labels reported by the fine-grained classifier. predict_toxicity pairs this
# list positionally with that model's outputs, so the order matters.
label_cols = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def _load_classifier(checkpoint_dir):
    """Load the tokenizer and sequence classifier stored at ``checkpoint_dir``.

    Returns a ``(tokenizer, model)`` pair; the model has been moved to ``device``.
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    return tokenizer, model.to(device)


# Stage 1: binary classifier
binary_model_path = "/content/drive/MyDrive/..."
tokenizer_binary, model_binary = _load_classifier(binary_model_path)

# Stage 2: fine-grained classifier
fine_model_path = "/content/drive/MyDrive/..."
tokenizer_fine, model_fine = _load_classifier(fine_model_path)

# Inference function
def predict_toxicity(comment, threshold=0.5, subtype_threshold=0.5):
    """Classify one comment with the two-stage toxicity pipeline.

    Stage 1 runs the binary classifier and labels the comment "toxic" when the
    softmax probability at class index 1 is at least ``threshold``. Stage 2 (the
    fine-grained classifier) runs only for comments labelled toxic and keeps
    every label in ``label_cols`` whose sigmoid probability is at least
    ``subtype_threshold``.

    Args:
        comment: The text to classify. Pass a single comment per call: the
            stage-1 probability is extracted with ``.item()``, which only
            accepts a one-element tensor.
        threshold: Minimum stage-1 toxic probability for the "toxic" label.
            Defaults to 0.5.
        subtype_threshold: Minimum stage-2 probability for a subtype to be
            reported. Defaults to 0.5.

    Returns:
        A dict with three keys:
            "binary": "toxic" or "non-toxic".
            "subtypes": None when the comment is non-toxic; otherwise a dict
                mapping each retained label to its probability rounded to two
                decimals (possibly empty).
            "toxic_prob": the unrounded stage-1 toxic probability as a float.
    """
    # Stage 1: binary classification
    binary_inputs = tokenizer_binary(comment, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        binary_logits = model_binary(**binary_inputs).logits
    # Column 1 of the softmax output is read as the toxic class.
    toxic_prob = torch.softmax(binary_logits, dim=1)[:, 1].item()

    binary_pred = "toxic" if toxic_prob >= threshold else "non-toxic"
    if binary_pred == "non-toxic":
        # The fine-grained model is skipped entirely for non-toxic comments.
        return {"binary": binary_pred, "subtypes": None, "toxic_prob": toxic_prob}

    # Stage 2: fine-grained classification
    fine_inputs = tokenizer_fine(comment, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        fine_logits = model_fine(**fine_inputs).logits
    # Independent sigmoid per label; [0] selects the single row of the batch.
    fine_probs = torch.sigmoid(fine_logits).cpu().numpy()[0]

    subtypes = {
        label: round(float(prob), 2)
        for label, prob in zip(label_cols, fine_probs)
        if prob >= subtype_threshold
    }
    return {"binary": binary_pred, "subtypes": subtypes, "toxic_prob": toxic_prob}


# Example usage
# Smoke test: classify one hand-written sample and print the returned dict
# (keys "binary", "subtypes", "toxic_prob").
if __name__ == "__main__":
    sample = "You're a disgusting idiot and should be banned."
    result = predict_toxicity(sample)
    print(result)


In [None]:
!pip install datasets


In [None]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
# No split is selected here; later cells index the result by language code (ds["en"]).
ds = load_dataset("textdetox/multilingual_paradetox_test")

In [None]:
from datasets import load_dataset

# Load the dataset
# NOTE(review): this repeats the import and load_dataset call from the cell
# above; under a top-to-bottom run `ds` already holds the same dataset.
ds = load_dataset("textdetox/multilingual_paradetox_test")

# Check column names in the English split
print(ds["en"].column_names)


In [None]:
from datasets import load_dataset

# Load the English test set
# NOTE(review): third identical load_dataset call in this notebook; `ds` from
# the earlier cells could be reused under a linear run.
ds = load_dataset("textdetox/multilingual_paradetox_test")
english_data = ds["en"]

# Extract raw toxic text
# presumably every entry of the "text" column is a toxic input — confirm against the dataset card
toxic_sentences = english_data["text"]

# Run predictions
# One predict_toxicity call per sentence (no batching); `results` is a list of
# prediction dicts in the same order as `toxic_sentences`.
results = [predict_toxicity(s) for s in toxic_sentences]


In [None]:
import pandas as pd

# One row per input sentence. Each column after "text" is a field pulled out of
# every prediction dict; the pairs below read (output column, prediction key).
df_results = pd.DataFrame({
    "text": toxic_sentences,
    **{
        column: [prediction[field] for prediction in results]
        for column, field in (
            ("binary_prediction", "binary"),
            ("subtypes", "subtypes"),
            ("toxic_prob", "toxic_prob"),
        )
    },
})

# Save to Drive or local
df_results.to_csv("/content/drive/MyDrive/...", index=False)


In [None]:
from sklearn.metrics import classification_report


# Print the per-class report for the binary task: class 0 is shown as
# "non-toxic", class 1 as "toxic", with metrics formatted to 4 digits.
# NOTE(review): `true_labels` and `preds_adjusted` are not assigned anywhere in
# the visible notebook; on a fresh kernel this cell raises NameError unless a
# cell outside this view defines them. Presumably both are sequences of 0/1
# integers (labels=[0, 1]) and `preds_adjusted` comes from a non-default
# threshold on `toxic_prob` — confirm and add the defining cell.
print(classification_report(
    true_labels,
    preds_adjusted,
    labels=[0, 1],
    target_names=["non-toxic", "toxic"],
    digits=4,
    zero_division=0
))


In [None]:


# Send the file at this Drive path to the browser as a download.
# presumably the CSV written by the results cell — the path is elided here, confirm they match
from google.colab import files
files.download("/content/drive/MyDrive/...")
