In [8]:
# 📦 Step 1: Install required libraries
!pip install -q transformers pandas torch

# 📌 Step 2: Import libraries
import pandas as pd
from transformers import pipeline

# 📌 Step 3: Read the comments CSV into a DataFrame
# Point this at your own file if it lives elsewhere (it must already be uploaded to Colab)
df = pd.read_csv(filepath_or_buffer="/content/sample traing data.csv")
# Confirmation message, then a preview of the first rows on the following lines
print("✅ File loaded.", df.head(), sep="\n")

# 📌 Step 4: Build the Hugging Face text-classification pipeline backed by unitary/toxic-bert
print("⏳ Loading model...")
moderator = pipeline(task="text-classification", model="unitary/toxic-bert")
print("✅ Model loaded.")

# 📌 Step 5: Define classification function
def classify_comment(comment):
    """Classify one comment with the module-level ``moderator`` pipeline.

    Args:
        comment: Text of the comment to classify.

    Returns:
        A string of the form ``"<label> (<score>)"`` built from the first
        result the pipeline returns, with the score rounded to two decimals,
        or ``"Error"`` if anything raised while classifying or formatting.
    """
    try:
        # Let the tokenizer enforce the 512-token model limit. Slicing the
        # string with [:512] cuts by characters, not tokens: it throws away
        # text that would still fit, and it does not guarantee the token
        # count stays within the limit.
        result = moderator(comment, truncation=True, max_length=512)[0]
        label = result['label']
        score = result['score']
        return f"{label} ({round(score, 2)})"
    except Exception as e:
        # Deliberately best-effort: one bad row is reported and marked
        # "Error" so the rest of the DataFrame can still be processed.
        print(f"⚠️ Error on comment: {comment}\n{e}")
        return "Error"

# 📌 Step 6: Run the classifier over every comment and store the results in a new column
df["Moderation_Result"] = df["comment_text"].map(classify_comment)

# 📌 Step 7: Write the annotated table to CSV and to Excel, without the index column
for export, destination in (
    (df.to_csv, "moderated_comments_bert.csv"),
    (df.to_excel, "moderated_comments_bert.xlsx"),
):
    export(destination, index=False)

print("✅ Done! Files saved as 'moderated_comments_bert.csv' and 'moderated_comments_bert.xlsx'")


✅ File loaded.
   comment_id username                    comment_text
0           1   user_1       Nice work on the project.
1           2   user_2      Nobody likes you, go away.
2           3   user_3  I'm grateful for your support.
3           4   user_4        I really like your idea.
4           5   user_5        I really like your idea.
⏳ Loading model...


Device set to use cuda:0


✅ Model loaded.
✅ Done! Files saved as 'moderated_comments_bert.csv' and 'moderated_comments_bert.xlsx'
