In [2]:
import pandas as pd
from dotenv import load_dotenv
from groq import Groq
import json
import time
import re

In [3]:
# Read the local .env file into the process environment; it is expected to
# define GROQ_API_KEY.
load_dotenv()

# Module-level Groq client, constructed with no explicit arguments and reused
# for every chat-completion request made by the classification function.
groq = Groq()

In [4]:
# LLM Classification Function
def _extract_json_object(text):
    """Return the first JSON object (dict) that decodes cleanly from ``text``.

    Every ``{`` is tried as a potential start of a JSON document, so stray
    braces in surrounding prose (or an echoed, non-JSON schema) are skipped
    instead of poisoning the parse.

    Raises:
        ValueError: if no position in ``text`` decodes to a JSON object.
    """
    decoder = json.JSONDecoder()
    for brace in re.finditer(r'\{', text):
        try:
            candidate, _ = decoder.raw_decode(text, brace.start())
        except json.JSONDecodeError:
            continue
        if isinstance(candidate, dict):
            return candidate
    raise ValueError("no JSON object found in model response")


def _as_bool(value):
    """Coerce a model-supplied flag (bool, or a string like "true") to bool."""
    if isinstance(value, str):
        return value.strip().lower() in ("true", "yes", "1")
    return bool(value)


def classify_comment_with_llm(comment_text):
    """Ask the Groq-hosted LLM whether a comment is offensive.

    Args:
        comment_text: The user comment to analyse. ``None`` or a blank /
            whitespace-only value is reported as non-offensive without
            calling the API.

    Returns:
        dict: Always contains the keys
            - ``is_offensive`` (bool)
            - ``offense_type`` (str, lower-cased; "none" when absent)
            - ``explanation`` (str)
        Any additional keys the model returned are passed through unchanged.
        If the request fails or no JSON object can be extracted from the
        reply, a best-effort fallback is returned with ``is_offensive=False``,
        ``offense_type="none"`` and an explanation starting with
        "Error or malformed response:". No exception is propagated.
    """
    # Nothing to classify: skip the API round-trip entirely.
    if comment_text is None or not str(comment_text).strip():
        return {
            "is_offensive": False,
            "offense_type": "none",
            "explanation": "Empty comment; nothing to classify."
        }

    prompt = f'''
You are a content moderation assistant.

Analyze the following user comment and respond in JSON format with:
- is_offensive: true/false
- offense_type: one of ["hate speech", "harassment", "profanity", "toxicity", "none"]
- explanation: a short explanation of why it was or wasn't offensive.

Comment: """{comment_text}"""

Respond ONLY with JSON:
{{
  "is_offensive": true/false,
  "offense_type": "one of above",
  "explanation": "..."
}}
'''

    try:
        chat_completion = groq.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="deepseek-r1-distill-llama-70b",
            temperature=0.3
        )

        # content may be None on an empty completion; treat that as "".
        content = (chat_completion.choices[0].message.content or "").strip()

        # Reasoning models may prepend a <think>...</think> section that can
        # itself contain braces. Only the text after the last closing tag is
        # the actual answer (the whole reply is kept when no tag is present).
        answer = content.rsplit('</think>', 1)[-1]
        parsed = _extract_json_object(answer)

        # Normalise the three fields callers rely on so a partially filled
        # or loosely typed reply cannot break downstream column building.
        result = dict(parsed)
        result["is_offensive"] = _as_bool(parsed.get("is_offensive", False))
        result["offense_type"] = str(parsed.get("offense_type") or "none").strip().lower()
        result["explanation"] = str(parsed.get("explanation") or "")

    except Exception as e:
        # Deliberate best-effort: one bad reply must not abort a batch run.
        result = {
            "is_offensive": False,
            "offense_type": "none",
            "explanation": f"Error or malformed response: {str(e)}"
        }

    return result


In [7]:
# Load comments CSV
input_file = "comments.csv"  # <-- Your input file here
max_comments = 100           # number of leading rows to classify

# nrows stops parsing once enough rows are read, instead of loading the whole
# file and discarding everything after the first `max_comments` rows.
df = pd.read_csv(input_file, nrows=max_comments)
df['comment_text'] = df['comment_text'].fillna("")

results = []
total = len(df)
# enumerate gives a 1-based progress counter that does not depend on the
# DataFrame index being the default 0..n-1 range.
for position, comment in enumerate(df['comment_text'], start=1):
    print(f"Analyzing comment {position}/{total}...")
    results.append(classify_comment_with_llm(comment))
    time.sleep(1)  # Prevent rate limiting

# Add AI-generated fields to DataFrame.
# .get with defaults: a single reply that lacks one of the keys must not raise
# KeyError here and throw away every classification already paid for.
df['is_offensive'] = [r.get('is_offensive', False) for r in results]
df['offense_type'] = [r.get('offense_type', 'none') for r in results]
df['explanation'] = [r.get('explanation', '') for r in results]

Analyzing comment 1/100...
Analyzing comment 2/100...
Analyzing comment 3/100...
Analyzing comment 4/100...
Analyzing comment 5/100...
Analyzing comment 6/100...
Analyzing comment 7/100...
Analyzing comment 8/100...
Analyzing comment 9/100...
Analyzing comment 10/100...
Analyzing comment 11/100...
Analyzing comment 12/100...
Analyzing comment 13/100...
Analyzing comment 14/100...
Analyzing comment 15/100...
Analyzing comment 16/100...
Analyzing comment 17/100...
Analyzing comment 18/100...
Analyzing comment 19/100...
Analyzing comment 20/100...
Analyzing comment 21/100...
Analyzing comment 22/100...
Analyzing comment 23/100...
Analyzing comment 24/100...
Analyzing comment 25/100...
Analyzing comment 26/100...
Analyzing comment 27/100...
Analyzing comment 28/100...
Analyzing comment 29/100...
Analyzing comment 30/100...
Analyzing comment 31/100...
Analyzing comment 32/100...
Analyzing comment 33/100...
Analyzing comment 34/100...
Analyzing comment 35/100...
Analyzing comment 36/100...
A

In [None]:
# output_file = "output_file.csv"
# df.to_csv(output_file, index=False)

In [1]:
import pandas as pd
# Load every row of the comments file into memory.
df = pd.read_csv(filepath_or_buffer="comments.csv")

In [2]:
# Keep only the first 1000 rows; the name `df` is rebound to the shorter frame.
df = df.head(n=1000)

In [3]:
# NOTE: this writes to the path "comments.csv" without the index column,
# replacing whatever file is already stored at that path.
df.to_csv(path_or_buf="comments.csv", index=False)