In [21]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report


In [22]:
# Step 2: Load Dataset
df = pd.read_csv("train.csv")  # Jigsaw dataset
df = df[['comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']]
df.head()


Unnamed: 0,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [23]:
# Step 3: Clean the text data
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    return text.strip()

df['clean_text'] = df['comment_text'].apply(clean_text)


In [24]:
# Step 4: Train-test split
X = df['clean_text']
y = df[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


In [25]:
# Step 5: TF-IDF Vectorization
tfidf = TfidfVectorizer(max_features=10000, stop_words='english')
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)


In [26]:
# Step 6: Train Logistic Regression with MultiOutputClassifier
lr = LogisticRegression(max_iter=1000)
multi_model = MultiOutputClassifier(lr)
multi_model.fit(X_train_vec, y_train)


0,1,2
,estimator,LogisticRegre...max_iter=1000)
,n_jobs,

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [27]:
# Step 7: Evaluate the Model
y_pred = multi_model.predict(X_test_vec)
print(classification_report(y_test, y_pred, target_names=y.columns))


               precision    recall  f1-score   support

        toxic       0.91      0.62      0.74      1480
 severe_toxic       0.56      0.27      0.37       148
      obscene       0.92      0.63      0.75       836
       threat       0.86      0.16      0.27        37
       insult       0.85      0.52      0.65       791
identity_hate       0.83      0.16      0.27       147

    micro avg       0.88      0.56      0.69      3439
    macro avg       0.82      0.39      0.51      3439
 weighted avg       0.88      0.56      0.68      3439
  samples avg       0.06      0.05      0.05      3439



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [28]:
# Step 8: Save the Model and TF-IDF Vectorizer
with open("logistic_model.pkl", "wb") as f:
    pickle.dump(multi_model, f)

with open("tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)


In [18]:
# Load and test
model = pickle.load(open("../models/logistic_model.pkl", "rb"))
vectorizer = pickle.load(open("../models/tfidf_vectorizer.pkl", "rb"))

def predict(text):
    text_cleaned = clean_text(text)
    vector = vectorizer.transform([text_cleaned])
    pred = model.predict(vector)[0]
    labels = y.columns
    return {label: bool(p) for label, p in zip(labels, pred)}

predict("I am going to find you and hurt you.")


NameError: name 'y' is not defined

In [19]:
import pickle
import re

# Load model and vectorizer
model = pickle.load(open("../models/logistic_model.pkl", "rb"))
vectorizer = pickle.load(open("../models/tfidf_vectorizer.pkl", "rb"))

# Labels and explanations
LABEL_MAP = {
    "toxic": "Mildly Toxic",
    "severe_toxic": "Highly Abusive",
    "obscene": "Obscene Language",
    "threat": "Threatening Content",
    "insult": "Contains Insults",
    "identity_hate": "Hate Speech"
}
LABELS = list(LABEL_MAP.keys())

# Grouping for natural language
CATEGORIES = {
    "Toxic": ["toxic"],
    "Abusive": ["severe_toxic", "obscene", "insult"],
    "Threatening": ["threat"],
    "Hateful": ["identity_hate"]
}

thresholds = {label: 0.5 for label in LABELS}

# Clean text
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r"http\S+|www\S+|@\w+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    return text.strip()

# Main prediction + summary
def predict_and_interpret(text: str) -> str:
    cleaned = clean_text(text)
    vec = vectorizer.transform([cleaned])
    probs = model.predict_proba(vec)

    pred = {}
    for i, label in enumerate(LABELS):
        prob = probs[i][0][1]
        pred[label] = prob >= thresholds[label]

    # Generate category-based summary
    summary = []
    for category, labels in CATEGORIES.items():
        if any(pred.get(lbl) for lbl in labels):
            summary.append(category)

    # Specific triggers
    details = [f"✅ {LABEL_MAP[label]}" for label in LABELS if pred[label]]

    if not summary:
        return "✅ This comment appears to be: Not Toxic or Offensive."

    final = f"🧠 This comment appears to be: **{', '.join(summary)}**\n\nBreakdown:\n" + "\n".join(details)
    return final


In [20]:
comment = "I’m going to find you and hurt you."
print(predict_and_interpret(comment))


✅ This comment appears to be: Not Toxic or Offensive.
