
# Hate/Offensive Speech Classifier — Clean Training Notebook

This notebook retrains the model from **`data/labeled_data.csv`** (no legacy `.p` files), handles encoding issues, saves artifacts to the **`classifier/`** folder, and is compatible with `classifier/classifier.py` for inference.


In [3]:

# ---- Setup & imports
import os, re, joblib, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer

# Reproducibility: one seed shared by every step that accepts one.
RANDOM_STATE = 42

# Paths are resolved against the kernel's working directory: the dataset is
# read from ../data (one level up), artifacts are written to ./classifier.
# NOTE(review): DATA_CSV points above the working directory while ARTIFACT_DIR
# sits inside it -- confirm the intended launch directory against the place
# classifier.py loads its artifacts from.
DATA_CSV = os.path.join("../data", "labeled_data.csv")
ARTIFACT_DIR = "classifier"  # artifact folder (intended to be the one classifier.py reads)


def _artifact_path(filename: str) -> str:
    """Return the location of `filename` inside ARTIFACT_DIR."""
    return os.path.join(ARTIFACT_DIR, filename)


MODEL_PATH = _artifact_path("final_model.pkl")   # fitted classifier
TFIDF_PATH = _artifact_path("final_tfidf.pkl")   # fitted vectorizer
IDF_PATH = _artifact_path("final_idf.pkl")       # legacy compat
POS_PATH = _artifact_path("final_pos.pkl")       # legacy compat


In [4]:

# ---- Load the labelled tweets
# latin-1 maps every possible byte to a character, so decoding can never fail.
# The trade-off: multi-byte UTF-8 characters would arrive as mojibake, which is
# tolerable here only because clean_text() blanks out everything outside
# a-z, 0-9 and a little punctuation.
# Header names are stripped of surrounding whitespace as part of the same load.
df = pd.read_csv(DATA_CSV, encoding="latin-1").rename(columns=str.strip)

# Davidson dataset columns typically include:
# ["Unnamed: 0","count","hate_speech","offensive_language","neither","class","tweet"]
# Only the two columns used below are required; fail early with the actual header if absent.
assert "tweet" in df.columns, f"Expected a 'tweet' column. Found: {df.columns.tolist()}"
assert "class" in df.columns, f"Expected a 'class' column. Found: {df.columns.tolist()}"

# Inputs: raw tweet text, forced to str (a missing value becomes the literal "nan").
X_text = df["tweet"].astype(str)

# Labels: must parse as numbers (errors="raise"), then stored as plain ints.
y = pd.to_numeric(df["class"], errors="raise").astype(int)

print("Loaded:", len(X_text), "rows")
df.head(3)


Loaded: 24783 rows


Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...


In [5]:

# ---- Text cleaning (kept simple & deterministic)
URL_RE = re.compile(r"http\S+|www\.\S+")
MENTION_RE = re.compile(r"@\w+")
HASHTAG_RE = re.compile(r"#\w+")
MULTI_SPACE_RE = re.compile(r"\s+")

# Everything except lowercase letters, digits, whitespace and ' ! ? . ,
_DISALLOWED_RE = re.compile(r"[^a-z0-9\s'!?.,]")

# Applied in this order: URLs, mentions and hashtags have to be matched while
# their marker characters (':', '/', '@', '#') are still in the string.
_BLANKING_PASSES = (URL_RE, MENTION_RE, HASHTAG_RE, _DISALLOWED_RE)


def clean_text(s: str) -> str:
    """Normalise one tweet for vectorisation.

    The text is lowercased; URLs, @mentions and #hashtags (the whole token,
    not just the marker) are replaced by a space; any remaining character
    other than a-z, 0-9, whitespace and ' ! ? . , is replaced by a space;
    finally whitespace runs collapse to a single space and the ends are
    trimmed.

    Parameters
    ----------
    s : str
        Raw tweet text. Must be a str (``.lower()`` is called on it).

    Returns
    -------
    str
        The cleaned text; may be empty.
    """
    cleaned = s.lower()
    for pattern in _BLANKING_PASSES:
        cleaned = pattern.sub(" ", cleaned)
    return MULTI_SPACE_RE.sub(" ", cleaned).strip()

# Run the cleaner over every tweet (element-wise), then preview a few rows.
X_clean = X_text.map(clean_text)
X_clean.head(3)


0    !!! rt as a woman you shouldn't complain about...
1    !!!!! rt boy dats cold...tyga dwn bad for cuff...
2    !!!!!!! rt dawg!!!! rt you ever fuck a bitch a...
Name: tweet, dtype: object

In [6]:

# ---- Train/val split (80/20, stratified on the label, seeded)
X_train, X_val, y_train, y_val = train_test_split(
    X_clean,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

# ---- TF-IDF (bi-grams help this dataset)
# token_pattern is left at its default, which (per the scikit-learn docs)
# treats punctuation as a separator and drops one-character tokens -- so the
# punctuation that clean_text() preserves does not become a feature here.
tfidf = TfidfVectorizer(
    ngram_range=(1, 2),
    min_df=2,
    max_df=0.98,
    sublinear_tf=True,
    lowercase=False,   # we already lowercased in clean_text
)

# Vocabulary and IDF weights are learned from the training split only; the
# validation split is merely projected onto them.
Xtr = tfidf.fit_transform(X_train)
Xva = tfidf.transform(X_val)

# ---- Classifier
# The target has three classes (0/1/2), so this is not a binary problem;
# liblinear handles it with a one-vs-rest scheme.
# NOTE(review): recent scikit-learn releases deprecate liblinear for
# multiclass targets -- confirm against the installed version before upgrading.
clf = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",
    solver="liblinear",
    random_state=RANDOM_STATE,  # documented as used by liblinear; keeps the fit seeded like the split
)

clf.fit(Xtr, y_train)

# ---- Quick eval on the held-out split
pred_val = clf.predict(Xva)
print("Accuracy:", accuracy_score(y_val, pred_val))
print(classification_report(y_val, pred_val, digits=3))


Accuracy: 0.8894492636675408
              precision    recall  f1-score   support

           0      0.487     0.388     0.432       286
           1      0.934     0.933     0.934      3838
           2      0.801     0.860     0.829       833

    accuracy                          0.889      4957
   macro avg      0.741     0.727     0.732      4957
weighted avg      0.886     0.889     0.887      4957



In [7]:

# ---- Ensure artifact dir exists
os.makedirs(ARTIFACT_DIR, exist_ok=True)

# Only paths that were actually written end up in the report below.
saved_paths = []

# Core artifacts: a failure here is left to propagate and abort the cell.
for artifact, path in ((clf, MODEL_PATH), (tfidf, TFIDF_PATH)):
    joblib.dump(artifact, path)
    saved_paths.append(path)

# Legacy/compat artifacts (if your classifier.py loads them)
# - IDF: store the vectorizer's idf_ array (None is stored if the attribute is absent)
# - POS: some repos expect a POS feature pickle; store an empty dict if unused
# The IDF dump is best-effort: on failure a warning is printed and the path is
# left out of the "Saved:" list instead of being reported as written.
try:
    joblib.dump(getattr(tfidf, "idf_", None), IDF_PATH)
    saved_paths.append(IDF_PATH)
except Exception as e:
    print("Warning saving IDF:", e)

joblib.dump({}, POS_PATH)  # harmless placeholder
saved_paths.append(POS_PATH)

print("Saved:")
for path in saved_paths:
    print(" -", path)


Saved:
 - classifier\final_model.pkl
 - classifier\final_tfidf.pkl
 - classifier\final_idf.pkl
 - classifier\final_pos.pkl


In [8]:

# ---- Reload the saved artifacts and predict through them, as inference would
# NOTE: joblib.load unpickles the file; only load artifacts you produced yourself.
clf2, tfidf2 = joblib.load(MODEL_PATH), joblib.load(TFIDF_PATH)


def predict_texts(texts):
    """Predict a class id for each raw text.

    Every item is passed through clean_text(), vectorised with the reloaded
    vectorizer (`tfidf2`) and classified with the reloaded model (`clf2`).

    Parameters
    ----------
    texts : iterable of str
        Raw, uncleaned texts. Pass a list even for a single text: a bare
        string would be iterated character by character.

    Returns
    -------
    The output of ``clf2.predict`` -- one predicted class id per input text.
    """
    features = tfidf2.transform([clean_text(raw) for raw in texts])
    return clf2.predict(features)


samples = [
    "Good morning everyone, have a great day!",
    "You're acting so stupid right now",
    "All people of XYZ group are disgusting and should disappear"
]
preds = predict_texts(samples)
for sample, label in zip(samples, preds):
    print(label, "->", sample)


2 -> Good morning everyone, have a great day!
1 -> You're acting so stupid right now
2 -> All people of XYZ group are disgusting and should disappear



### Optional helper functions (if you want to mirror `classifier.py` helpers here)


In [9]:

# Class id -> human-readable label.
ID2NAME = {
    0: "Hate speech",
    1: "Offensive language",
    2: "Neither",
}


def class_to_name(i):
    """Return the label for class id `i`.

    `i` is converted with ``int()`` before the lookup, so anything ``int()``
    accepts works (and anything it rejects raises). Ids without an entry in
    ID2NAME fall back to ``str(i)``.
    """
    class_id = int(i)
    if class_id in ID2NAME:
        return ID2NAME[class_id]
    return str(i)

def get_tweets_predictions(text_list):
    """Predict a class id for each raw text in `text_list`.

    This name is kept for parity with the classifier.py helper this section
    mirrors. The work is delegated to predict_texts() (clean -> vectorise ->
    predict with the reloaded artifacts) so the inference path is defined
    once in this notebook rather than copied.

    Parameters
    ----------
    text_list : iterable of str
        Raw, uncleaned texts.

    Returns
    -------
    Whatever predict_texts() returns: one predicted class id per input text.
    """
    return predict_texts(text_list)

# Demo format identical to classifier.py printing
demo = [
    "I hate you",
    "You are amazing!",
    "What a stupid idea",
    "Good morning everyone :)"
]
preds = get_tweets_predictions(demo)
for s, p in zip(demo, preds):
    label_id = int(p)  # convert once; used for both the id and the name lookup
    print(label_id, class_to_name(label_id), "->", s)


0 Hate speech -> I hate you
0 Hate speech -> You are amazing!
2 Neither -> What a stupid idea
2 Neither -> Good morning everyone :)
