In [5]:
# train_linear_svm.py
import os, re, time, joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, f1_score

# =========================
# PATHS (do not hardcode an absolute D:\ path)
# =========================
# Input dataset and output artifact locations, all expressed relative to the
# current working directory.
OUT_DIR = "outputs"
DATA_PATH = "data/2.synthetic_wisesight_like_thai_sentiment_hard_5000 (1).csv"
OUT_PATH = os.path.join(OUT_DIR, "LinearSVM.joblib")

# Create the output folder before anything is written into it; exist_ok=True
# keeps reruns from failing when the folder is already there.
os.makedirs(OUT_DIR, exist_ok=True)

# =========================
# CLEAN
# =========================
def clean(text):
    """Return *text* as a string with its whitespace normalized.

    The value is converted with ``str()``, every run of whitespace is
    replaced by a single space, and leading/trailing whitespace is removed.
    """
    as_string = str(text)
    single_spaced = re.sub(r"\s+", " ", as_string)
    return single_spaced.strip()

# =========================
# LOAD DATA
# =========================
# Read the CSV, keep only the two columns the model uses, and discard rows
# where either one is missing.
df = pd.read_csv(DATA_PATH)
df = df[["text", "sentiment"]]
df = df.dropna()

# Normalize whitespace in every remaining text.
df["text"] = df["text"].map(clean)

# =========================
# SPLIT
# =========================
# Hold out 20% of the rows for evaluation. The split is stratified on the
# label column and seeded so the same partition is produced on every run.
texts = df["text"]
labels = df["sentiment"]
Xtr, Xte, ytr, yte = train_test_split(
    texts, labels, test_size=0.2, stratify=labels, random_state=42
)

# =========================
# MODEL
# =========================
# Linear SVM base learner. The seed mirrors the one used for the train/test
# split: LinearSVC draws random numbers when it solves the dual problem, so
# without a seed two runs on identical data may yield different models (and
# therefore different stored metrics).
svm = LinearSVC(C=1.0, random_state=42)

# Wrap the SVM in a calibrator (library-default settings) so the final
# estimator can also report class probabilities, which LinearSVC alone
# does not provide.
svm_cal = CalibratedClassifierCV(svm)

# Text -> TF-IDF features -> calibrated linear SVM.
# NOTE(review): the vectorizer uses its default regex-based word analyzer,
# not a Thai word segmenter. The dataset file name suggests Thai text, which
# is normally written without spaces between words -- confirm that the
# resulting tokens are meaningful, or consider character n-grams.
pipe = Pipeline([
    ("tfidf", TfidfVectorizer(
        ngram_range=(1, 2),   # unigrams and bigrams
        min_df=5,             # drop terms seen in fewer than 5 documents
        max_df=0.9,           # drop terms seen in more than 90% of documents
        sublinear_tf=True,    # use 1 + log(tf) instead of the raw term count
    )),
    ("clf", svm_cal),
])

# =========================
# TRAIN
# =========================
# Fit the whole pipeline on the training partition, then label the held-out
# texts with the fitted pipeline.
pred = pipe.fit(Xtr, ytr).predict(Xte)

# Hold-out metrics: plain accuracy and the unweighted mean of per-class F1.
acc = accuracy_score(y_true=yte, y_pred=pred)
f1 = f1_score(y_true=yte, y_pred=pred, average="macro")

# =========================
# SAVE MODEL (WITH VERSION)
# =========================
# Identifying metadata is defined once and reused by both the saved bundle
# and the console report, so the two cannot drift apart when the version is
# bumped.
MODEL_NAME = "Linear SVM"
MODEL_VERSION = "v1.0"

# Everything needed to use and audit the model later: the fitted pipeline,
# its identity, the hold-out metrics, and a training timestamp.
model_bundle = {
    "pipeline": pipe,
    "model_name": MODEL_NAME,
    "model_version": MODEL_VERSION,
    "accuracy": acc,
    "macro_f1": f1,
    # Local wall-clock time; the string carries no timezone information.
    "trained_at": time.strftime("%Y-%m-%d %H:%M:%S"),
}
joblib.dump(model_bundle, OUT_PATH)

print(f"✅ {MODEL_NAME} {MODEL_VERSION} trained")
print("Accuracy :", acc)
print("Macro-F1 :", f1)
print("Saved to :", OUT_PATH)


✅ Linear SVM v1.0 trained
Accuracy : 0.994
Macro-F1 : 0.9941666666666666
Saved to : outputs\LinearSVM.joblib
