In [None]:
import time
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

sns.set_context("talk")

In [None]:
# =========================
# 1) Load embeddings + labels
# =========================
# Read the precomputed feature matrices and label vectors stored under SAVE_DIR.
SAVE_DIR = "models/svm"
X_train = np.load(f"{SAVE_DIR}/X_train_emb.npy")
y_train = np.load(f"{SAVE_DIR}/y_train.npy")
X_test = np.load(f"{SAVE_DIR}/X_test_emb.npy")
y_test = np.load(f"{SAVE_DIR}/y_test.npy")

# Fail fast on mismatched files: a test-set mismatch would otherwise only
# surface in the evaluation cell, after the whole grid search has already run.
assert X_train.shape[0] == y_train.shape[0], (
    f"Train features/labels disagree: {X_train.shape[0]} vs {y_train.shape[0]} rows"
)
assert X_test.shape[0] == y_test.shape[0], (
    f"Test features/labels disagree: {X_test.shape[0]} vs {y_test.shape[0]} rows"
)
assert X_train.shape[1:] == X_test.shape[1:], (
    f"Train/test feature dimensions disagree: {X_train.shape[1:]} vs {X_test.shape[1:]}"
)

print("Train shape:", X_train.shape)
print("Test shape :", X_test.shape)

# =========================
# 2) Pipeline: Scaling + LinearSVC
# =========================
# Standardize every feature, then fit a class-weighted linear SVM on the result.
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", LinearSVC(
        # Solve the primal problem explicitly. scikit-learn recommends
        # dual=False when n_samples > n_features, and the train shape printed
        # by this notebook is (37698, 768). Pinning the value also keeps the
        # solver choice independent of the library version's default.
        dual=False,
        max_iter=5000,
        class_weight="balanced",
        random_state=42,
    ))
])

# =========================
# 3) GridSearchCV
# =========================
# Number of parallel workers handed to GridSearchCV.
N_JOBS = 1

# Candidate regularization strengths, log-spaced; the "svm__" prefix routes
# the parameter to the pipeline step named "svm".
C_values = np.logspace(-2, 4, 30)
param_grid = {"svm__C": C_values}

# Shuffled, seeded stratified folds so the split is repeatable across runs.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
n_splits = cv.get_n_splits(X=X_train, y=y_train)

print("C values to try:", C_values)
print(f"CV splits: {n_splits}")
print(f"Total fits: {len(C_values)} * {n_splits} = {len(C_values) * n_splits}")

# Search settings gathered in one place: macro-F1 selects the winner, the best
# candidate is refit on the full training set, and any failing fit raises
# instead of being scored as NaN.
search_options = dict(
    scoring="f1_macro",
    cv=cv,
    n_jobs=N_JOBS,
    refit=True,
    verbose=10,
    return_train_score=True,
    error_score="raise",
)

t0 = time.time()
grid = GridSearchCV(pipe, param_grid, **search_options)

grid.fit(X_train, y_train)
print(f"Elapsed: {time.time() - t0:.1f}s")

print("Best params:", grid.best_params_)
print("Best CV f1_macro:", f"{grid.best_score_:.4f}")

# =========================
# 4) Plot: CV score vs C
# =========================
# Pull the per-candidate C value plus the mean/std of the CV test score.
results = grid.cv_results_
Cs = results["param_svm__C"].data.astype(float)
mean_test = results["mean_test_score"]
std_test = results["std_test_score"]

# Sort by C so the curve and the shaded band are drawn left to right.
order = np.argsort(Cs)
Cs, mean_test, std_test = Cs[order], mean_test[order], std_test[order]

# Explicit figure/axes interface; the shaded band is mean +/- one std across folds.
fig, ax = plt.subplots(figsize=(9, 5))
ax.semilogx(Cs, mean_test, marker="o", label="Mean CV f1_macro")
ax.fill_between(Cs, mean_test - std_test, mean_test + std_test, alpha=0.2)
ax.axvline(grid.best_params_["svm__C"], linestyle="--", linewidth=1, label="Best C")
# Take the fold count from the fitted search so the title cannot drift from
# the CV configuration if the number of splits is changed.
ax.set_title(f"GridSearchCV: f1_macro vs C ({grid.n_splits_}-fold Stratified)")
ax.set_xlabel("C (log scale)")
ax.set_ylabel("f1_macro")
ax.grid(True, which="both", linestyle=":", linewidth=0.7)
ax.legend()
plt.show()

Train shape: (37698, 768)
Test shape : (9425, 768)
C values to try: [1.00000000e-03 4.64158883e-03 2.15443469e-02 1.00000000e-01
 4.64158883e-01 2.15443469e+00 1.00000000e+01 4.64158883e+01
 2.15443469e+02 1.00000000e+03]
CV splits: 5
Total fits: 10 * 5 = 50
Fitting 5 folds for each of 10 candidates, totalling 50 fits


KeyboardInterrupt: 

In [None]:
# =========================
# 5) Evaluate best model on TEST + Confusion Matrix
# =========================
# The refit winner of the grid search scores the held-out test set.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="macro")

print(f"Test Accuracy : {acc:.4f}")
print(f"Test F1-macro : {f1:.4f}")
print("\nClassification report:")
print(classification_report(y_test, y_pred))

# Confusion matrix over every class seen in either the truth or the predictions
# (sorted union), so rows and columns share one label order.
labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=labels)

fig, ax = plt.subplots(figsize=(9, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_title("Confusion Matrix (Test)")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.tight_layout()
plt.show()

# =========================
# 6) Save best model
# =========================
# Serialize the fitted pipeline next to the embeddings it was trained on.
out_path = f"{SAVE_DIR}/linear_svc_phobert.joblib"
joblib.dump(value=best_model, filename=out_path)
print(f">> Saved best model to: {out_path}")

Train shape: (37698, 768)
Test shape : (9425, 768)
>> Training LinearSVC...
Accuracy : 0.8593
F1-macro : 0.8589

Classification report:
              precision    recall  f1-score   support

           0       0.91      0.90      0.91       730
           1       0.81      0.78      0.80       716
           2       0.76      0.77      0.76       731
           3       0.93      0.93      0.93       721
           4       0.78      0.75      0.77       728
           5       0.85      0.86      0.85       714
           6       0.91      0.93      0.92       734
           7       0.81      0.82      0.81       733
           8       0.85      0.84      0.85       710
           9       0.93      0.96      0.95       735
          10       0.78      0.82      0.80       714
          11       0.87      0.86      0.87       733
          12       0.96      0.95      0.96       726

    accuracy                           0.86      9425
   macro avg       0.86      0.86      0.86      942