# 1-Imports

In [58]:
import numpy as np
import joblib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 2-Load features


In [59]:
# Load the pre-computed feature matrices and label vectors for each split.
X_train = np.load("../features/train_features.npy")
y_train = np.load("../features/train_labels.npy")

X_val = np.load("../features/val_features.npy")
y_val = np.load("../features/val_labels.npy")

# Fail fast if the files on disk do not belong together: a features/labels
# length mismatch or a train/val dimensionality mismatch would otherwise only
# surface later as a less obvious error inside scikit-learn.
assert X_train.shape[0] == y_train.shape[0], "train features/labels row count mismatch"
assert X_val.shape[0] == y_val.shape[0], "val features/labels row count mismatch"
assert X_train.shape[1:] == X_val.shape[1:], "train/val feature dimensionality mismatch"

print("Train:", X_train.shape, y_train.shape)
print("Val:  ", X_val.shape, y_val.shape)

Train: (3000, 512) (3000,)
Val:   (373, 512) (373,)


# 3-KNN pipeline

In [60]:
# Standardize the features, then classify with distance-weighted k-NN.
# Keeping the scaler inside the pipeline means it is fitted together with the
# classifier in fit() and re-applied automatically on every predict call.
# k=9 is the best-scoring value in the validation sweep further down.
knn_pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier(n_neighbors=9, weights="distance", metric="euclidean")),
])

# 4-Train Model

In [61]:
# Fit the scaler and the k-NN classifier on the training split only.
print("Training KNN classifier...")
knn_pipeline.fit(X_train, y=y_train)
print("Done.")

Training KNN classifier...
Done.


# 5-Evaluate Model on the Validation Set

In [62]:
# Score the fitted pipeline on the held-out validation split.
y_pred = knn_pipeline.predict(X_val)

print(
    "Validation Accuracy:",
    accuracy_score(y_true=y_val, y_pred=y_pred),
)
print(
    "Classification Report:\n",
    classification_report(y_true=y_val, y_pred=y_pred),
)
# Rows are true classes, columns are predicted classes.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_true=y_val, y_pred=y_pred),
)

Validation Accuracy: 0.8471849865951743
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.72      0.78        72
           1       0.94      0.91      0.93        91
           2       0.94      1.00      0.97        45
           3       0.80      0.83      0.81        81
           4       0.80      0.78      0.79        60
           5       0.67      0.92      0.77        24

    accuracy                           0.85       373
   macro avg       0.83      0.86      0.84       373
weighted avg       0.85      0.85      0.85       373

Confusion Matrix:
 [[52  1  0 11  8  0]
 [ 0 83  3  2  0  3]
 [ 0  0 45  0  0  0]
 [ 5  2  0 67  2  5]
 [ 4  2  0  4 47  3]
 [ 0  0  0  0  2 22]]


In [63]:
# Minimum top-class probability required to accept a prediction.
UNKNOWN_THRESHOLD = 0.6


def predict_with_unknown_knn(model, X, threshold=UNKNOWN_THRESHOLD, unknown_label=6):
    """Predict class labels, replacing low-confidence predictions with an "unknown" label.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict_proba(X)``. If it also exposes
        ``classes_``, the winning probability column is translated to the
        corresponding class label; otherwise the column index itself is used.
    X : array-like
        Samples, passed unchanged to ``model.predict_proba``.
    threshold : float, default=UNKNOWN_THRESHOLD
        Samples whose highest class probability is strictly below this value
        are labelled ``unknown_label``.
    unknown_label : default=6
        Label assigned to rejected samples. It should not collide with a real
        class label of the model.

    Returns
    -------
    preds : numpy.ndarray
        One label per sample (a real class label or ``unknown_label``).
    max_probs : numpy.ndarray
        Highest class probability per sample.
    """
    probs = np.asarray(model.predict_proba(X))
    max_probs = probs.max(axis=1)
    winner_idx = probs.argmax(axis=1)

    # argmax yields *column indices* of predict_proba, which only coincide with
    # the class labels when the labels happen to be 0..n_classes-1. Map through
    # classes_ so the result is correct for arbitrary label sets.
    classes = getattr(model, "classes_", None)
    if classes is None:
        preds = winner_idx
    else:
        preds = np.asarray(classes)[winner_idx]  # fancy indexing -> fresh array

    preds[max_probs < threshold] = unknown_label
    return preds, max_probs


In [64]:
y_pred_unknown, conf = predict_with_unknown_knn(
    knn_pipeline, X_val, threshold=UNKNOWN_THRESHOLD
)

# The validation labels contain no "unknown" samples, so every rejected
# prediction is scored as an error here. This figure is therefore a lower
# bound that mixes misclassifications with rejections.
print("Accuracy with Unknown:",
      accuracy_score(y_val, y_pred_unknown))

# Separate the two effects: how many samples were rejected, and how accurate
# the classifier is on the samples it was confident enough to label.
rejected = conf < UNKNOWN_THRESHOLD  # same criterion the helper applies
accepted = ~rejected
print(f"Rejected as unknown: {rejected.sum()} / {rejected.size} ({rejected.mean():.1%})")
if accepted.any():
    print("Accuracy on accepted samples:",
          accuracy_score(y_val[accepted], y_pred_unknown[accepted]))


Accuracy with Unknown: 0.6970509383378016


In [65]:
# Sweep the neighbour count; every candidate uses the same scaler, weighting
# and metric as knn_pipeline and differs only in k.
for k in (3, 5, 7, 9, 11):
    model = Pipeline(steps=[
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier(n_neighbors=k, weights="distance", metric="euclidean")),
    ])
    # fit() returns the fitted pipeline, so training and prediction can be chained.
    acc = accuracy_score(y_val, model.fit(X_train, y_train).predict(X_val))
    print(f"k={k:2d} | Accuracy={acc:.4f}")


k= 3 | Accuracy=0.8338
k= 5 | Accuracy=0.8365
k= 7 | Accuracy=0.8338
k= 9 | Accuracy=0.8472
k=11 | Accuracy=0.8284


# 7-Save Model


In [66]:
# Persist the whole fitted pipeline (scaler + k-NN) so inference code can load
# a single artifact. The path is relative to the notebook's working directory.
# NOTE(review): the file name says "hist_grad" — presumably the feature type; confirm.
joblib.dump(value=knn_pipeline, filename="knn_hist_grad.joblib")
print("k-NN model saved.")

k-NN model saved.
