# 1-Imports

In [39]:
import numpy as np
import joblib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 2-Load features


In [40]:
# Read the pre-computed feature matrices and label vectors for each split.
# Paths are relative to the notebook's working directory.
X_train, y_train = (
    np.load(path)
    for path in ("../features/train_features.npy", "../features/train_labels.npy")
)
X_val, y_val = (
    np.load(path)
    for path in ("../features/val_features.npy", "../features/val_labels.npy")
)

# Shape check: features and labels of a split must agree on their first axis.
print("Train:", X_train.shape, y_train.shape)
print("Val:  ", X_val.shape, y_val.shape)

Train: (2804, 1290) (2804,)
Val:   (463, 1290) (463,)


# 3-KNN pipeline

In [32]:
# Two-step estimator: standardise the features, then classify with a
# distance-weighted vote among the 9 nearest (Euclidean) training samples.
# Scaling lives inside the pipeline so it is fitted on the training data only.
knn_pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "knn",
            KNeighborsClassifier(n_neighbors=9, weights="distance", metric="euclidean"),
        ),
    ]
)

# 4-Train Model

In [33]:
# Fit both pipeline steps (scaler statistics + k-NN index) on the training split.
print("Training KNN classifier...")
knn_pipeline.fit(X=X_train, y=y_train)
print("Done.")

Training KNN classifier...
Done.


# 5-Evaluate Model on the Validation Set

In [34]:
# Predict the held-out validation split and report overall accuracy,
# per-class precision/recall/F1, and the confusion matrix.
y_pred = knn_pipeline.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_true=y_val, y_pred=y_pred))
print("Classification Report:\n", classification_report(y_true=y_val, y_pred=y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_true=y_val, y_pred=y_pred))

Validation Accuracy: 0.8142548596112311
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.79      0.77        96
           1       0.85      0.93      0.89       112
           2       0.98      0.87      0.92        61
           3       0.74      0.81      0.77        90
           4       0.85      0.78      0.81        78
           5       0.71      0.38      0.50        26

    accuracy                           0.81       463
   macro avg       0.81      0.76      0.78       463
weighted avg       0.82      0.81      0.81       463

Confusion Matrix:
 [[ 76   0   0  14   6   0]
 [  3 104   0   3   1   1]
 [  0   7  53   0   0   1]
 [ 11   3   0  73   1   2]
 [ 12   4   0   1  61   0]
 [  0   4   1   8   3  10]]


In [35]:
# Minimum top-class probability needed to keep a prediction; samples below
# this confidence are relabelled as "unknown".
UNKNOWN_THRESHOLD = 0.3


def predict_with_unknown_knn(model, X, threshold=0.6, unknown_label=6):
    """Predict class labels, replacing low-confidence predictions with an "unknown" label.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict_proba(X)``. If it also exposes
        ``classes_`` (as scikit-learn classifiers and pipelines do), the
        winning probability column is translated back to the real class label.
    X : array-like
        Samples to classify; passed unchanged to ``model.predict_proba``.
    threshold : float, default 0.6
        A prediction whose highest class probability is strictly below this
        value is replaced by ``unknown_label``.
    unknown_label : default 6
        Label assigned to rejected samples. It should not collide with a real
        class label and must be storable in the label array's dtype.

    Returns
    -------
    preds : numpy.ndarray
        Predicted label per sample, with ``unknown_label`` where rejected.
    max_probs : numpy.ndarray
        Highest class probability per sample (the confidence used for rejection).
    """
    probs = model.predict_proba(X)
    max_probs = np.max(probs, axis=1)
    best_columns = np.argmax(probs, axis=1)

    # predict_proba columns are ordered like model.classes_, so a column
    # position is only a valid label when the classes happen to be 0..n-1.
    # Map positions to labels when possible; otherwise keep the positions.
    classes = getattr(model, "classes_", None)
    if classes is None:
        preds = best_columns
    else:
        # Fancy indexing returns a fresh array, so model.classes_ is never mutated.
        preds = np.asarray(classes)[best_columns]

    preds[max_probs < threshold] = unknown_label
    return preds, max_probs


In [36]:
# Re-score the validation split with low-confidence rejection enabled.
# `conf` holds the top-class probability of every validation sample.
y_pred_unknown, conf = predict_with_unknown_knn(
    model=knn_pipeline,
    X=X_val,
    threshold=UNKNOWN_THRESHOLD,
)

# Any sample relabelled as "unknown" counts as an error unless y_val itself
# contains that label.
print("Accuracy with Unknown:", accuracy_score(y_true=y_val, y_pred=y_pred_unknown))


Accuracy with Unknown: 0.8142548596112311


In [38]:
# Sweep odd neighbourhood sizes k = 1, 3, ..., 19. Each iteration fits a fresh
# pipeline (same scaler, weighting and metric as knn_pipeline) on the training
# split and reports its accuracy on the validation split.
for k in range(1, 21, 2):
    model = Pipeline(
        steps=[
            ("scaler", StandardScaler()),
            (
                "knn",
                KNeighborsClassifier(n_neighbors=k, weights="distance", metric="euclidean"),
            ),
        ]
    ).fit(X_train, y_train)  # fit() returns the fitted pipeline itself
    acc = accuracy_score(y_true=y_val, y_pred=model.predict(X_val))
    print(f"k={k:2d} | Accuracy={acc:.4f}")


k= 1 | Accuracy=0.8121
k= 3 | Accuracy=0.8056
k= 5 | Accuracy=0.8186
k= 7 | Accuracy=0.8186
k= 9 | Accuracy=0.8143
k=11 | Accuracy=0.8164
k=13 | Accuracy=0.8099
k=15 | Accuracy=0.8164
k=17 | Accuracy=0.8099
k=19 | Accuracy=0.7970


# 7-Save Model


In [24]:
# Persist the fitted `knn_pipeline` (scaler + k-NN) to the working directory.
# This is the pipeline built with n_neighbors=9, not any `model` from the k sweep.
joblib.dump(value=knn_pipeline, filename="knn_hist_grad.joblib")
print("k-NN model saved.")

k-NN model saved.
