In [10]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA

In [5]:
FEATURE_PATH = "../dataset/processed"
X = np.load(f"{FEATURE_PATH}/X_hog.npy")  # or X_lbp.npy / X_color.npy
y = np.load(f"{FEATURE_PATH}/y.npy")

print("X shape:", X.shape)
print("y shape:", y.shape)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Training samples:", X_train.shape[0])
print("Validation samples:", X_val.shape[0])

X shape: (2856, 6354)
y shape: (2856,)
Training samples: 2284
Validation samples: 572


In [22]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

print("Scaled X_train shape:", X_train_scaled.shape)
print("Scaled X_val shape:", X_val_scaled.shape)

pca = PCA(n_components=200)  # try 200, 300, or 500
X_train_scaled = pca.fit_transform(X_train_scaled)
X_val_scaled = pca.transform(X_val_scaled)

print("After PCA, X_train shape:", X_train_scaled.shape)
print("After PCA, X_val shape:", X_val_scaled.shape)

Scaled X_train shape: (2284, 6354)
Scaled X_val shape: (572, 6354)
After PCA, X_train shape: (2284, 200)
After PCA, X_val shape: (572, 200)


In [24]:
knn_model = KNeighborsClassifier(
    n_neighbors=7,       # you can try 3, 5, 7 later
    weights='distance',  # 'uniform' is another option
    n_jobs=-1            # use all CPU cores
)

# Train
knn_model.fit(X_train_scaled, y_train)
print("k-NN training completed.")

k-NN training completed.


In [25]:
y_pred = knn_model.predict(X_val_scaled)

# Accuracy
val_acc = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", val_acc)

Validation Accuracy: 0.6695804195804196


In [26]:
import os

# Path to your dataset (augmented or processed images)
DATASET_PATH = "../dataset/augmented"

# Define classes by listing subfolders, excluding 'unknown'
CLASSES = sorted([
    d for d in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, d)) and d != 'unknown'
])

print("Classes:", CLASSES)

print(classification_report(y_val, y_pred, target_names=CLASSES))

Classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
              precision    recall  f1-score   support

   cardboard       0.79      0.78      0.78        95
       glass       0.62      0.58      0.60        95
       metal       0.66      0.49      0.57        95
       paper       0.71      0.75      0.73        96
     plastic       0.49      0.60      0.54        96
       trash       0.79      0.81      0.80        95

    accuracy                           0.67       572
   macro avg       0.68      0.67      0.67       572
weighted avg       0.68      0.67      0.67       572



In [27]:
# Get distances to neighbors
distances, neighbors = knn_model.kneighbors(X_val_scaled)

# Example: reject if average distance > threshold
threshold = 5.0  # adjust based on your distances
y_pred_with_unknown = []
for pred, dist in zip(y_pred, distances):
    if dist.mean() > threshold:
        y_pred_with_unknown.append(6)  # label 6 = 'unknown'
    else:
        y_pred_with_unknown.append(pred)

# Accuracy with unknowns
val_acc_unknown = accuracy_score(y_val, y_pred_with_unknown)
print("Validation Accuracy with Unknown rejection:", val_acc_unknown)


Validation Accuracy with Unknown rejection: 0.0
