## KNN Classifier for Steel surface defect recognition

### Note: for this case, the number of neighbors in KNN was set to `1` and the dataset was configured accordingly.

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import os

### KNN classifier arguments

#### NOTE: You may need to change the paths in the dictionary below to match the location of your dataset.

In [None]:
# Notebook-wide settings: dataset locations plus the k-NN hyper-parameters.
args = dict(
    training_dataset='./dataset_all/knn_data/training',  # folder scanned for training images
    testing_dataset='./dataset_all/knn_data/testing',    # folder scanned for testing images
    neighbors=1,  # forwarded as n_neighbors to KNeighborsClassifier
    jobs=1,       # forwarded as n_jobs to KNeighborsClassifier
)

### Convert image to Vector
Function to *resize* the image to a fixed size, then flatten the image into a list of raw pixel intensities

In [None]:
def image_to_feature_vector(image, size=(32, 32)):
    """Resize *image* to a fixed size and return its pixels as a flat array.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Target size, passed unchanged as the second argument of
            ``cv2.resize``.

    Returns:
        The resized image flattened into a one-dimensional array of raw
        pixel intensities.
    """
    resized = cv2.resize(image, size)
    flat_pixels = resized.flatten()
    return flat_pixels

### Extract a 3D color histogram from the HSV color space using the supplied number of `bins` per channel

In [None]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized 3D HSV color histogram of *image*.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2HSV``, so it
            is expected to be in BGR channel order.
        bins: Number of histogram bins for each of the three HSV channels.

    Returns:
        The normalized histogram flattened into a one-dimensional array.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Histogram over all three channels; the first channel is binned over
    # [0, 180) and the remaining two over [0, 256).
    channels = [0, 1, 2]
    value_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv_image], channels, None, bins, value_ranges)

    # Normalize in place: the histogram is both the source and the destination.
    cv2.normalize(histogram, histogram)
    return histogram.flatten()

### Image paths

In [None]:
# Resolve the two dataset folders from the settings, then materialize the
# image paths found under each one into lists.
training_root = args["training_dataset"]
testing_root = args["testing_dataset"]

imagePaths = list(paths.list_images(training_root))
imagePaths_test = list(paths.list_images(testing_root))

### Helper function to prepare data
* Extract each image and corresponding class label

In [None]:
def prepare_data(imagePaths):
    """Load images and build raw-pixel vectors, color histograms and labels.

    The class label is derived from the file name: the part before the first
    ``.`` and, within that, the part before the first ``_`` (so a file named
    ``crack_12.jpg`` is labelled ``crack``).

    Files that ``cv2.imread`` cannot read are skipped with a printed warning
    instead of crashing later inside ``cv2.resize``.

    Args:
        imagePaths: Iterable of image file paths.

    Returns:
        A tuple ``(rawImages, features, labels)`` of three lists with one
        entry per successfully loaded image: the flattened resized pixels,
        the flattened color histogram, and the label string.
    """
    rawImages, features, labels = [], [], []

    for imagePath in imagePaths:
        image = cv2.imread(imagePath)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            print("[WARNING] skipping unreadable image: {}".format(imagePath))
            continue

        file_name = os.path.basename(imagePath)
        label = file_name.split(".")[0].split("_")[0]

        # Compute both representations before appending so the three lists
        # always stay the same length.
        pixels = image_to_feature_vector(image)
        hist = extract_color_histogram(image)

        rawImages.append(pixels)
        features.append(hist)
        labels.append(label)

    return rawImages, features, labels

### Load training and testing data

In [None]:
# Build pixel vectors, histograms and labels for both dataset folders.
train_images, train_features, train_labels = prepare_data(imagePaths)
test_images, test_features, test_labels = prepare_data(imagePaths_test)

# Fail early with an actionable message instead of a bare IndexError on the
# next line when no training image was loaded (typically a wrong path).
if not train_images:
    raise ValueError(
        "No training images loaded from {!r}; check args['training_dataset']".format(
            args["training_dataset"]
        )
    )

print("[INFO] Shape of the image sampled from dataset:", train_images[0].shape)

### Memory consumed by the raw images matrix and features matrix

##### Training set

In [None]:
# Convert the per-image lists into NumPy arrays for scikit-learn.
trainRI = np.array(train_images)
trainRL = np.array(train_labels)
trainF = np.array(train_features)

# nbytes is a byte count; divide by 1024 * 1024 so the reported figure is a
# consistent unit rather than a mix of binary and decimal factors.
print("[INFO] pixels matrix: {:.2f}MB".format(trainRI.nbytes / (1024 * 1024.0)))
print("[INFO] features matrix: {:.2f}MB".format(trainF.nbytes / (1024 * 1024.0)))

##### Testing set

In [None]:
# Convert the per-image lists into NumPy arrays for scikit-learn.
testRI = np.array(test_images)
testRL = np.array(test_labels)
testF = np.array(test_features)

# nbytes is a byte count; divide by 1024 * 1024 so the reported figure is a
# consistent unit rather than a mix of binary and decimal factors.
print("[INFO] pixels matrix: {:.2f}MB".format(testRI.nbytes / (1024 * 1024.0)))
print("[INFO] features matrix: {:.2f}MB".format(testF.nbytes / (1024 * 1024.0)))

### Train and evaluate the k-NN classifier on the raw pixel intensities

In [None]:
print("[INFO] evaluating raw pixel accuracy...")

# Build the classifier from the notebook settings; fit() returns the fitted
# estimator, so training and scoring can be chained.
knn_settings = {"n_neighbors": args["neighbors"], "n_jobs": args["jobs"]}
model = KNeighborsClassifier(**knn_settings)
acc = model.fit(trainRI, trainRL).score(testRI, testRL)

# The score is scaled by 100 for display as a percentage.
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

### Train and test KNN-classifier on Histogram Representation of image data

##### Load data

In [None]:
# No module-level `features` / `labels` exist in this notebook, so splitting
# them would raise NameError. Reuse the arrays already loaded from the
# dedicated training and testing folders instead, so the histogram model is
# evaluated on the same split as the raw-pixel model.
train_test_data = (trainF, testF, trainRL, testRL)

train_features, test_features, trainLabels, testLabels = train_test_data

#### Histogram representations - train and evaluate k-NN classifier

In [None]:
print("[INFO] evaluating histogram accuracy...")

# Fresh classifier configured from the notebook settings, trained on the
# histogram features.
n_neighbors, n_jobs = args["neighbors"], args["jobs"]
model = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs)
model.fit(train_features, trainLabels)

# The score is scaled by 100 for display as a percentage.
acc = model.score(test_features, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))