In [None]:
import matplotlib.pyplot as plt 
import os
import sys
import cv2
import numpy as np 
import numba
import gzip 
from sklearn import datasets, svm, metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from skimage import exposure
!apt install libomp-dev
!python -m pip install --upgrade faiss faiss-gpu
import faiss
import pickle


Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  libomp5
Suggested packages:
  libomp-doc
The following NEW packages will be installed:
  libomp-dev libomp5
0 upgraded, 2 newly installed, 0 to remove and 15 not upgraded.
Need to get 239 kB of archives.
After this operation, 804 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libomp5 amd64 5.0.1-1 [234 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libomp-dev amd64 5.0.1-1 [5,088 B]
Fetched 239 kB in 1s (356 kB/s)
Selecting previously unselected package libomp5:amd64.
(Reading database ... 145480 files and directories currently installed.)
Preparing to unpack .../libomp5_5.0.1-1_amd64.deb ...
Unpacking libomp5:amd64 (5.0.1-1) ...
Selecting previously unselected package libomp-dev.
Preparing to unpack .../libomp-dev_5.0.1-1_amd64.deb ...
Unpacking libomp-dev (5.0.1-

In [None]:
"""----------------------load data function----------------------"""

def load_mnist(path, kind = "train"):
    """Read one MNIST split stored as two gzip-compressed IDX files.

    Parameters
    ----------
    path : str
        Directory holding ``<kind>-labels-idx1-ubyte.gz`` and
        ``<kind>-images-idx3-ubyte.gz``.
    kind : str
        Filename prefix selecting the split (defaults to ``"train"``).

    Returns
    -------
    images : numpy.ndarray
        float64 array of shape ``(len(labels), 28, 28)``.
    labels : numpy.ndarray
        1-D uint8 array, one entry per image.
    """
    label_file = os.path.join(path, "%s-labels-idx1-ubyte.gz" % kind)
    image_file = os.path.join(path, "%s-images-idx3-ubyte.gz" % kind)

    # The label file starts with an 8-byte header that is skipped, not parsed.
    with gzip.open(label_file, "rb") as label_stream:
        label_bytes = label_stream.read()
    labels = np.frombuffer(label_bytes[8:], dtype=np.uint8)

    # The image file starts with a 16-byte header that is skipped, not parsed.
    with gzip.open(image_file, "rb") as image_stream:
        image_bytes = image_stream.read()
    pixels = np.frombuffer(image_bytes[16:], dtype=np.uint8)

    # The image count comes from the labels; every image is assumed to be 28x28.
    images = pixels.reshape(len(labels), 28, 28).astype(np.float64)
    return images, labels


In [None]:
"""----------------------vectorize----------------------"""

def vectorize(arr):
    """Flatten every sample of a batch into a single feature row.

    Parameters
    ----------
    arr : array-like
        Batch whose first axis indexes the samples, e.g. ``(n, 28, 28)``.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(n, prod(arr.shape[1:]))`` with the dtype of the
        input. A 1-D input of length ``n`` becomes ``(n, 1)``.

    Raises
    ------
    ValueError
        If ``arr`` is 0-dimensional and therefore has no sample axis.
    """
    arr = np.asarray(arr)
    if arr.ndim == 0:
        raise ValueError("vectorize expects an array with a leading sample axis")

    # The feature width is computed explicitly because reshape(n, -1) cannot
    # infer it when n == 0; an empty batch should still report its width.
    n_features = int(np.prod(arr.shape[1:], dtype=np.int64))

    # One reshape replaces the per-sample Python loop. The copy keeps the
    # result independent of the caller's array.
    return arr.reshape(arr.shape[0], n_features).copy()
    

In [None]:
"""----------------------downsampling----------------------"""

def downsampling(arr, shape):
    """Resize every image of a batch with ``cv2.resize``.

    Parameters
    ----------
    arr : numpy.ndarray
        Batch of images indexed along the first axis.
    shape : tuple of int
        Target size, forwarded unchanged as the ``dsize`` argument of
        ``cv2.resize`` (OpenCV reads ``dsize`` as ``(width, height)``).

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis.
    """
    resized = []
    for idx in range(arr.shape[0]):
        # Each image is cast to uint8 before being handed to OpenCV.
        image_u8 = arr[idx].astype(np.uint8)
        resized.append(cv2.resize(image_u8, shape))

    return np.asarray(resized)

In [None]:
"""----------------------histogram----------------------"""

def histogram(arr):
    """Describe every image of a batch by its 256-bin intensity histogram.

    Parameters
    ----------
    arr : numpy.ndarray
        Batch of images indexed along the first axis.

    Returns
    -------
    numpy.ndarray
        One flattened histogram per image, stacked along the first axis.
    """
    per_image = []
    for idx in range(arr.shape[0]):
        # Each image is cast to uint8; channel 0, no mask, 256 bins over [0, 256).
        image_u8 = arr[idx].astype(np.uint8)
        counts = cv2.calcHist([image_u8], [0], None, [256], [0, 256])
        per_image.append(counts.flatten())

    return np.asarray(per_image)
  

In [None]:
"""----------------------class KNN (faiss)----------------------"""

class FaissKNeighbors:
    """k-nearest-neighbour classifier backed by an exact L2 faiss index.

    ``fit`` stores the training vectors in a ``faiss.IndexFlatL2`` and keeps
    the labels; ``predict`` looks up the ``k`` nearest stored vectors of every
    query and returns the most frequent label among them (ties resolve to the
    smallest label, as ``np.argmax`` returns the first maximum).
    """

    def __init__(self, k):
        self.index = None  # faiss index, created by fit()
        self.label = None  # labels of the indexed vectors, in insertion order
        self.k = k         # number of neighbours that vote in predict()

    def fit(self, image_data, label_data):
        """Index the training vectors and remember their labels.

        Parameters
        ----------
        image_data : array-like, shape (n_samples, n_features)
        label_data : array-like, shape (n_samples,)
            Non-negative integer labels (required by ``np.bincount`` in
            ``predict``).

        Raises
        ------
        ValueError
            If ``image_data`` is not 2-D or the number of labels differs from
            the number of vectors.
        """
        # faiss expects C-contiguous float32 input; astype() alone would keep
        # a Fortran-ordered or strided layout.
        vectors = np.ascontiguousarray(image_data, dtype=np.float32)
        labels = np.asarray(label_data)

        if vectors.ndim != 2:
            raise ValueError("image_data must be 2-D (n_samples, n_features)")
        if labels.shape[0] != vectors.shape[0]:
            raise ValueError(
                "got %d labels for %d vectors" % (labels.shape[0], vectors.shape[0])
            )

        self.index = faiss.IndexFlatL2(vectors.shape[1])
        self.index.add(vectors)
        self.label = labels

    def predict(self, test_data):
        """Predict a label for every row of ``test_data`` by majority vote.

        Parameters
        ----------
        test_data : array-like, shape (n_queries, n_features)

        Returns
        -------
        numpy.ndarray
            One predicted label per query.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If a query has no neighbour at all (empty index).
        """
        if self.index is None or self.label is None:
            raise RuntimeError("fit() must be called before predict()")

        queries = np.ascontiguousarray(test_data, dtype=np.float32)
        distances, indices = self.index.search(queries, self.k)
        labels = np.asarray(self.label)

        predictions = []
        for neighbour_ids in indices:
            # faiss pads the result with -1 when fewer than k vectors are
            # available; without this filter -1 would index the LAST label
            # and silently bias the vote.
            found = neighbour_ids[neighbour_ids >= 0]
            if found.size == 0:
                raise ValueError("no neighbours found: the index is empty")
            predictions.append(np.argmax(np.bincount(labels[found])))

        return np.array(predictions)


In [None]:
"""-------------------KNN Classifier (faiss)-------------------------"""

def KNN(x_test, y_test, x_train, y_train, k, feature_extraction, shape = -1):
    """Run a faiss k-NN experiment and print its evaluation.

    Both splits go through ``feature_extraction`` and are then flattened with
    ``vectorize``; a ``FaissKNeighbors(k)`` model is fitted on the training
    features and evaluated on the test features. Accuracy, the confusion
    matrix and the classification report are printed.

    ``shape`` is forwarded as the second argument of ``feature_extraction``;
    the default ``-1`` means the extractor is called with the data only.

    Returns the predicted labels for ``x_test``.
    """
    # -1 is the sentinel for "this extractor takes no extra argument".
    extra_args = () if (shape == -1) else (shape,)

    # Feature extraction (test split first, then train split).
    test_features = feature_extraction(x_test, *extra_args)
    train_features = feature_extraction(x_train, *extra_args)

    # One flat row per sample.
    test_features = vectorize(test_features)
    train_features = vectorize(train_features)

    # Fit on the training features, then classify the test features.
    classifier = FaissKNeighbors(k)
    classifier.fit(train_features, y_train)
    y_pred = classifier.predict(test_features)

    accuracy_percent = accuracy_score(y_pred, y_test) * 100
    print("k = %d, accuracy = %.2f\n" % (k, accuracy_percent))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred), end = "\n\n")
    print("EVALUATION ON TESTING DATA\n", classification_report(y_test, y_pred, digits = 4))

    return y_pred

In [None]:
"""-------------------Sample Mean Classifier (faiss)-------------------------"""

"""calculate sample mean"""
def SM_build(x_train, y_train):
    """Compute the mean sample (centroid) of every class present in the data.

    Parameters
    ----------
    x_train : array-like, shape (n_samples, ...)
        Training samples indexed along the first axis.
    y_train : array-like of non-negative int, shape (n_samples,)
        Class id of every sample.

    Returns
    -------
    means : numpy.ndarray, float64, shape (n_classes_present, ...)
        One centroid per class that has at least one sample.
    labels : numpy.ndarray
        Class id of every row of ``means``, in ascending order. When every id
        in ``0..max(y_train)`` occurs this equals ``np.arange(max + 1)``.

    Raises
    ------
    ValueError
        If there are no samples or the number of labels differs from the
        number of samples.
    """
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    if x_train.shape[0] == 0:
        raise ValueError("SM_build needs at least one training sample")
    if y_train.shape[0] != x_train.shape[0]:
        raise ValueError(
            "got %d labels for %d samples" % (y_train.shape[0], x_train.shape[0])
        )

    cnt_y = np.bincount(y_train)

    # Only class ids that actually occur get a centroid. Dividing an all-zero
    # sum by a zero count would otherwise produce a NaN centroid that ends up
    # in the nearest-neighbour index.
    present = np.flatnonzero(cnt_y)

    # Accumulate in float64 so integer inputs (e.g. uint8 images) cannot
    # overflow and the result dtype does not depend on the input dtype.
    means = np.stack(
        [x_train[y_train == cls].mean(axis=0, dtype=np.float64) for cls in present]
    )

    return means, present

def SM(x_test, y_test, x_train, y_train, feature_extraction, shape = -1):
    """Run a sample-mean (nearest-centroid) experiment and print its evaluation.

    Both splits go through ``feature_extraction``; the training features are
    reduced to one mean sample per class by ``SM_build``; everything is
    flattened with ``vectorize``; a 1-nearest-neighbour ``FaissKNeighbors``
    fitted on the class means then labels each test sample. The confusion
    matrix and the classification report are printed.

    ``shape`` is forwarded as the second argument of ``feature_extraction``;
    the default ``-1`` means the extractor is called with the data only.

    Returns the predicted labels for ``x_test``.
    """
    # -1 is the sentinel for "this extractor takes no extra argument".
    extra_args = () if (shape == -1) else (shape,)

    # Feature extraction (test split first, then train split).
    test_features = feature_extraction(x_test, *extra_args)
    train_features = feature_extraction(x_train, *extra_args)

    # Collapse the training set to one mean sample per class.
    class_means, class_ids = SM_build(train_features, y_train)

    # One flat row per sample / per class mean.
    test_features = vectorize(test_features)
    class_means = vectorize(class_means)

    # The nearest class mean decides, i.e. k-NN with k = 1.
    classifier = FaissKNeighbors(1)
    classifier.fit(class_means, class_ids)
    y_pred = classifier.predict(test_features)

    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred), end = "\n\n")
    print("EVALUATION ON TESTING DATA\n", classification_report(y_test, y_pred, digits = 4))

    return y_pred

In [None]:
"""----------------------load data----------------------"""

# Read both splits from the local data/ directory.
# NOTE(review): kind="test" resolves to test-*-ubyte.gz; the stock MNIST
# archives use the prefix t10k-, so this presumably relies on renamed files.
# TODO confirm.
x_test, y_test = load_mnist("data/", kind = "test")
x_train, y_train = load_mnist("data/", kind = "train")


In [None]:
"""----------------------test KNN----------------------""" 

# k-NN (k = 3) on the MNIST test split, once per feature extractor.
# Each run prints its own accuracy, confusion matrix and report.

# Raw pixels, flattened.
print("FEATURE EXTRACTION: VECTORIZE")
y_pred_vtr = KNN(x_test, y_test, x_train, y_train, 3, vectorize)
print("------------------------------------------------------------\n")

# Images resized to 7x7 before flattening.
print("FEATURE EXTRACTION: DOWNSAMPLING 7x7")
y_pred_down7 = KNN(x_test, y_test, x_train, y_train, 3,  downsampling, (7, 7))
print("------------------------------------------------------------\n")

# Images resized to 14x14 before flattening.
print("FEATURE EXTRACTION: DOWNSAMPLING 14x14")
y_pred_down14 = KNN(x_test, y_test, x_train, y_train, 3, downsampling, (14, 14))
print("------------------------------------------------------------\n")

# 256-bin intensity histogram per image (label typo "HISTORGRAM" fixed).
print("FEATURE EXTRACTION: HISTOGRAM")
y_pred_htg = KNN(x_test, y_test, x_train, y_train, 3, histogram)


FEATURE EXTRACTION: VECTORIZE
k = 3, accuracy = 97.05

Confusion Matrix:
 [[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  10    9  996    2    0    0    0   13    2    0]
 [   0    2    4  976    1   13    1    7    3    3]
 [   1    6    0    0  950    0    4    2    0   19]
 [   6    1    0   11    2  859    5    1    3    4]
 [   5    3    0    0    3    3  944    0    0    0]
 [   0   21    5    0    1    0    0  991    0   10]
 [   8    2    4   16    8   11    3    4  914    4]
 [   4    5    2    8    9    2    1    8    2  968]]

EVALUATION ON TESTING DATA
               precision    recall  f1-score   support

           0     0.9663    0.9939    0.9799       980
           1     0.9577    0.9982    0.9776      1135
           2     0.9822    0.9651    0.9736      1032
           3     0.9635    0.9663    0.9649      1010
           4     0.9754    0.9674    0.9714       982
           5     0.9663    0.9630    0.96

In [None]:
"""----------------------test Sample Mean Classifier----------------------""" 

# Sample-mean (nearest class centroid) classifier on the MNIST test split,
# once per feature extractor. Each run prints its confusion matrix and report.

# Raw pixels, flattened.
print("FEATURE EXTRACTION: VECTORIZE")
sm_y_pred_vtr = SM(x_test, y_test, x_train, y_train, vectorize)
print("------------------------------------------------------------\n")

# Images resized to 7x7 before flattening.
print("FEATURE EXTRACTION: DOWNSAMPLING 7x7")
sm_y_pred_down7 = SM(x_test, y_test, x_train, y_train, downsampling, (7, 7))
print("------------------------------------------------------------\n")

# Images resized to 14x14 before flattening.
print("FEATURE EXTRACTION: DOWNSAMPLING 14x14")
sm_y_pred_down14 = SM(x_test, y_test, x_train, y_train, downsampling, (14, 14))
print("------------------------------------------------------------\n")

# 256-bin intensity histogram per image (label typo "HISTORGRAM" fixed).
print("FEATURE EXTRACTION: HISTOGRAM")
sm_y_pred_htg = SM(x_test, y_test, x_train, y_train, histogram)
