In [None]:
from scipy.spatial.distance import cdist
from scipy.stats import mode

# Vectorised k-nearest-neighbour vote for all query points at once.
# NOTE(review): X_train, X, Y_train, k and np are not defined in or before
# this cell in the visible notebook — confirm where they are meant to come from.

# Pairwise distances: cdist(A, B) returns one row per entry of A and one
# column per entry of B, so each column describes one query point of X.
dists = cdist(X_train, X)
# Partially order every column (axis=0) and keep the first k rows: the
# training-row indices of the k smallest distances for each query point
# (in no particular order within those k).
idx = np.argpartition(dists, k, axis=0)[:k]
# Despite its name, this holds the training *labels* of those neighbours,
# looked up in Y_train by index (assumes Y_train is 1-D — TODO confirm).
nearest_dists = np.take(Y_train, idx)
# Majority vote per query column; [0] selects the modal values from mode()'s result.
out = mode(nearest_dists,axis=0)[0]

In [None]:
def euclidean_distance(X_train, X_test):
    """
    Measure how far a single query vector lies from each training vector.

    Every entry of `X_train` has `X_test` subtracted from it and the result
    is passed to `np.linalg.norm`; the norms are returned as a plain list
    in the same order as `X_train`.
    """
    distances = []
    for train_vector in X_train:
        offset = train_vector - X_test
        distances.append(np.linalg.norm(offset))
    return distances

def k_nearest(X, Y, k):
    """
    Get the indices of the nearest feature vectors and return a
    list of their classes

    Parameters:
        X: 1-D sequence of distances, one per training point.
        Y: class labels, indexed the same way as `X`.
        k: number of neighbours to select.

    Returns:
        Array with the labels of the `k` smallest distances. The order of
        the labels within the result is not guaranteed to follow distance.

    Raises:
        ValueError: (from `np.argpartition`) if `k` exceeds the number of
        distances.
    """
    distances = np.asarray(X)
    # np.argpartition(a, k) needs k < len(a), so asking for *all* points
    # (k == len(a)) used to fail with "kth out of bounds" even though the
    # answer is simply every label. Handle that boundary explicitly.
    if k == distances.shape[-1]:
        return np.take(Y, np.arange(k))
    # After partitioning around position k, the first k positions hold the
    # indices of the k smallest distances.
    idx = np.argpartition(distances, k)
    return np.take(Y, idx[:k])

def predict(X_test):
    """
    For each feature vector get its predicted class

    Reads the notebook-level names `X_train`, `Y_train` and `k`, and uses
    `euclidean_distance` and `k_nearest` defined alongside this function.

    Parameters:
        X_test: iterable of query feature vectors.

    Returns:
        NumPy array with one predicted label per query vector: the most
        common label among the `k` neighbours returned by `k_nearest`.
    """
    # No import of Counter is visible anywhere in this notebook, so bring it
    # into scope here; otherwise the first call fails with a NameError.
    from collections import Counter

    predictions = []
    # Handle one query at a time so only a single distance list is alive at
    # once, instead of building all of them before any vote is taken.
    for query in X_test:
        distances = euclidean_distance(X_train, query)
        neighbour_labels = k_nearest(distances, Y_train, k)
        winner, _count = Counter(neighbour_labels).most_common(1)[0]
        predictions.append(winner)
    return np.array(predictions)

In [2]:

import numpy as np
import operator
 
def euc_dist(x1, x2):
    """
    Euclidean (L2) distance between two equally-shaped points.

    Both inputs are converted to float64 arrays before subtracting. Without
    that, unsigned-integer inputs (e.g. raw uint8 pixel rows) wrap around in
    both the subtraction and the squaring and produce wrong distances, and
    plain Python lists cannot be subtracted at all.

    Parameters:
        x1, x2: array-likes of numbers with matching (broadcastable) shapes.

    Returns:
        The square root of the summed squared differences, as a NumPy float.
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [3]:
class KNearestNeighbors():
    """
    Brute-force k-nearest-neighbours classifier.

    `fit` only remembers the training data. `predict` measures the distance
    from each query point to every training point with the module-level
    `euc_dist` helper and returns the most frequent label among the `K`
    closest training points.
    """

    def __init__(self, K=3):
        """Remember `K`, the number of neighbours that vote on each prediction."""
        self.K = K

    def fit(self, x_train, y_train):
        """Store references to the training features and labels (nothing is copied)."""
        self.X_train = x_train
        self.Y_train = y_train

    def predict(self, X_test):
        """
        Predict a label for every observation in `X_test`.

        Parameters:
            X_test: iterable of query points comparable with the training
                points via `euc_dist`.

        Returns:
            A list with one predicted label per query point, in input order.
            When several labels tie for the most votes, the label whose
            first voter is nearest to the query wins.

        Raises:
            ValueError: if `K` is smaller than 1, or if there are no
                training points to vote.
        """
        k = self.K
        # K is used as a slice bound below. K == 0 would select no
        # neighbours at all, and a negative K would silently select
        # "everything except the farthest |K| points" instead of the nearest
        # ones, so reject both up front. (K=None still slices to "all
        # points", exactly as before.)
        if k is not None and k < 1:
            raise ValueError(f"K must be a positive integer, got {k!r}")

        # list to store all our predictions
        predictions = []

        # loop over all observations in the test set
        for x in X_test:

            # calculate the distance between the test point and all other points in the training set
            dist = np.array([euc_dist(x, x_t) for x_t in self.X_train])

            # sort the distances and return the indices of K neighbors
            nearest = dist.argsort()[:k]

            # count how many of the neighbours carry each class label
            votes = {}
            for idx in nearest:
                label = self.Y_train[idx]
                votes[label] = votes.get(label, 0) + 1

            # max() returns the first key with the highest count, and dicts
            # iterate in insertion order, so ties are broken the same way as
            # a stable descending sort of the counts would break them.
            predictions.append(max(votes, key=votes.get))
        return predictions

In [4]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seed NumPy's legacy global random generator so repeated runs draw the same numbers.
# NOTE(review): presumably this is what keeps the scikit-learn data generation
# and split in the following cell repeatable, since neither call is given a
# random_state — confirm.
np.random.seed(2020)

In [5]:
# Synthetic two-class classification problem with 150 samples.
data,target = make_classification(n_samples=150, n_classes=2)
# Hold out 20% of the samples as a test set; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

In [6]:
# Classify every held-out point by a vote among its 5 nearest training points.
clf = KNearestNeighbors(K=5)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

# Report the accuracy score of the predictions against the held-out labels.
print('Accuracy:', accuracy_score(y_test, predictions))

Accuracy: 0.9333333333333333


In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from scipy.stats import multivariate_normal as mvn
from scipy.stats import multinomial as mlvn
from scipy.stats import bernoulli as brn
%matplotlib inline

In [8]:
def _load_npz_array(path, key='arr_0'):
    """
    Read a single array out of a NumPy ``.npz`` archive.

    `np.load` on an ``.npz`` file returns an archive object that keeps the
    underlying file open until it is closed. Using it as a context manager
    releases the file handle as soon as the requested array has been read
    into memory.

    Parameters:
        path: location of the ``.npz`` file.
        key: name of the array inside the archive.

    Returns:
        The array stored under `key`.
    """
    with np.load(path) as archive:
        return archive[key]


# Images and labels of the train and test splits, one archive per array.
train_data = _load_npz_array('kmnist-train-imgs.npz')
train_labels = _load_npz_array('kmnist-train-labels.npz')
test_data = _load_npz_array('kmnist-test-imgs.npz')
test_labels = _load_npz_array('kmnist-test-labels.npz')

In [9]:
# Sanity-check the dimensions of the freshly loaded arrays.
print(train_data.shape)
print(train_labels.shape)
print(test_data.shape)
# Last expression of the cell, so the notebook echoes it as the cell's output.
test_labels.shape

(60000, 28, 28)
(60000,)
(10000, 28, 28)


(10000,)

In [10]:
def _flatten_images(images):
    """
    Collapse every axis after the first so each image becomes one feature row.

    The row width is derived from the array's own shape rather than being
    hard-coded, so the number of rows always equals the number of images
    (and therefore the number of labels), whatever the image size.
    """
    n_images = images.shape[0]
    n_features = int(np.prod(images.shape[1:]))
    return images.reshape(n_images, n_features)


# One row per image; for the 28x28 images loaded above that is 784 features.
x_train = _flatten_images(train_data)
# NOTE: y_train / y_test rebind names that the earlier synthetic-data cells
# also assign, so re-running those earlier evaluation cells after this point
# would score against these labels instead.
y_train = train_labels
x_test = _flatten_images(test_data)
y_test = test_labels

In [11]:
# Rescale the features by dividing by 255 (true division, so the result is floating point).
# NOTE(review): presumably the pixel intensities span 0-255, which would map
# them into [0, 1] — confirm the dtype/range of the loaded arrays.
x_norm = x_train/255
x_test_norm = x_test/255

In [12]:
def accuracy(y, y_hat):
    """
    Fraction of positions at which the predicted label equals the true label.

    Both arguments are converted to NumPy arrays first. Without that, two
    plain Python lists compare as a single True/False with ``==`` and the
    result would be 0.0 or 1.0 instead of the share of matching entries.

    Parameters:
        y: true labels (array-like).
        y_hat: predicted labels (array-like) of the same length.

    Returns:
        Mean of the element-wise equality, as a NumPy float.
    """
    return np.mean(np.asarray(y) == np.asarray(y_hat))

#print(f"Accuracy: {accuracy(y_test, y_hat_vis):0.3f}")

In [None]:
# Repeat the K=5 experiment on the normalised KMNIST features.
# NOTE(review): KNearestNeighbors.predict computes the distance from every
# test row to every training row in Python-level loops; with the
# (10000, ...) test and (60000, ...) train shapes printed earlier this cell is
# likely to run for a long time — consider trying a subset first.
clf = KNearestNeighbors(K=5)
clf.fit(x_norm, y_train)

predictions = clf.predict(x_test_norm)

# Report the result twice: once via scikit-learn's accuracy_score and once
# via the local accuracy() helper.
print('Accuracy:', accuracy_score(y_test, predictions))
print('accuracy method', accuracy(y_test, predictions))