In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits # The MNIST data set is in scikit learn data set
from sklearn.preprocessing import StandardScaler  # It is important in neural networks to scale the date
from sklearn.model_selection import train_test_split  # The standard - train/test to prevent overfitting and choose hyperparameters
from sklearn.metrics import accuracy_score # 

In [40]:
from sklearn.datasets import load_iris

iris = load_iris()
y_iris = iris.target
X_iris = iris.data
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(X_iris, y_iris,random_state=42)
X_scale = StandardScaler()
X_train_iris = X_scale.fit_transform(X_train_iris)
X_test_iris = X_scale.fit_transform(X_test_iris)


## Original KNN on Iris

In [93]:
# distance function
def euclidean_distance(x1, x2):    
    return np.sqrt(np.sum(np.square(x1 - x2)))

def manhattan_distance(x1, x2):
    return np.sum(x1-x2)

In [94]:
def get_neighbors(X_train, y_train, one_sample, k, distance = euclidean_distance):
    K_Nearest_distance = []
    for i in range(len(X_train)):
        # calculate distance 
        K_Nearest_distance.append(distance(np.array(X_train[i]), np.array(one_sample)))
    
    # get sorted index list
    sorted_indices = np.array(K_Nearest_distance).argsort()
    neighbors = y_train[sorted_indices][:k]
    return neighbors


In [95]:
def predict_label(neighbors):
    # indices of the first occurrences of the unique values in the original array 
    _, indices= np.unique(neighbors, return_index=True)
    # return the first occurrences neighbors
    return neighbors[indices[0]]

In [96]:
def predict_labels(X_train, y_train, X_test, k):
    predicted_labels = []
    for one_sample in X_test:
        neighbors = get_neighbors(X_train, y_train, one_sample, k)
        label = predict_label(neighbors)
        predicted_labels.append(label)
        
    return predicted_labels


In [99]:
k = 3
predicted_labels = np.array(predict_labels(X_train_iris, y_train_iris, X_test_iris, k))
# print(predicted_labels)
accuracy = len(predicted_labels[predicted_labels == y_test_iris]) / len(y_test_iris)
print(f"K is {k}")
print(f"Accuracy is {accuracy}")


K is 3
Accuracy is 0.9473684210526315


## Original KNN on Mnist



In [100]:
digits=load_digits()
X = digits.data
y = digits.target
X_scale = StandardScaler()
X = X_scale.fit_transform(digits.data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)


In [102]:
k = 3
predicted_labels = np.array(predict_labels(X_train, y_train, X_test, k))
# print(predicted_labels)
accuracy = len(predicted_labels[predicted_labels == y_test]) / len(y_test)
print(f"K is {k}")
print(f"Accuracy is {accuracy}")


K is 3
Accuracy is 0.9499304589707928


## Implement Weighted KNN

## Use Gaussian Distribution to Calculate Weight

## Test on Iris dataset

In [133]:
def gaussian(dist, a=1, b=0, c=0.3):
    z = a * np.exp(-(dist - b) ** 2 / (2 * c ** 2))
    return z / np.sum(z)

def inverse(dist):
    return 1 / (dist + 0.2)

In [140]:
def get_neighbors(X_train, y_train, one_sample, k, distance = euclidean_distance):
    K_Nearest_distance = []
    for i in range(len(X_train)):
        # calculate distance 
        K_Nearest_distance.append(distance(np.array(X_train[i]), np.array(one_sample)))
    
    K_Nearest_distance = np.array(K_Nearest_distance)
    inverse_weight = inverse(K_Nearest_distance)
    weighted_K_nearest_distance = inverse_weight * K_Nearest_distance
    
    # get sorted index list
    sorted_indices = weighted_K_nearest_distance.argsort()
    neighbors = y_train[sorted_indices][:k]
    # print(neighbors)
    return neighbors

In [141]:
"""
print(y_test_iris[0])
a = get_neighbors(X_train_iris, y_train_iris, y_test_iris[0], 5)
print(a)
b = predict_label(a)
print(b)
"""

'\nprint(y_test_iris[0])\na = get_neighbors(X_train_iris, y_train_iris, y_test_iris[0], 5)\nprint(a)\nb = predict_label(a)\nprint(b)\n'

In [145]:
k = 3
predicted_labels = np.array(predict_labels(X_train_iris, y_train_iris, X_test_iris, k))
# print(predicted_labels)
accuracy = len(predicted_labels[predicted_labels == y_test_iris]) / len(y_test_iris)
print(f"K is {k}")
print(f"Accuracy is {accuracy}")


K is 3
Accuracy is 0.9473684210526315


## Test on Mnist Dataset



In [146]:
k = 3
predicted_labels = np.array(predict_labels(X_train, y_train, X_test, k))
# print(predicted_labels)
accuracy = len(predicted_labels[predicted_labels == y_test]) / len(y_test)
print(f"K is {k}")
print(f"Accuracy is {accuracy}")



K is 3
Accuracy is 0.9499304589707928
