In [10]:
# Imports
import numpy as np

# Class
class k_nn:
    # Initialize

    # Calc euclidean distance to compare neighbors
    # Fit - predict - display 

    def __init__(self, num_neighbors=5):
        """Init definition"""
        self.num_neighbors = num_neighbors

    # Euclidean distance
    def euclidean_distance(self, a, b):
        """Returns euclidean distance between rows"""
        euclidean_distance_sum = 0.0  # initial value
        for i in range(len(a)):
            euclidean_distance_sum += (a[i] - b[i]) ** 2
        """Subtract - square - add to euclidean_distance_sum"""
        euclidean_distance = np.sqrt(euclidean_distance_sum)
        return euclidean_distance

    # Fit k Nearest Neighbors
    def fit_knn(self, X_train, y_train):
        """Fits the model using training data. X_train and y_train inputs for func"""
        self.X_train = X_train
        self.y_train = y_train

    # Predict X for kNN
    def predict_knn(self, X):
        """Return predictions for X based on the fit X_train and y_train data"""
        # initialize prediction_knn as empty list
        prediction_knn = []
        for i in range(len(X)):
            # initialize euclidean_distance as empty list
            euclidean_distance = []

            for row in self.X_train:
                # find eucl_distance to X using
                # euclidean_distance() function call and append to euclidean_distance list
                euclidean_distance_sum = self.euclidean_distance(row, X[i])
                euclidean_distance.append(euclidean_distance_sum)
            neighbors = np.array(euclidean_distance).argsort()[: self.num_neighbors]
            
            # initialize dict to count class occurrences in y_train
            neighbor_count = {}
            for num in neighbors:
                if self.y_train[num] in neighbor_count:
                    neighbor_count[self.y_train[num]] += 1
                else:
                    neighbor_count[self.y_train[num]] = 1

            # max count labels to prediction_knn
            prediction_knn.append(max(neighbor_count, key=neighbor_count.get))

        return prediction_knn

    # display list of nearest_neighbors & euclidian dist
    def display_knn(self, x):
        """Inputs -- x // outputs a list w/ nearest neighbors and euclidean distance."""
        # initialize euclidean_distance as empty list
        euclidean_distance = []
        for row in self.X_train:
            euclidean_distance_sum = self.euclidean_distance(row, x)
            euclidean_distance.append(euclidean_distance_sum)
        neighbors = np.array(euclidean_distance).argsort()[: self.num_neighbors]
    
        # empty display_knn_values list
        display_knn_values = []
        for i in range(len(neighbors)):
            n_i = neighbors[i]
            e_dist = euclidean_distance[i]
            display_knn_values.append((n_i, e_dist))  # changed to list of tuples
        return display_knn_values

In [16]:
# import sklearn models
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load data
iris = load_iris()
data = iris.data
target = iris.target

# Train/Test splits
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3)
# Sklearn-learn KNN Classifier
clf = KNeighborsClassifier(n_neighbors=10)
# Fit
clf.fit(X_train, y_train)
# Prediction
predict = clf.predict(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"Scikit-learn KNN classifier accuracy Iris Dataset: {accuracy_score(y_test, predict)}")
# y_pred
y_pred = clf.predict([X_test[0]])
print("y_pred:", y_pred)

# my model
classifier = k_nn(num_neighbors=10)
# Fit
classifier.fit_knn(X_train, y_train)
# Prediction
predict = classifier.predict_knn(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"k_nn Model (Self Built) Accuracy Iris Dataset: {accuracy_score(y_test, predict)}")
y_pred = classifier.predict_knn([X_test[0]])
print("y_pred:", y_pred)
# Neighbor index and euclidean distance
neighbors = classifier.display_knn(X_test[0])
print("Neighbors & euclidean dist:", neighbors)

Prediction: [0 2 0 1 0 2 0 0 1 2 0 2 0 2 1 0 1 0 2 2 2 0 0 1 2 1 2 0 1 1 2 0 1 2 0 1 2
 0 1 0 1 2 2 1 1]
Scikit-learn KNN classifier accuracy Iris Dataset: 0.9333333333333333
y_pred: [0]
Prediction: [0, 2, 0, 1, 0, 2, 0, 0, 1, 2, 0, 2, 0, 2, 1, 0, 1, 0, 2, 2, 2, 0, 0, 1, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1, 2, 2, 1, 1]
k_nn Model (Self Built) Accuracy Iris Dataset: 0.9555555555555556
y_pred: [0]
Neighbors & euclidean dist: [(35, 2.8809720581775866), (45, 1.086278049120022), (99, 4.196427051671457), (76, 4.236744032862973), (20, 3.1764760348537187), (85, 3.9686269665968865), (72, 0.48989794855663604), (19, 3.218695387886217), (52, 3.96232255123179), (51, 4.386342439892263)]


In [17]:
# import sklearn models
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load data
bc = load_breast_cancer()
data = bc.data
target = bc.target

# Train/Test splits
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3)
# Sklearn-learn KNN Classifier
clf = KNeighborsClassifier(n_neighbors=10)
# Fit
clf.fit(X_train, y_train)
# Prediction
predict = clf.predict(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"Scikit-learn KNN Classifier Accuracy Breast Cancer Dataset: {accuracy_score(y_test, predict)}")
# y_pred
y_pred = clf.predict([X_test[0]])
print("y_pred:", y_pred)

# my model
classifier = k_nn(num_neighbors=10)
# Fit
classifier.fit_knn(X_train, y_train)
# Prediction
predict = classifier.predict_knn(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"k_nn Model (Self Built) Accuracy Breast Cancer Dataset: {accuracy_score(y_test, predict)}")
y_pred = classifier.predict_knn([X_test[0]])
print("y_pred:", y_pred)
# Neighbor index and euclidean distance
neighbors = classifier.display_knn(X_test[0])
print("Neighbors & euclidean dist:", neighbors)

Prediction: [1 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1
 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 0 1 0
 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 0 1 1 1 1
 1 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 1
 1 0 1 0 1 0 0 0 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1]
Scikit-learn KNN Classifier Accuracy Breast Cancer Dataset: 0.9473684210526315
y_pred: [1]
Prediction: [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1]
k_nn Model (Self Built)

In [19]:
# import sklearn models
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load data
wine = load_wine()
data = wine.data
target = wine.target

# Train/Test splits
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3)
# Sklearn-learn KNN Classifier
clf = KNeighborsClassifier(n_neighbors=10)
# Fit
clf.fit(X_train, y_train)
# Prediction
predict = clf.predict(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"Scikit-learn KNN Classifier Accuracy Wine Dataset: {accuracy_score(y_test, predict)}")
# y_pred
y_pred = clf.predict([X_test[0]])
print("y_pred:", y_pred)

# my model
classifier = k_nn(num_neighbors=10)
# Fit
classifier.fit_knn(X_train, y_train)
# Prediction
predict = classifier.predict_knn(X_test)
print("Prediction:", predict)
# Accuracy Score
print(f"k_nn Model (Self Built) Accuracy Wine Dataset: {accuracy_score(y_test, predict)}")
y_pred = classifier.predict_knn([X_test[0]])
print("y_pred:", y_pred)
# Neighbor index and euclidean distance
neighbors = classifier.display_knn(X_test[0])
print("Neighbors & euclidean dist:", neighbors)

Prediction: [2 2 0 2 0 2 2 0 1 1 2 0 1 0 0 1 0 0 1 0 1 0 1 0 2 2 1 1 0 1 2 1 2 0 2 0 0
 2 2 1 2 1 1 1 0 2 0 1 1 2 1 1 1 1]
Scikit-learn KNN Classifier Accuracy Wine Dataset: 0.7037037037037037
y_pred: [2]
Prediction: [2, 2, 0, 2, 0, 2, 2, 0, 1, 1, 2, 0, 1, 0, 0, 1, 0, 0, 2, 0, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 2, 0, 2, 0, 0, 2, 2, 1, 2, 1, 1, 1, 0, 2, 0, 1, 1, 2, 1, 1, 1, 1]
k_nn Model (Self Built) Accuracy Wine Dataset: 0.7222222222222222
y_pred: [2]
Neighbors & euclidean dist: [(39, 451.9838032496297), (14, 717.4694437395923), (67, 230.75844686598148), (10, 416.7816574658727), (77, 156.98511426246756), (17, 337.91355995283766), (115, 321.8677829482162), (22, 230.254045132762), (106, 397.66150354290016), (78, 312.4423826883926)]
