Author: Python Engineer: Machine Learning algorithm implementations from scratch. https://www.youtube.com/playlist?list=PLqnslRFeH2Upcrywf-u2etjdxxkL8nl7E

# Machine Learning algorithm implementations from scratch.

You can find tutorials with the math and code explanations [here](https://www.youtube.com/playlist?list=PLqnslRFeH2Upcrywf-u2etjdxxkL8nl7E).

![image.png](attachment:image.png)

In [15]:
from collections import Counter
import numpy as np

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array_like
        Coordinates of the two points. Anything ``np.asarray`` accepts
        (lists, tuples, arrays); the shapes must be equal or broadcastable.

    Returns
    -------
    numpy floating scalar
        ``sqrt(sum((x1 - x2) ** 2))`` taken over every element.
    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    # Promote bool/integer inputs to float before subtracting: unsigned
    # integer arithmetic wraps around (uint8: 0 - 16 -> 240, 240 ** 2 -> 0),
    # which would silently produce a wrong distance.
    if x1.dtype.kind in "biu":
        x1 = x1.astype(np.float64)
    if x2.dtype.kind in "biu":
        x2 = x2.astype(np.float64)
    diff = x1 - x2
    return np.sqrt(np.sum(diff ** 2))


class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, all of them vote.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=3):
        # Fail early: k <= 0 would otherwise select no neighbours and surface
        # later as an opaque IndexError inside _predict.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    @staticmethod
    def _as_numeric(values):
        """Return ``values`` as an array, promoting bool/integer dtypes to float64.

        Integer (especially unsigned) arithmetic can wrap around when
        differences are taken and squared, which corrupts the distances.
        """
        arr = np.asarray(values)
        return arr.astype(np.float64) if arr.dtype.kind in "biu" else arr

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array_like
            Training samples; the first axis indexes the samples.
            Bool/integer data is stored as float64.
        y : array_like
            One label per training sample.

        Raises
        ------
        ValueError
            If ``X`` or ``y`` is not a sequence, is empty, or their lengths differ.
        """
        X = self._as_numeric(X)
        y = np.asarray(y)
        if X.ndim == 0 or y.ndim == 0 or len(X) == 0:
            raise ValueError("X and y must be non-empty sequences of samples and labels")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample, in input order.
        """
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        train = self._as_numeric(self.X_train)
        n_train = train.shape[0]
        query = self._as_numeric(x).reshape(-1)
        # One vectorised pass: flatten every sample to a row, subtract the
        # query from all rows at once, and reduce each row to its distance.
        diffs = train.reshape(n_train, -1) - query
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))
        # A stable sort makes equal distances resolve to the lower training index.
        k_idx = np.argsort(distances, kind="stable")[: self.k]
        # Labels of the k nearest training samples, nearest first.
        k_neighbor_labels = [self.y_train[i] for i in k_idx]
        # On a tied vote, most_common keeps the label encountered first,
        # i.e. the one belonging to the nearer neighbour.
        most_common = Counter(k_neighbor_labels).most_common(1)
        return most_common[0][0]

In [16]:
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Red / green / blue palette; presumably intended for plotting the classes,
# but it is not used in the cells visible here.
cmap = ListedColormap(
    ["#FF0000", "#00FF00", "#0000FF"]
)

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array_like
        Ground-truth labels.
    y_pred : array_like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    numpy floating scalar
        Number of matching entries divided by ``len(y_true)``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # rather than an element-wise result, which would give a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    n_correct = np.sum(y_true == y_pred)
    return n_correct / len(y_true)

# Load the Iris samples (features) and their class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Fit a 3-nearest-neighbour classifier and score it on the held-out samples.
k = 3
clf = KNN(k=k)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
test_accuracy = accuracy(y_test, predictions)
print("KNN classification accuracy", test_accuracy)


KNN classification accuracy 1.0


In [17]:
# The following cells are tests that walk through KNN._predict step by step.

In [18]:
# Keep only the first ten training samples (and their labels) so the
# intermediate values printed by the following cells stay readable.
# Note: this rebinds X_train / y_train, so the full training split is no
# longer available under these names after this cell has run.
X_train = X_train[:10]
y_train = y_train[:10]
print(X_train)
print(y_train)

[[5.1 2.5 3.  1.1]
 [6.2 2.8 4.8 1.8]
 [5.  3.5 1.3 0.3]
 [6.3 2.8 5.1 1.5]
 [6.7 3.  5.  1.7]
 [4.8 3.4 1.9 0.2]
 [4.4 2.9 1.4 0.2]
 [5.4 3.4 1.7 0.2]
 [4.6 3.6 1.  0.2]
 [5.  2.3 3.3 1. ]]
[1 2 0 2 1 0 0 0 0 1]


In [19]:
# Distance from the first test sample to every sample currently in X_train
x = X_test[0]
distances = [
    euclidean_distance(x, train_row)
    for train_row in X_train
]
print(x)
print(distances)

[6.1 3.  4.6 1.4]
[1.9748417658131496, 0.5000000000000003, 3.6823905279043934, 0.5830951894845302, 0.7810249675906661, 3.252691193458118, 3.818376618407356, 3.2403703492039293, 4.124318125460256, 1.884144368141677]


In [20]:
# Rank the training samples from nearest to farthest, then keep the
# indices of the three closest ones (k=3)
nearest_first = np.argsort(distances)
k_idx = nearest_first[:3]
print(k_idx)

[1 3 4]


In [21]:
# Look up the labels of the k nearest training samples (nearest first);
# indexing with the whole index array picks all of them at once
k_neighbor_labels = list(y_train[k_idx])
print(k_neighbor_labels)

[2, 2, 1]


In [22]:
# Tally the neighbour labels and keep the single most frequent one;
# most_common(1) returns a one-element list of (label, count) pairs
most_common = Counter(k_neighbor_labels).most_common(1)
print(most_common)
winning_label, vote_count = most_common[0]
print(winning_label)

[(2, 2)]
2


In [23]:
# This is a test
from collections import Counter

a = [10] * 4 + [20] * 3 + [30, 40, 50, 60]
most_common = Counter(a).most_common(1)  # the single most frequent value
print(most_common)  # 10 is the most frequent, appearing four times; the result is a list
top_entry = most_common[0]
print(top_entry)
print(top_entry[0])

[(10, 4)]
(10, 4)
10


In [24]:
# This is a test: ask for the two most frequent values instead of one
most_common = Counter(a).most_common(2)
print(most_common)
second_entry = most_common[1]
print(second_entry)
print(second_entry[0])

[(10, 4), (20, 3)]
(20, 3)
20
