In [107]:
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from collections import Counter

In [108]:
df = datasets.load_iris()
X = df.data

# 0: setosa, 1: versicolor, 2: virginica
y = df.target

# Splitting the dataset into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Function to calculate the euclidean distance
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2)**2))

# Function to calculate the manhattan distance
def manhattan_distance(x1, x2):
    return np.sum(np.abs(x1 - x2))

def cosinus_distance(x1, x2):
    # Calculate the dot product between the two vectors
    dot_product = np.dot(x1, x2)

    # Calculate the L2 norm of the first vector
    norm_x1 = np.linalg.norm(x1)

    # Calculate the L2 norm of the second vector
    norm_x2 = np.linalg.norm(x2)

    # Calculate the cosine similarity
    cosine_similarity = dot_product / (norm_x1 * norm_x2)

    # Calculate the cosine distance
    cosine_distance = 1 - cosine_similarity
    
    return cosine_distance

# KNN class
class KNN:
    def __init__(self, k=3, weighted=False):
        self.k = k
        self.weighted = weighted
    
    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
    
    def predict(self, X, metric='euclidean'):
        y_pred = [self._predict(x, metric) for x in X]
        return np.array(y_pred)
    
    def _predict(self, x, metric='euclidean'):
        # Compute distances between x and all examples in the training set
        if metric == 'euclidean':
            distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
        elif metric == 'manhattan':
            distances = [manhattan_distance(x, x_train) for x_train in self.X_train]
        elif metric == 'cosinus':
            distances = [cosinus_distance(x, x_train) for x_train in self.X_train]
        else:
            raise NotImplementedError()

        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        if self.weighted:
            k_nearest_distances = [distances[i] for i in k_indices]

            # Calculate the weights
            weights = [1 / (distance + 1e-8) for distance in k_nearest_distances]
                
            # Calculate the weighted sum of the labels
            weighted_sum = np.dot(weights, k_nearest_labels)

            # Calculate the weighted average
            weighted_average = weighted_sum / np.sum(weights)

            return weighted_average
        else:

            # Return the most common class label
            most_common = Counter(k_nearest_labels).most_common(1)
            return most_common[0][0]
    
    
    def score(self, X_test, y_test, metric='euclidean'):
        y_pred = self.predict(X_test, metric)
        return np.sum(y_pred == y_test) / len(y_test)

In [109]:
# Instantiate the model
clf = KNN(k=3)

# Fit the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='euclidean')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='euclidean'))

Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
Accuracy: 1.0


In [110]:
clf = KNN(k=3)

# Fit the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='manhattan')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='manhattan'))

Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
Accuracy: 1.0


In [111]:
clf = KNN(k=3)

# Fit the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='cosinus')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='cosinus'))

Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 1 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
Accuracy: 0.9777777777777777


In [112]:
clf = KNN(k=3, weighted=True)

clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='euclidean')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='euclidean'))


Predictions: [1.         0.         2.         1.         1.         0.
 1.         2.         1.24440803 1.         1.7519386  0.
 0.         0.         0.         1.31717204 2.         1.
 1.         2.         0.         2.         0.         2.
 2.         2.         2.         2.         0.         0.
 0.         0.         1.         0.         0.         2.
 1.         0.         0.         0.         2.         1.
 1.         0.         0.        ]
Accuracy: 0.9333333333333333


In [113]:

clf = KNN(k=3, weighted=True)
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='manhattan')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='manhattan'))

Predictions: [1.         0.         2.         1.         1.         0.
 1.         2.         1.30534351 1.         1.8        0.
 0.         0.         0.         1.         2.         1.
 1.         2.         0.         2.         0.         2.
 2.         2.         2.         2.         0.         0.
 0.         0.         1.         0.         0.         1.67567568
 1.         0.         0.         0.         1.99999998 1.
 1.         0.         0.        ]
Accuracy: 0.9111111111111111


In [114]:
clf = KNN(k=3, weighted=True)
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test, metric='cosinus')

print("Predictions:", predictions)

# Calculate the accuracy
print("Accuracy:", clf.score(X_test, y_test, metric='cosinus'))

Predictions: [1.        0.        2.        1.        1.        0.        1.
 2.        1.        1.        2.        0.        0.        0.
 0.        1.        2.        1.        1.        2.        0.
 2.        0.        2.        1.3207108 2.        2.        2.
 0.        0.        0.        0.        1.        0.        0.
 2.        1.        0.        0.        0.        2.        1.
 1.        0.        0.       ]
Accuracy: 0.9777777777777777
