In [94]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter
from sklearn.preprocessing import StandardScaler

### `Task` Train a KNN model on the glass type dataset and find the best `n_neighbors`.

Data Link: https://drive.google.com/file/d/17cbDNBmys04MJqQfrma3jd72VPMnxIq0/view?usp=share_link

In [95]:
# Read the glass dataset CSV into a DataFrame.
# NOTE(review): the path is absolute and hard-coded; adjust it if the file lives elsewhere.
df = pd.read_csv(filepath_or_buffer='/content/glass.csv')

# Using my own KNN model

In [96]:
# Features are every column except the last; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [97]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [98]:
# Fit the scaler on the training rows only, then apply that same fitted
# scaler to both splits so no test-set statistics are used.
scaling = StandardScaler().fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [99]:
class MyKnn:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Parameters
    ----------
    neighbors : int
        Number of nearest training points that vote on each prediction.
    """

    def __init__(self, neighbors):
        self.X_train = None
        self.y_train = None
        self.n = neighbors

    def fit(self, X_train, y_train):
        """Store the training data.

        Both inputs are converted with ``np.asarray`` so lists, arrays and
        pandas objects are all accepted.
        """
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Samples to classify, one per row (list of rows, ndarray or
            DataFrame).

        Returns
        -------
        numpy.ndarray
            One predicted label per input row.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("MyKnn.predict() called before fit()")

        # Convert up front, mirroring fit(): iterating a pandas DataFrame
        # yields its column labels, not its rows.
        X_test = np.asarray(X_test)

        y_pred = []
        for sample in X_test:
            distances = [
                self.calculate_distance(sample, train_row)
                for train_row in self.X_train
            ]
            # (training index, distance) pairs, nearest first. sorted() is
            # stable, so equal distances keep training-set order.
            nearest = sorted(enumerate(distances), key=lambda pair: pair[1])[:self.n]
            y_pred.append(self.calculate_labels(nearest))

        return np.array(y_pred)

    def calculate_labels(self, distance):
        """Return the majority label among the given neighbours.

        ``distance`` is a sequence of ``(training_index, distance)`` pairs.
        Vote ties follow ``Counter.most_common`` ordering, i.e. the label
        encountered first in ``distance`` wins.
        """
        labels = [self.y_train[pair[0]] for pair in distance]
        return Counter(labels).most_common(1)[0][0]

    def calculate_distance(self, i, j):
        """Return the Euclidean (L2) distance between points ``i`` and ``j``."""
        return np.linalg.norm(i - j)


In [100]:
# Evaluate the hand-written classifier with k=3 on the held-out split.
myknn = MyKnn(neighbors=3)
myknn.fit(X_train, y_train)
accuracy_score(y_true=y_test, y_pred=myknn.predict(X_test))

0.7209302325581395

# Using Sklearn KNN model

In [101]:
# Rebuild features (all but the last column) and target (last column) from df,
# using the same slicing as the custom-KNN section.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [102]:
# Same 80/20 split and seed as the custom-KNN section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [103]:
# Standardise using statistics learned from the training rows only.
scaling = StandardScaler().fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [104]:
# Baseline: scikit-learn's KNN with k=5, scored on the held-out split.
KNN = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
accuracy_score(y_true=y_test, y_pred=KNN.predict(X_test))

0.6046511627906976

# Find the best n_neighbors

In [105]:
# Sweep k = 1..14 and record the held-out accuracy for each value;
# score[idx] corresponds to k = idx + 1.
score = []
for i in range(1, 15):
    KNN = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    score.append(accuracy_score(y_true=y_test, y_pred=KNN.predict(X_test)))

In [106]:
# Held-out accuracies from the sweep, in order of k = 1..14.
score

[0.6744186046511628,
 0.5813953488372093,
 0.6511627906976745,
 0.627906976744186,
 0.6046511627906976,
 0.627906976744186,
 0.5581395348837209,
 0.627906976744186,
 0.6046511627906976,
 0.6046511627906976,
 0.6046511627906976,
 0.5813953488372093,
 0.6046511627906976,
 0.6046511627906976]

In [107]:
# Refit with k=3 and score on the held-out split.
# NOTE(review): k=3 is hard-coded; in the sweep output recorded above, k=1 has
# the highest accuracy. Confirm that choosing 3 is deliberate.
KNN = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
accuracy_score(y_true=y_test, y_pred=KNN.predict(X_test))

0.6511627906976745

# Using the square-root heuristic (k ≈ √N)

In [108]:
# Rule of thumb: k ≈ sqrt(N). N is taken from the training split, because
# those are the only rows the classifier can pick neighbours from; the full
# dataset size would also count the held-out test rows.
# int(...) guarantees a plain Python integer for n_neighbors.
KNN = KNeighborsClassifier(n_neighbors=int(round(np.sqrt(X_train.shape[0]))))
KNN.fit(X_train,y_train)
accuracy_score(y_test,KNN.predict(X_test))

0.6046511627906976

# Changing parameters

In [109]:
# k=3 with distance-weighted votes: closer neighbours count for more.
# p=2 keeps the Euclidean metric; kd_tree only selects the search structure.
KNN = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    p=2,
    algorithm='kd_tree',
).fit(X_train, y_train)
accuracy_score(y_true=y_test, y_pred=KNN.predict(X_test))

0.7209302325581395