In [1]:
# Importing libraries 
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [2]:
class KNearestNeighbor():
    """Brute-force k-nearest-neighbour classifier using Euclidean distance.

    A prediction is the majority vote over the integer-cast labels of the
    ``k`` closest training rows. When several labels share the highest count,
    the smallest label wins.
    """

    def __init__(self, k):
        """Store the neighbour count.

        Args:
            k: Number of nearest training samples that take part in the vote.

        Raises:
            ValueError: If ``k`` is smaller than 1. (A zero ``k`` would fail
                later on an empty vote, and a negative ``k`` would silently
                slice "all but the last |k|" neighbours.)
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = k

    def train(self, X_train, y_train):
        """Memorise the training set; no fitting is performed.

        Args:
            X_train: 2-D array-like of shape (num_train, num_features).
            y_train: 1-D array-like of labels, one per training row.
        """
        # np.asarray is a no-op for ndarrays and lets array-likes be indexed
        # positionally in the methods below.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: 2-D array-like of shape (num_test, num_features).

        Returns:
            1-D float ndarray of length num_test holding the voted labels.
        """
        distances = self.compute_distance(X_test)
        return self.predict_labels(distances)

    def compute_distance(self, X_test):
        """Compute Euclidean distances between test rows and training rows.

        Args:
            X_test: 2-D array-like of shape (num_test, num_features).

        Returns:
            ndarray of shape (num_test, num_train) where entry ``[i, j]`` is
            the distance between test row ``i`` and training row ``j``.
        """
        X_test = np.asarray(X_test)
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test, num_train))

        # One vectorised pass per test row: broadcasting against the whole
        # training matrix replaces the inner Python loop while keeping the
        # temporary at (num_train, num_features).
        for i in range(num_test):
            diff = self.X_train - X_test[i, :]
            distances[i, :] = np.sqrt(np.sum(diff ** 2, axis=1))

        return distances

    def predict_labels(self, distances):
        """Turn a distance matrix into majority-vote predictions.

        Args:
            distances: ndarray of shape (num_test, num_train), as returned by
                ``compute_distance``.

        Returns:
            1-D float ndarray of length num_test with the winning label per
            test row.
        """
        num_test = distances.shape[0]
        y_pred = np.zeros(num_test)

        for i in range(num_test):
            # Stable sort: equal distances keep training-set order, so the
            # chosen neighbours are deterministic.
            nearest = np.argsort(distances[i, :], kind="stable")[:self.k]
            votes = self.y_train[nearest].astype(int)
            # np.unique returns sorted labels, so argmax over the counts picks
            # the smallest label on ties (same outcome as bincount + argmax),
            # but it also accepts negative labels and does not allocate an
            # array sized by the largest label.
            labels, counts = np.unique(votes, return_counts=True)
            y_pred[i] = labels[np.argmax(counts)]

        return y_pred
        

In [3]:
def main(csv_path="diamonds.csv", nrows=1000, k=3, random_state=42):
    """Compare the scratch KNN against scikit-learn's KNeighborsRegressor.

    Reads the CSV, label-encodes the ``cut``/``color``/``clarity`` columns,
    holds out 25% of the rows, min-max scales the features, fits both models
    on the training part and prints each model's R^2 score on the held-out
    part.

    Args:
        csv_path: Path of the CSV file to load.
        nrows: Number of rows to read from the start of the file.
        k: Number of neighbours used by both models.
        random_state: Seed for the train/test split so that repeated runs
            print the same scores; pass ``None`` for a different split on
            every run.
    """
    df = pd.read_csv(csv_path, nrows=nrows)

    # Encode each categorical column with its own encoder instance.
    for column in ("cut", "color", "clarity"):
        df[column] = LabelEncoder().fit_transform(df[column].astype('str'))

    # Features: every column except the target.
    # NOTE(review): if the CSV carries a row-index column it stays in X and
    # may leak information about the target - confirm against the file.
    X = df.drop('price', axis=1).values
    # Target
    y = df['price'].values

    # Splitting (seeded so the reported scores are reproducible)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_state
    )

    # Scaling: fit on the training rows only, then apply to the test rows.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Fitting the scratch model.
    # NOTE(review): KNearestNeighbor predicts by majority vote over
    # integer-cast targets, while KNeighborsRegressor below is a regressor,
    # so the two scores are not a like-for-like comparison.
    KNN = KNearestNeighbor(k=k)
    KNN.train(X_train, y_train)

    # Prediction
    y_pred = KNN.predict(X_test)

    # The printed metric is the R^2 score, despite the "Accuracy" label.
    print('Accuracy of Scratch Model:', r2_score(y_test, y_pred))

    # Fitting the sklearn model with the same neighbour count.
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred_sklearn = knn.predict(X_test)

    # R^2 score of the sklearn model.
    print('Accuracy of Sklearn Model:', r2_score(y_test, y_pred_sklearn))
   
    
    
# Entry point: run the model comparison when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()
    
    

Accuracy of Scratch Model: 0.992741119379907
Accuracy of Sklearn Model: 0.9954172972163166
