In [1]:
import operator
import collections

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

In [2]:
print("numpy version: ", np.__version__)
print("pandas version: ", pd.__version__)
print("sklearn version: ", sklearn.__version__)
print("matplotlib version: ", matplotlib.__version__)


# numpy version:  1.24.1
# pandas version:  1.5.2
# sklearn version:  1.2.0
# matplotlib version:  3.6.3

numpy version:  1.24.1
pandas version:  1.5.2
sklearn version:  1.2.1
matplotlib version:  3.6.3


In [3]:
# !pip install openpyxl

## Let's create our KNN class

In [4]:
class KnnRegressor:
    def __init__(self, n_neighbors=5, distance_metric="euclidean"):
        self.k = n_neighbors
        self.metric = distance_metric
    
    def fit(self, X, y):
        self.X = X
        self.y = y
    
    
    def predict(self, X_test):
        labels = []
        
        for i in X_test:
            labels.append(self.__singlepoint_predict(i))
        
        # we just return the mean value
        return np.round(np.mean(labels), 3)
    
    def __singlepoint_predict(self, p):
        all_distance = {}
        counter = 0
        
        if self.metric == "euclidean":
            
            for i in self.X:
                distance = 0
                for j in zip(p, i):
                    distance += (j[0] - j[1])**2
                all_distance[counter] = np.sqrt(distance)
                counter += 1
                
        elif self.metric == "manhattan":
            
            for i in self.X:
                distance = 0
                for j in zip(p, i):
                    distance += abs(j[0] - j[1])
                all_distance[counter] = distance
                counter += 1
                
        label = self.__classify_label(all_distance)
        
        return label

    def __classify_label(self, distances):
        label = []
        distances = sorted(distances.items(), key=lambda item : item[1])
        
        for i in distances[:self.k]:
            label.append(self.y.values[i[0]])
        
        return collections.Counter(label).most_common(1)[0][0]

In [5]:
df = pd.read_excel("D:\DATA SCIENCE ALL RESOURCE\ANALYSIS & VISUALIZATION\MachineLearning\Linear_Regression_multiple_variable.xlsx")

df

Unnamed: 0,speed,car_age,experience,risk
0,200,15,5.0,85
1,90,17,13.0,20
2,165,12,4.0,93
3,110,20,,60
4,140,5,3.0,82
5,115,2,8.0,10


In [6]:
df.isnull().sum()

speed         0
car_age       0
experience    1
risk          0
dtype: int64

In [7]:
df['experience'].fillna(value=df['experience'].mean(), inplace=True)

In [8]:
df

Unnamed: 0,speed,car_age,experience,risk
0,200,15,5.0,85
1,90,17,13.0,20
2,165,12,4.0,93
3,110,20,6.6,60
4,140,5,3.0,82
5,115,2,8.0,10


In [9]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [10]:
X

Unnamed: 0,speed,car_age,experience
0,200,15,5.0
1,90,17,13.0
2,165,12,4.0
3,110,20,6.6
4,140,5,3.0
5,115,2,8.0


In [11]:
y

0    85
1    20
2    93
3    60
4    82
5    10
Name: risk, dtype: int64

In [12]:
y.values

array([85, 20, 93, 60, 82, 10], dtype=int64)

In [13]:
scale = StandardScaler()

X_scale = scale.fit_transform(X=X)

In [14]:
X_scale

array([[ 1.71491599e+00,  4.93714715e-01, -4.85368749e-01],
       [-1.26362231e+00,  8.05534536e-01,  1.94147499e+00],
       [ 7.67199260e-01,  2.59849850e-02, -7.88724216e-01],
       [-7.22069892e-01,  1.27326427e+00, -2.69433780e-16],
       [ 9.02587365e-02, -1.06538439e+00, -1.09207968e+00],
       [-5.86681787e-01, -1.53311412e+00,  4.24697655e-01]])

In [15]:
model = KnnRegressor()

model.fit(X_scale, y)

In [16]:
point = np.array([200,15,5]).reshape(1,3)
point

array([[200,  15,   5]])

In [17]:
result = scale.transform(point)



In [18]:
model.predict(result)

85.0