### Importing packages:

In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

### Reading data:

In [2]:
# Load the training CSV of the mobile-price-classification dataset into a DataFrame.
# NOTE(review): the absolute /kaggle/input/... path presumably only exists inside a
# Kaggle kernel; adjust it when running the notebook elsewhere.
train = pd.read_csv("/kaggle/input/mobile-price-classification/train.csv")

In [3]:
train.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [5]:
train.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

- There are no null values in the dataset.

### Building KNN:

K-nearest neighbours (KNN) is a supervised learning algorithm; here it is used for classification.

Algorithm:
- Choose the number of neighbours, 'k'.
- Calculate the distance from the new point to every point in the training data.
- Take the 'k' training points with the smallest distances (the k-nearest neighbours).
- Count how many of those k neighbours belong to each category and
  assign the new point to the category with the highest count.

In [9]:
# Separate the feature matrix (every column except the last) from the
# target vector (the last column), both as NumPy arrays.
X, y = train.iloc[:, :-1].to_numpy(), train.iloc[:, -1].to_numpy()

In [10]:
X

array([[8.420e+02, 0.000e+00, 2.200e+00, ..., 0.000e+00, 0.000e+00,
        1.000e+00],
       [1.021e+03, 1.000e+00, 5.000e-01, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [5.630e+02, 1.000e+00, 5.000e-01, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       ...,
       [1.911e+03, 0.000e+00, 9.000e-01, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [1.512e+03, 0.000e+00, 9.000e-01, ..., 1.000e+00, 1.000e+00,
        1.000e+00],
       [5.100e+02, 1.000e+00, 2.000e+00, ..., 1.000e+00, 1.000e+00,
        1.000e+00]])

In [11]:
y

array([1, 2, 2, ..., 3, 0, 3])

In [12]:
# Hold out part of the data for evaluation. No test_size is passed, so
# scikit-learn's documented default (25% of the rows go to the test set) applies;
# random_state fixes the shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=69)

In [24]:
class KNN(object):
    """Brute-force k-nearest-neighbours classifier with majority voting.

    ``fit`` only stores the training data. ``predict`` measures the distance
    from each query row to every stored row, keeps the ``k`` closest rows and
    returns the most frequent label among them. When several labels share the
    highest count, the smallest label (in sorted order) wins.

    Parameters
    ----------
    k : int
        Number of neighbours that vote. Must be a positive integer. If it
        exceeds the number of training rows, all training rows vote.
    dist : callable, optional
        Function ``dist(a, b) -> float`` giving the distance between a query
        row ``a`` and a training row ``b``. Defaults to Euclidean distance.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k, dist=None):
        # k <= 0 used to produce an empty vote and silently predict class 0.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = int(k)
        # Remember whether the built-in metric is in use so it can be
        # evaluated against the whole training matrix in one NumPy call.
        self._uses_default_dist = dist is None
        self.dist = self._euclidean_dist if dist is None else dist

    def _euclidean_dist(self, a, b):
        """Return the Euclidean distance between ``a`` and ``b``.

        Inputs are cast to float so integer dtypes cannot overflow. The sum
        runs over the last axis, so a 2-D ``a`` and a 1-D ``b`` yield one
        distance per row of ``a``.
        """
        diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
        return np.sqrt(np.sum(diff ** 2, axis=-1))

    def _find_nearest(self, x):
        """Return ``(distance, training_index)`` pairs sorted by distance.

        Equal distances keep ascending index order, which matches sorting the
        ``(distance, index)`` tuples directly.
        """
        if self._uses_default_dist:
            distances = self._euclidean_dist(self.dataset, x)
        else:
            # A user-supplied metric is only assumed to work on one pair of rows.
            distances = np.asarray(
                [self.dist(x, row) for row in self.dataset], dtype=float
            )
        order = np.argsort(distances, kind="stable")
        return [(distances[i], int(i)) for i in order]

    def fit(self, X, y):
        """Store the training rows ``X`` and their labels ``y``.

        Parameters
        ----------
        X : array-like
            Training rows; must be 2-D ``(n_samples, n_features)`` when the
            default Euclidean distance is used.
        y : array-like
            One label per row of ``X``. Any sortable label type is accepted.

        Raises
        ------
        ValueError
            If ``X`` has the wrong dimensionality, ``y`` is not 1-D, their
            lengths differ, or the training set is empty.
        """
        if self._uses_default_dist:
            dataset = np.array(X, dtype=float)
        else:
            dataset = np.array(X)
        labels = np.array(y)

        if dataset.ndim < 1 or (self._uses_default_dist and dataset.ndim != 2):
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if labels.ndim != 1:
            raise ValueError("y must be a 1-D array of labels")
        if dataset.shape[0] != labels.shape[0]:
            raise ValueError(
                "X and y must have the same number of rows, got {} and {}".format(
                    dataset.shape[0], labels.shape[0]
                )
            )
        if dataset.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")

        self.dataset = dataset
        self.labels = labels
        # Encode labels as 0..n_classes-1 so voting works for negative,
        # string or otherwise non-bincount-able labels.
        self.possible_labels_, codes = np.unique(labels, return_inverse=True)
        self._label_codes = codes.reshape(-1)

    def predict(self, X):
        """Return the majority label of the ``k`` nearest training rows for each row of ``X``.

        Parameters
        ----------
        X : array-like
            Query rows with the same layout as the training rows.

        Returns
        -------
        numpy.ndarray
            One predicted label per query row, with the dtype of the training labels.
        """
        if self._uses_default_dist:
            queries = np.asarray(X, dtype=float)
        else:
            queries = np.asarray(X)

        n_classes = self.possible_labels_.shape[0]
        winners = np.zeros(queries.shape[0], dtype=int)
        for row, query in enumerate(queries):
            k_nearest = self._find_nearest(query)[:self.k]
            neighbour_ids = [idx for _, idx in k_nearest]
            votes = np.bincount(self._label_codes[neighbour_ids], minlength=n_classes)
            # argmax returns the first maximum, i.e. the smallest label on ties.
            winners[row] = np.argmax(votes)
        return self.possible_labels_[winners]


In [25]:
# Build a 5-neighbour classifier with the default Euclidean distance and hand it
# the training split (fit only stores the data; nothing is learned up front).
knn_model = KNN(k=5)
knn_model.fit(X_train,y_train)

In [26]:
# Predicted price_range class for every row of X_test; each test row is compared
# against every stored training row.
print(knn_model.predict(X_test))

[3 0 2 3 0 1 1 1 0 2 0 0 1 1 3 2 1 3 3 3 3 2 0 0 2 0 3 3 3 0 2 0 0 1 0 1 2
 0 0 1 3 2 0 3 3 0 0 3 3 2 1 0 2 1 3 2 0 2 0 1 2 3 0 1 3 1 0 1 3 1 0 0 2 3
 3 3 3 1 1 2 0 2 1 3 0 1 2 3 0 0 1 2 2 2 2 3 1 0 0 3 1 0 3 3 1 3 1 1 1 2 3
 3 2 1 0 3 3 2 3 3 0 2 1 1 0 2 1 0 0 1 1 0 2 3 3 1 1 0 2 2 0 1 2 2 1 1 3 2
 0 2 3 1 2 2 1 3 3 2 3 2 2 1 3 3 3 3 0 0 2 0 3 2 0 1 1 3 0 2 0 0 1 2 2 2 0
 1 1 1 1 0 3 3 2 2 3 3 1 2 2 2 2 0 1 1 3 2 2 0 0 0 2 3 0 1 1 1 0 0 0 0 2 3
 0 0 0 2 3 2 0 2 2 3 0 1 2 0 3 1 1 3 0 0 3 2 1 3 1 2 3 3 3 0 0 0 1 2 2 1 3
 3 3 3 0 1 2 3 1 0 2 1 2 0 0 0 3 1 1 0 1 1 0 0 2 1 3 3 1 2 1 2 2 1 1 1 0 2
 2 1 3 0 0 1 3 3 3 1 1 0 0 2 0 0 3 3 1 3 0 1 2 1 3 1 0 0 3 1 0 0 0 1 1 3 0
 2 1 3 2 1 2 3 2 1 2 3 3 1 2 3 1 0 3 3 2 0 0 0 1 3 0 1 3 2 1 0 1 3 1 2 1 3
 1 0 2 0 0 2 2 1 1 2 0 1 3 1 2 3 1 1 1 1 1 3 3 2 3 0 1 0 1 3 3 1 3 2 0 1 3
 3 3 3 2 1 3 2 1 3 0 0 0 2 0 2 3 2 1 2 3 2 2 2 3 3 2 3 2 2 3 0 1 2 3 2 1 3
 1 0 3 2 2 2 3 1 3 2 0 2 3 1 2 2 0 3 3 0 3 1 2 1 1 0 0 2 0 1 2 0 2 3 3 1 0
 0 2 2 0 3 2 2 3 1 2 2 2 

In [28]:
# Keep the test-set predictions so they can be scored against y_test.
y_pred = knn_model.predict(X_test)

In [27]:
def pred_accuracy(x,y):
    """Return the percentage of predictions that match the true labels.

    Parameters
    ----------
    x : array-like
        Predicted labels.
    y : array-like
        True labels, with the same shape as ``x``.

    Returns
    -------
    float
        Share of positions where ``x`` equals ``y``, on a 0-100 scale.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape or are empty.
    """
    # Score the arguments rather than notebook globals, so the result always
    # reflects what the caller passed and the function works on a fresh kernel.
    predicted = np.asarray(x)
    actual = np.asarray(y)
    if predicted.shape != actual.shape:
        raise ValueError(
            "x and y must have the same shape, got {} and {}".format(
                predicted.shape, actual.shape
            )
        )
    if actual.size == 0:
        raise ValueError("cannot compute accuracy of empty inputs")
    num_correct = np.sum(predicted == actual)
    accuracy = num_correct / actual.size * 100
    return accuracy

In [29]:
# Accuracy of the KNN predictions on the held-out split, in percent.
pred_accuracy(y_pred,y_test)

93.0