## Importing the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## Load the dataset

In [2]:
data = pd.read_csv('Social_Network_Ads.csv')
data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


## Check for missing values

In [3]:
data.isnull().sum()

Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

## Divide the data into X(Independent) and y(Dependent)

In [4]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X.shape, y.shape

((400, 2), (400,))

## Divide the data into train_test_split

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)
X_train.shape, X_test.shape

((280, 2), (120, 2))

## Feature Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()

ss.fit_transform(X_train)

array([[-1.1631724 , -1.5849703 ],
       [ 2.17018137,  0.93098672],
       [ 0.0133054 ,  1.22017719],
       [ 0.20938504,  1.07558195],
       [ 0.40546467, -0.48604654],
       [-0.28081405, -0.31253226],
       [ 0.99370357, -0.8330751 ],
       [ 0.99370357,  1.8563962 ],
       [ 0.0133054 ,  1.24909623],
       [-0.86905295,  2.26126285],
       [-1.1631724 , -1.5849703 ],
       [ 2.17018137, -0.80415605],
       [-1.35925203, -1.46929411],
       [ 0.40546467,  2.2901819 ],
       [ 0.79762394,  0.75747245],
       [-0.96709276, -0.31253226],
       [ 0.11134522,  0.75747245],
       [-0.96709276,  0.55503912],
       [ 0.30742485,  0.06341534],
       [ 0.69958412, -1.26686079],
       [-0.47689368, -0.0233418 ],
       [-1.7514113 ,  0.3526058 ],
       [-0.67297331,  0.12125343],
       [ 0.40546467,  0.29476771],
       [-0.28081405,  0.06341534],
       [-0.47689368,  2.2901819 ],
       [ 0.20938504,  0.03449629],
       [ 1.28782302,  2.20342476],
       [ 0.79762394,

In [7]:
ss.fit_transform(X_test)

array([[-0.64807267,  0.53080315],
       [ 0.07535729, -0.59737588],
       [-0.19592895,  0.16490725],
       [-0.64807267,  0.28687255],
       [-0.19592895, -0.59737588],
       [-0.9193589 , -1.51211563],
       [-0.55764392, -1.66457226],
       [-0.1055002 ,  2.26880869],
       [-1.7332176 , -0.04853203],
       [ 0.88921599, -0.81081516],
       [-0.64807267, -0.6278672 ],
       [-0.82893016, -0.44491925],
       [-0.01507146, -0.44491925],
       [ 0.16578603,  0.2258899 ],
       [-1.55236011,  0.50031183],
       [-0.46721518,  1.44554291],
       [-0.01507146,  0.2258899 ],
       [-1.64278886,  0.4698205 ],
       [ 1.61264594,  1.84193014],
       [-0.19592895, -1.45113298],
       [-0.19592895, -0.68884985],
       [ 0.88921599,  2.26880869],
       [ 0.34664352, -0.56688455],
       [ 0.88921599,  1.07964701],
       [-1.28107388, -1.26818503],
       [ 1.07007347,  2.17733471],
       [-0.82893016,  0.53080315],
       [-0.73850141,  0.31736388],
       [-0.01507146,

## Apply the KNN Classifier Algorithm on train dataset

In [8]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(X_train, y_train)

KNeighborsClassifier()

## Check the predictions on the test dataset

In [9]:
y_pred = knn.predict(X_test)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0], dtype=int64)

## Comparing the prediction and test set results

In [10]:
y_pred = knn.predict(X_test)
y_test = y_test.values
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 1]]


## Creating a Confusion Matrix and Checking

In [11]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_pred, y_test)
print(cm)

[[69 11]
 [10 30]]


## Calculating Precision, Recall, F1 score and Accuracy

- True Negative: 69
- False Negative: 11
- False Positive : 10
- True positive: 30

In [12]:
Precision = 30 / (30 + 10)
Precision

0.75

In [13]:
recall = 30 / (30 + 11)
recall

0.7317073170731707

In [14]:
f1_score = (2* Precision * recall) / (Precision + recall)
f1_score

0.7407407407407408

In [15]:
Accuracy = (69 + 30) / (69 + 11 + 10 + 30)
Accuracy

0.825

## Checking Accuracy Score

In [16]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.825

## Printing the classification Report

In [17]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      0.87      0.87        79
           1       0.75      0.73      0.74        41

    accuracy                           0.82       120
   macro avg       0.81      0.80      0.80       120
weighted avg       0.82      0.82      0.82       120

