In [1]:
# I have a dataset that shows which users have purchased an iPhone. 
# My goal in this project is to predict if the customer will purchase an iPhone or not using KNN,
# given their gender, age and salary.

In [2]:
# importing libraries
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the purchase records from the CSV file into a DataFrame,
# then display the first ten rows as a quick preview.
data = pd.read_csv(filepath_or_buffer="iphone_purchase_records.csv")
data.head(n=10)

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
5,Male,27,58000,0
6,Female,27,84000,0
7,Female,32,150000,1
8,Male,25,33000,0
9,Female,35,65000,0


In [4]:
# Split the frame into independent variables (X) and the dependent variable (Y).
#   X: every column except the last one.
#   Y: the last column (`Purchase Iphone` in the preview above).
# The target is selected with -1 rather than a hard-coded position so that it
# always stays in step with the `:-1` feature slice; a fixed index would
# silently overlap with the features if the number of columns ever changed.
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

In [5]:
# Encode the Gender column (column 0 of X) as integers.
# LabelEncoder numbers the classes in sorted order, so Female = 0 and Male = 1.
labelEncoder_gender = LabelEncoder()
X[:, 0] = labelEncoder_gender.fit_transform(X[:, 0])

# Every column now holds numbers, so cast the whole array to int64.
# `astype` already returns a new array of the requested dtype; there is no
# need to split X into rows and re-stack them first.
X = X.astype(np.int64)

In [6]:
# Hold out 25% of the rows as a test set; the remaining 75% are used for training.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

In [7]:
# Standardise the features for better KNN performance.
# The scaler is fitted on the training split only, and that same fitted
# scaler is then applied to both the training and the test split.
st_Scaler = StandardScaler()
st_Scaler.fit(X_train)
X_train = st_Scaler.transform(X_train)
X_test = st_Scaler.transform(X_test)

In [8]:
# Build the k-nearest-neighbours classifier and train it on the training split.
# With the Minkowski metric, p=2 gives Euclidean distance and p=1 gives Manhattan distance.
knn = KNeighborsClassifier(
    n_neighbors=5,
    metric="minkowski",
    p=2,
)
knn.fit(X_train, Y_train)

KNeighborsClassifier()

In [9]:
# Predict a class label for every row of the test split with the fitted model.
prediction = knn.predict(X=X_test)

In [10]:
# Evaluate the test-set predictions against the true labels.
# Each metric is computed and printed in turn.

# Confusion matrix of true labels versus predicted labels.
confusionMatrix = metrics.confusion_matrix(y_true=Y_test, y_pred=prediction)
print(f"Confusion Metrix: {confusionMatrix}")

# Accuracy: share of all test rows that were classified correctly.
accuracy = metrics.accuracy_score(y_true=Y_test, y_pred=prediction)
print(f"Accuracy: {accuracy}")

# Precision: share of predicted purchases that were real purchases.
precision = metrics.precision_score(y_true=Y_test, y_pred=prediction)
print(f"Precision: {precision}")

# Recall: share of real purchases that the model found.
recall = metrics.recall_score(y_true=Y_test, y_pred=prediction)
print(f"Recall: {recall}")

Confusion Metrix: [[64  4]
 [ 3 29]]
Accuracy: 0.93
Precision: 0.8787878787878788
Recall: 0.90625


**Results without scaling the train and test data:**

| Confusion Matrix  |  Accuracy  |  Precision  |   Recall  | Incorrect prediction |
|-------------------|------------|-------------|-----------|----------------------|
|[[59, 9], [8, 24]] |    0.83    |   0.7272    |  0.75000  |     9 + 8  = 17      |


**Results with scaling the train and test data:**

| Confusion Matrix  |  Accuracy  |  Precision  |   Recall  | Incorrect prediction |
|-------------------|------------|-------------|-----------|----------------------|
|[[64, 4], [3, 29]] |    0.93    |   0.8787    |  0.90625  |      4 + 3  = 7      |

