In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Read the purchase-history CSV; fields in this file are separated by ';'
data = pd.read_csv('./data/purchase-history.csv', sep=';')

# Discard the 'id' column
data = data.drop(columns=['id'])
data

Unnamed: 0,gender,age,estimated_salary,purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [3]:
# One-hot encode the 'gender' column. drop_first=True drops the indicator for
# the first category, so a single indicator column remains ('gender_Male',
# which the feature selection further down relies on).
#
# The guard keeps this cell safe to re-run: after the first execution `data`
# no longer has a 'gender' column, and calling get_dummies with
# columns=['gender'] again would raise a KeyError.
if 'gender' in data.columns:
    data = pd.get_dummies(data, columns=['gender'], drop_first=True)

data

Unnamed: 0,age,estimated_salary,purchased,gender_Male
0,19,19000,0,1
1,35,20000,0,1
2,26,43000,0,0
3,27,57000,0,0
4,19,76000,0,1
...,...,...,...,...
395,46,41000,1,0
396,51,23000,1,1
397,50,20000,1,0
398,36,33000,0,1


In [4]:
# Separate the predictor columns (X) from the label column (y)
X = data[[
    'gender_Male',
    'age',
    'estimated_salary',
]]
y = data['purchased']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same scaling
# to both the training and the test split
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Number of neighbours the classifier consults for each prediction
k = 8

# Build the KNN classifier and fit it to the training split in one step
# (fit returns the estimator itself)
knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = knn.predict(X_test)

# Accuracy is the share of test rows whose predicted label matches the true one
accuracy = y_test.eq(y_pred).mean()
print(f"Accuracy of KNN with K={k}: {accuracy:.2%}")

Accuracy of KNN with K=8: 93.75%


In [10]:
# Ask the user for their gender and validate it straight away, before the
# remaining prompts. Without this check any unrecognised answer (a typo, "m",
# an empty string) would be silently encoded as female and yield a prediction
# for the wrong profile.
user_gender = input("Enter your gender (Male/Female): ").strip().lower()
if user_gender not in ('male', 'female'):
    raise ValueError(
        f"Unrecognized gender {user_gender!r}; please enter 'Male' or 'Female'."
    )

# float() raises ValueError on non-numeric input, so bad numbers also stop
# the cell instead of producing a prediction
user_age = float(input("Enter your age: "))
user_salary = float(input("Enter your estimated annual salary: "))

# Encode gender the same way as the training column 'gender_Male':
# 1 for male, 0 for female
user_gender_encoded = 1 if user_gender == 'male' else 0

# Create a feature vector from user input
user_input = [user_gender_encoded, user_age, user_salary]
print(user_input)

# Standardize the user input with the scaler fitted on the training data.
# The scaler was fitted on a DataFrame, so pass a one-row DataFrame whose
# columns match the training features in name and order; a bare list would
# trigger scikit-learn's feature-name warning and hide column-order mistakes.
user_features = pd.DataFrame(
    [user_input],
    columns=['gender_Male', 'age', 'estimated_salary'],
)
user_input = scaler.transform(user_features)

# Predict whether the user will make a purchase
user_prediction = knn.predict(user_input)
print(user_prediction)

# Display the prediction
if user_prediction[0] == 1:
    print("Based on the provided information, you are likely to make a purchase.")
else:
    print("Based on the provided information, you are not likely to make a purchase.")

[1, 24.0, 20000.0]
[0]
Based on the provided information, you are not likely to make a purchase.


