In [1]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
%matplotlib inline

In [2]:
# Read the ads CSV and discard the "User ID" column in a single expression.
dataset = pd.read_csv("../data/Social_Network_Ads.csv").drop(columns="User ID")
dataset

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [3]:
# Bucket the continuous Age / EstimatedSalary columns into three labelled bands
# each, then one-hot encode every categorical feature.
# NOTE: this cell rebinds `dataset` and drops the columns it reads, so it must be
# run exactly once after the loading cell; re-running it raises a KeyError.
age_bins = [18, 30, 45, 60]
age_labels = ["Young", "MiddleAged", "Old"]

income_bins = [15000, 50000, 100000, 150000]
income_labels = ["Low", "Middle", "High"]

# include_lowest=True closes the first interval on the left, so a value equal to
# the lowest edge (18 / 15000) lands in the first band instead of becoming NaN.
dataset["Age Group"] = pd.cut(dataset["Age"], bins=age_bins, labels=age_labels, include_lowest=True)
dataset["Income Group"] = pd.cut(dataset["EstimatedSalary"], bins=income_bins, labels=income_labels, include_lowest=True)

# pd.cut yields NaN for values that are missing or outside the outer bin edges,
# and get_dummies would then silently encode those rows as all zeros.
# Fail loudly instead of training on rows with no band set.
unbinned = dataset[["Age Group", "Income Group"]].isna().any(axis=1)
if unbinned.any():
    raise ValueError(
        f"{int(unbinned.sum())} row(s) have an Age or EstimatedSalary that is missing "
        "or outside age_bins/income_bins; adjust the bin edges."
    )

dataset = dataset.drop(columns=["Age", "EstimatedSalary"])
# dtype=int keeps the indicator columns as 0/1 integers on every pandas version
# (pandas >= 2.0 would otherwise emit booleans).
dataset = pd.get_dummies(dataset, columns=["Gender", "Age Group", "Income Group"], dtype=int)
dataset

Unnamed: 0,Purchased,Gender_Female,Gender_Male,Age Group_Young,Age Group_MiddleAged,Age Group_Old,Income Group_Low,Income Group_Middle,Income Group_High
0,0,0,1,1,0,0,1,0,0
1,0,0,1,0,1,0,1,0,0
2,0,1,0,1,0,0,1,0,0
3,0,1,0,1,0,0,0,1,0
4,0,0,1,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...
395,1,1,0,0,0,1,1,0,0
396,1,0,1,0,0,1,1,0,0
397,1,1,0,0,0,1,1,0,0
398,0,0,1,0,1,0,1,0,0


In [4]:
# Separate the target column from the remaining feature columns.
y = dataset["Purchased"]
X = dataset.drop(columns="Purchased")

In [5]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.1,
)

# Fit a logistic regression with default hyperparameters on the training rows,
# then predict labels for the held-out rows.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [6]:
# Compute every metric on the held-out predictions first, then print them together.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
# sep="\n" puts the heading and its payload on separate lines, as two prints would.
print("Report:", report, sep="\n")
print("Confusion Matrix:", matrix, sep="\n")

Accuracy: 0.93
Report:
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        32
           1       0.78      0.88      0.82         8

    accuracy                           0.93        40
   macro avg       0.87      0.91      0.89        40
weighted avg       0.93      0.93      0.93        40

Confusion Matrix:
[[30  2]
 [ 1  7]]
