In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from pathlib import Path
from utils.paths import DATA_RAW_DIR

In [2]:
# Load the Social Network Ads dataset.

# The CSV is addressed relative to DATA_RAW_DIR (imported from utils.paths).
path_social_network = DATA_RAW_DIR / "w2_Social_Network_Ads.csv"

# Print True/False for whether the file is present; this is informational
# only -- the read below is attempted regardless of the result.
print(Path(path_social_network).exists())

# Parse the file as comma-separated, UTF-8 encoded text.
df_social_network = pd.read_csv(
    path_social_network,
    encoding='utf-8',
    sep=',',
)

# Show the first rows as the cell output.
df_social_network.head()

True


Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [3]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df_social_network.shape

(400, 3)

In [4]:
# Split the frame into a feature matrix and a target vector by position.
# NOTE: this relies on the target being the LAST column of the frame.

# Feature matrix: all columns except the last one.
# `.to_numpy()` is the accessor pandas recommends over `.values`.
X = df_social_network.iloc[:, :-1].to_numpy()
# Target vector: only the last column.
y = df_social_network.iloc[:, -1].to_numpy()


In [7]:
# Hold out 25% of the rows as the test set; random_state is pinned to 0 so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.25,
)

In [8]:
# Standardize features by removing the mean and scaling to unit variance.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test set is scaled with the training statistics.
# NOTE: X_train and X_test are rebound to their scaled versions here; the
# unscaled arrays are not kept under another name.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [9]:
# Logistic regression classifier, trained on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
model_lr_classifier = LogisticRegression(random_state=0).fit(X_train, y_train)

# Echo the fitted estimator as the cell output.
model_lr_classifier


In [10]:
# Predict class labels for the test-set features with the fitted classifier.
y_test_pred = model_lr_classifier.predict(X=X_test)


In [11]:
# Evaluate: place the true test labels next to the model's predictions so the
# two can be compared row by row.
comparison = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': y_test_pred,
    }
)

# Display the side-by-side table.
comparison


Unnamed: 0,Actual,Predicted
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
95,1,0
96,0,0
97,1,0
98,1,1


In [12]:
# Add a 'diff' column holding |Actual - Predicted|: zero where the prediction
# matches the true label, non-zero where it does not.
comparison['diff'] = (comparison['Actual'] - comparison['Predicted']).abs()

# Display the table including the new column.
comparison

Unnamed: 0,Actual,Predicted,diff
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,0,0,0
...,...,...,...
95,1,0,1
96,0,0,0
97,1,0,1
98,1,1,0


In [13]:
# Show only the rows where the prediction and the actual label disagree
# (i.e. where 'diff' is non-zero).
comparison.loc[comparison['diff'].ne(0)]

Unnamed: 0,Actual,Predicted,diff
9,0,1,1
31,1,0,1
55,1,0,1
58,1,0,1
63,1,0,1
73,1,0,1
76,0,1,1
81,0,1,1
88,1,0,1
95,1,0,1
