In [44]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [45]:
# Load the online-shop purchase records from the project dataset folder
# and preview the first rows to check the columns.
csv_path = "../../dataset/classification/online_shop.csv"
shop_data = pd.read_csv(csv_path)
shop_data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [46]:
# Encode Gender as a 0/1 integer indicator (Male -> 1, Female -> 0).
#
# The numeric-dtype guard makes this cell idempotent: once the column has been
# encoded, re-running the cell is a no-op instead of failing with
# KeyError: 'Female' (get_dummies on an already 0/1 column has no 'Female' column).
# Comparing with .eq() yields a Series rather than a one-column DataFrame, and
# .astype(int) gives a stable integer dtype even on newer pandas versions, where
# get_dummies returns boolean dummies.
if not pd.api.types.is_numeric_dtype(shop_data['Gender']):
    shop_data['Gender'] = shop_data['Gender'].eq('Male').astype(int)
shop_data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [47]:
# Select the model inputs by column name rather than by position, so a change in
# the CSV layout (an extra index column, reordered columns) cannot silently feed
# the wrong columns, or the target itself, into the model.
FEATURE_COLUMNS = ['Gender', 'Age', 'EstimatedSalary']
X = shop_data[FEATURE_COLUMNS]
y = shop_data['Purchased']

# test_size=0.01 holds out only a handful of rows (four in the recorded output);
# they are used for the manual probability check further down, not for evaluation.
# random_state is fixed so the same rows are held out on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=42)

In [48]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test rows do not influence the scaling.
scaler = StandardScaler().fit(X_train)
scale_X_train = scaler.transform(X_train)
scale_X_test = scaler.transform(X_test)

In [49]:
# Train a logistic-regression classifier (liblinear solver) on the scaled
# training features; fit() returns the estimator itself, so it can be chained.
logR = LogisticRegression(solver='liblinear').fit(scale_X_train, y_train)
logR

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [50]:
# Hard class labels (0 or 1) predicted for the held-out, scaled rows.
logR.predict(scale_X_test)

array([0, 1, 0, 1])

In [51]:
# Class probabilities for the same held-out rows; each row is
# [P(class 0), P(class 1)], matching the manual calculation further down.
logR.predict_proba(scale_X_test)

array([[0.72562477, 0.27437523],
       [0.01766318, 0.98233682],
       [0.9843013 , 0.0156987 ],
       [0.12965975, 0.87034025]])

In [52]:
# Raw linear score for each held-out row (weights · features + intercept);
# applying the sigmoid to it gives the class-1 probability from predict_proba.
logR.decision_function(scale_X_test)

array([-0.97253644,  4.01845199, -4.13835429,  1.90397052])

In [53]:
# Learned weights, one per scaled feature, in the column order of X
# (Gender, Age, EstimatedSalary).
logR.coef_

array([[0.13929667, 2.25647996, 1.12554254]])

In [54]:
# Learned bias term that is added to the weighted feature sum.
logR.intercept_

array([-1.07657797])

In [55]:
# Short alias for the weight matrix, used by the manual linear-predictor
# calculation in the cells that follow.
c= logR.coef_

In [56]:
# Show the unscaled held-out rows; the first one is the sample whose
# probability is recomputed by hand below.
X_test

Unnamed: 0,Gender,Age,EstimatedSalary
209,0,46,22000
280,0,59,88000
33,0,28,44000
210,0,48,96000


In [57]:
# Scale the first held-out row with the already-fitted scaler.
#
# iloc[[0]] (list indexer) keeps the row as a one-row DataFrame, so the column
# names and per-column dtypes seen when the scaler was fitted are preserved.
# Converting the row to a bare NumPy array first drops the feature names, which
# recent scikit-learn versions warn about, and collapses all columns to one dtype.
# The result is still a (1, n_features) array.
sample = scaler.transform(X_test.iloc[[0]])
sample

array([[-0.98994949,  0.80707901, -1.40307395]])

In [58]:
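# Reference only: the linear predictor z = w·x + b written out term by term.
# The next cell computes the same value in vectorised form.
# lin_pred = c[0][0]*sample[0][0] + c[0][1]*sample[0][1] + c[0][2]*sample[0][2] + logR.intercept_[0]
# print(lin_pred)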

In [59]:
# Linear predictor for the single scaled sample: multiply each feature by its
# weight, add the products up, then add the intercept.
weighted_features = sample * c
lin_pred = weighted_features.sum() + logR.intercept_[0]

In [72]:
def _sigmoid(z):
    """Logistic function 1 / (1 + e^(-z))."""
    return 1 / (1 + np.exp(-z))


# P(class 1) is the sigmoid of the linear predictor; P(class 0) is the sigmoid
# of its negation, i.e. 1 / (1 + e^(lin_pred)).
prob_1 = _sigmoid(lin_pred)
prob_0 = _sigmoid(-lin_pred)
print(f"0: {prob_0}, 1: {prob_1}")

0: 0.7256247745705376, 1: 0.27437522542946235


In [63]:
# Cross-check: the model's own probabilities for the scaled sample should
# match the values computed by hand from the weights and intercept.
logR.predict_proba(sample)

array([[0.72562477, 0.27437523]])

In [66]:
# Predicted class label for the same scaled sample.
logR.predict(sample)

array([0])