In [41]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt

In [35]:
df = pd.read_csv("../data/binary_classification.csv")

# Every column except the last is used as a feature; the last column is the target.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Fixed seed so the train/test split is the same on every run of the notebook.
SEED = 42
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y.values, test_size=0.2, shuffle=True, random_state=SEED
)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows; fitting on the full dataset would leak test-set mean/variance
# into training.
x_ssc = StandardScaler()
x_train = x_ssc.fit_transform(x_train_raw)
x_test = x_ssc.transform(x_test_raw)

# Whole feature matrix scaled with the training statistics. Kept so that any
# code still referring to ``x_scaled`` keeps working.
x_scaled = x_ssc.transform(x)

In [36]:
def sigmoid_function(h_x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-h_x))`` element-wise.

    The value is computed in a numerically stable form: the exponential is
    only ever taken of a non-positive number, so very negative scores do not
    overflow ``np.exp`` the way the naive formula does.

    Parameters
    ----------
    h_x : array_like or scalar
        Linear scores to squash.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Sigmoid of ``h_x`` with the same shape as the input (a scalar input
        yields a scalar).
    """
    h_x = np.asarray(h_x, dtype=float)
    # exp(-|h_x|) always lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(h_x))
    # h_x >= 0: 1 / (1 + exp(-h_x));  h_x < 0: exp(h_x) / (1 + exp(h_x)).
    result = np.where(h_x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays with one or more dimensions as they are.
    return result[()]

def binary_classification_loss_function(y_real, y_pred, eps=1e-15):
    """Return the mean binary cross-entropy of predictions against labels.

    Parameters
    ----------
    y_real : array_like
        Ground-truth labels (0 or 1).
    y_pred : array_like
        Predicted probabilities, same shape as ``y_real``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm.

    Returns
    -------
    numpy.float64
        ``mean(-(y * log(p) + (1 - y) * log(1 - p)))`` over all elements.
    """
    y_real = np.asarray(y_real, dtype=float)
    # A prediction of exactly 0 or 1 would give log(0) = -inf, and
    # 0 * -inf = NaN would then poison the mean; clipping keeps it finite.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1.0 - eps)
    per_sample = -(y_real * np.log(y_pred) + (1.0 - y_real) * np.log(1.0 - y_pred))
    return per_sample.mean()

def get_derivative_of_cost_w(y_real, y_pred, x_real):
    """Return the weight update direction ``-2 * (y_real - y_pred) . x_real``.

    Parameters
    ----------
    y_real : numpy.ndarray
        Ground-truth labels.
    y_pred : numpy.ndarray
        Predictions, same shape as ``y_real``.
    x_real : numpy.ndarray
        Feature matrix whose rows line up with the entries of ``y_real``.

    Returns
    -------
    numpy.ndarray
        Residuals projected onto the features, scaled by -2.

    Notes
    -----
    The result is a sum over samples, not a mean, so its magnitude grows with
    the number of rows passed in.
    """
    residual = y_real - y_pred
    projected = residual.dot(x_real)
    return -2 * projected

def get_derivative_of_cost_b(y_real, y_pred):
    """Return the bias update direction ``-2 * sum(y_real - y_pred)``.

    Parameters
    ----------
    y_real : numpy.ndarray
        Ground-truth labels.
    y_pred : numpy.ndarray
        Predictions, same shape as ``y_real``.

    Returns
    -------
    numpy.float64
        Summed residual scaled by -2 (a sum over samples, not a mean).
    """
    residual = y_real - y_pred
    total_residual = residual.sum()
    return -2 * total_residual

In [37]:
# Model parameters start at zero: one weight per feature column of ``x`` and a
# single-element integer bias array.
w = np.zeros(shape=x.shape[-1], dtype=float)
b = np.zeros(1, dtype=int)

In [38]:
# Sanity check: show the shapes of the training data and of the parameters
# before the optimisation loop uses them together.
shape_report = (
    ("x_train.shape : ", x_train),
    ("y_train.shape : ", y_train),
    ("w.shape : ", w),
    ("b.shape : ", b),
)
for caption, array in shape_report:
    print(caption, array.shape)

x_train.shape :  (455, 30)
y_train.shape :  (455,)
w.shape :  (30,)
b.shape :  (1,)


In [39]:
learning_rate = 1e-5
n_iterations = 3000
log_every = 100

# Start from zero parameters every time this cell runs, so re-executing it
# repeats the same training run instead of continuing from the previous one.
w = np.zeros(x_train.shape[-1])
b = np.zeros(1)

# Batch gradient descent over the whole training set.
for i in range(n_iterations):
    # Forward pass: linear score, then sigmoid to get probabilities.
    h_x = x_train.dot(w) + b
    y_pred = sigmoid_function(h_x)

    loss = binary_classification_loss_function(y_train, y_pred)

    derivative_cost_w = get_derivative_of_cost_w(y_train, y_pred, x_train)
    derivative_cost_b = get_derivative_of_cost_b(y_train, y_pred)

    if i % log_every == 0:
        # Progress is reported against the real iteration count; the loss is
        # the value measured before this iteration's update.
        print(f"{i}/{n_iterations}")
        print(f"loss : {loss}")

    # Step against the update direction returned by the derivative helpers.
    w = w - learning_rate * derivative_cost_w
    b = b - learning_rate * derivative_cost_b

0/100
loss : 0.6931471805599453
100/100
loss : 0.2559114546067683
200/100
loss : 0.19111510262799947
300/100
loss : 0.16212875659270595
400/100
loss : 0.14511441603469005
500/100
loss : 0.1337171076481074
600/100
loss : 0.12544361483866232
700/100
loss : 0.11910291364689392
800/100
loss : 0.114050254806315
900/100
loss : 0.1099043275502473
1000/100
loss : 0.10642415114437508
1100/100
loss : 0.10344934868109625
1200/100
loss : 0.10086866950608615
1300/100
loss : 0.09860227477427252
1400/100
loss : 0.09659122188365524
1500/100
loss : 0.0947909393809886
1600/100
loss : 0.09316702361978195
1700/100
loss : 0.09169244287991525
1800/100
loss : 0.09034562532887931
1900/100
loss : 0.0891091192807536
2000/100
loss : 0.08796863412500426
2100/100
loss : 0.08691234055967648
2200/100
loss : 0.08593035124470584
2300/100
loss : 0.08501432940144235
2400/100
loss : 0.08415718971832972
2500/100
loss : 0.08335286689615465
2600/100
loss : 0.0825961344664665
2700/100
loss : 0.08188246146396641
2800/100
loss

In [40]:
# Score the held-out rows with the trained parameters and turn the linear
# scores into probabilities.
h_x = x_test @ w + b
y_pred = sigmoid_function(h_x)

In [46]:
# Threshold the probabilities at 0.5 to get class predictions.
# accuracy_score expects (y_true, y_pred) in that order.
accuracy_score(y_test, y_pred > 0.5)

0.9736842105263158