In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import math
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
#read the dataset, then pull out the model inputs and the labels as arrays
data = pd.read_csv('LOR.csv')
#positional columns 2 and 3 form the feature matrix; positional column 4 holds the labels
x, y = data.iloc[:, 2:4].values, data.iloc[:, 4].values

In [3]:
#hold out 20% of the rows as a test split; random_state pins the shuffle
(train_x, test_x,
 train_y, test_y) = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [4]:
#applying scaling to the data to make calculations easier
#the scaler is fitted on the training split only; the test split is then
#transformed with the training mean/std so both splits share one mapping
#(re-fitting on the test split would scale it with different statistics)
scaler = StandardScaler()
train_x = scaler.fit_transform(train_x)
test_x = scaler.transform(test_x)

In [5]:
def sigmoid(val):
    """Return the logistic function 1 / (1 + e^-val) for a scalar ``val``.

    The two branches are algebraically identical; each one only ever
    exponentiates a non-positive number, so ``math.exp`` cannot raise
    ``OverflowError`` for large-magnitude inputs.
    """
    if val >= 0:
        # exp(-val) is in (0, 1] here, so it can underflow to 0.0 but never overflow
        return 1 / (1 + math.exp(-val))
    # val is negative (or NaN): exp(val) is in [0, 1)
    exp_val = math.exp(val)
    return exp_val / (exp_val + 1)

def sigmoid_dr(val):
    """Return the derivative of the sigmoid at scalar ``val``: s(val) * (1 - s(val))."""
    # evaluate the sigmoid once and reuse it instead of computing it twice
    sig = sigmoid(val)
    return sig * (1 - sig)

def cal_loss(y_pred, y):
    """Return the mean squared error between predictions and targets.

    Parameters
    ----------
    y_pred : array-like of predicted values.
    y : array-like of target values, same length as ``y_pred``.

    Returns
    -------
    The mean of ``(y - y_pred) ** 2`` over all samples.

    Raises
    ------
    ValueError
        If ``y`` is empty.
    """
    y_pred = np.asarray(y_pred, dtype=float)
    y = np.asarray(y, dtype=float)
    # the sample count comes from the input itself rather than from a global `n`,
    # so the function works no matter which cells have been run
    if y.size == 0:
        raise ValueError("cal_loss requires at least one sample")
    loss = np.mean((y - y_pred) ** 2)
    return loss

In [6]:
#logisticRegression algorithm code
#
#fits sigmoid(slope1*x1 + slope2*x2 + intercept) to train_y with batch gradient
#descent on the mean squared error between predicted probabilities and labels

#initializing slopes, learning rate & intercept
slope1 = 1
slope2 = 1
intercept = 1
learning_rate = 0.1

#stopping criteria: stop when the loss changes by less than `tolerance` between
#two consecutive iterations, or after `max_iterations` so the loop always ends
tolerance = 0.0000001
max_iterations = 100000

n = len(train_x)

old_loss = 0

for iteration in range(max_iterations):
    #linear combination that is fed to the sigmoid
    b_sig = slope1*train_x[:, 0] + slope2*train_x[:, 1] + intercept

    #predicted probabilities for the current parameters
    y_pred = np.array([sigmoid(val) for val in b_sig])

    #calculating derivatives of sigmoid values
    dr_sigmoid_val = np.array([sigmoid_dr(val) for val in b_sig])

    #calculate loss
    loss = cal_loss(y_pred, train_y)

    #if the loss is not changing, or changing only minutely, stop iterating
    #(an exactly unchanged loss is covered by the same check)
    if abs(old_loss - loss) < tolerance:
        break

    old_loss = loss

    com = y_pred - train_y
    #calculating derivatives of the loss w.r.t. the slopes & intercept (chain rule
    #through the sigmoid: d(loss)/d(w) = (2/n) * sum((y_pred - y) * sigmoid'(z) * x))
    d_slope1 = (2/n) * sum(com * dr_sigmoid_val * train_x[:, 0])
    d_slope2 = (2/n) * sum(com * dr_sigmoid_val * train_x[:, 1])
    d_intercept = (2/n) * sum(com * dr_sigmoid_val)

    #updating slopes & intercept
    slope1 = slope1 - learning_rate * d_slope1
    slope2 = slope2 - learning_rate * d_slope2
    intercept = intercept - learning_rate * d_intercept
else:
    #iteration budget exhausted without meeting the tolerance: refresh y_pred so it
    #matches the final parameters (the last update happened after it was computed)
    b_sig = slope1*train_x[:, 0] + slope2*train_x[:, 1] + intercept
    y_pred = np.array([sigmoid(val) for val in b_sig])
    print('warning: gradient descent did not converge within', max_iterations, 'iterations')

In [7]:
#turn the final training-set probabilities into class labels: below 0.5 -> 0, otherwise 1
y_pred_own = [int(not prob < 0.5) for prob in y_pred]

#confusion matrix and accuracy of those labels against the training labels
mat = confusion_matrix(train_y, y_pred_own)
score = accuracy_score(train_y, y_pred_own)

#report the metrics, then the learned parameters
print(mat, score)
print('\n',slope1, slope2, intercept)

[[175  24]
 [ 28  93]] 0.8375

 2.6202500496516046 1.4086280657190846 -0.7838181010042542


In [8]:
#cross-check our result against scikit-learn's LogisticRegression, fitted on
#the same scaled training data
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression().fit(train_x, train_y)

#training-set accuracy of the sklearn model
sk_train_accuracy = log_reg.score(train_x, train_y)
print(sk_train_accuracy)

#its fitted coefficients and intercept, for comparison with ours
print('\n', log_reg.coef_, log_reg.intercept_)

0.821875

 [[2.00316198 1.08088613]] [-0.87076381]


In [9]:
#our model's training accuracy (0.8375) is close to, but not the same as, the sklearn model's (0.821875)