In [22]:
import warnings
warnings.filterwarnings('ignore') # to ignore warnings about pandas library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score

In [23]:
class LogisticRegressionClassifier:
    def __init__(self, lr=0.01, n_iters=100):
        self.lr = lr
        self.n_iters=n_iters
    

    def fit(self, x, y):
        if len(x.shape) == 1:
            self.n_params = 1
        else:
            self.n_params = x.shape[1]
        
        sample_size = x.shape[0]

        self.params = np.random.uniform(low=0.5, high=1.5, size=self.n_params)
        self.bias = 0.0

        epsilon = 1e-5
        for _ in range(self.n_iters):

            for i in range(sample_size):
                z = np.dot(x[i], self.params.T) + self.bias
                y_pred = 1 / (1 + np.exp(-z))

                for j in range(self.n_params):
                    self.params[j] -= self.lr * ( ((1-y[i])/(1-y_pred+epsilon)) - (y[i]/y_pred+epsilon) )  *  ( np.exp(-z) / np.square(1 + np.exp(-z)) ) * x[i][j]

                self.bias -= self.lr * ( ((1-y[i])/(1-y_pred+epsilon)) - (y[i]/y_pred+epsilon) )  *  ( np.exp(-z) / np.square(1 + np.exp(-z)) )
        

    def predict(self, x):
        z = np.dot(x, self.params.T) + self.bias
        return 1 / (1 + np.exp(-z))
    

    def score(self, x, y):
        y_pred = self.predict(x)
        y_pred = [1 if i > 0.5 else 0 for i in y_pred]
        return accuracy_score(y_pred=y_pred, y_true=y)



In [24]:
from sklearn.datasets import make_classification

In [25]:
x, y = make_classification(random_state=42, n_samples=50)
x, y

(array([[-0.92693047, -0.03666101,  0.01742877,  0.44381943, -1.24778318,
         -1.02438764,  0.01023306,  0.08190923, -0.05952536,  1.16316375,
          1.44127329,  0.22371669, -3.24126734,  1.6324113 , -0.25256815,
          0.13074058,  0.77463405, -0.44004449, -1.43014138, -1.43586215],
        [-0.45006547,  0.13044668, -0.062511  ,  0.0052437 ,  0.51443883,
         -0.14237949,  0.11567463, -0.31174083,  0.62284993,  1.55115198,
          0.33231401, -0.88863141, -1.06762043,  0.71161488,  0.12029563,
          1.27767682,  0.04698059, -1.53411417, -1.12464209, -0.74848654],
        [ 0.50091719,  0.7290478 , -0.32665202, -0.57677133,  0.54336019,
          0.75138712, -0.90431663, -0.8133793 , -0.97755524,  0.2597225 ,
         -1.62754244, -0.727392  ,  0.09933231, -0.66262376, -1.66940528,
         -1.8048821 ,  0.75539123, -0.76325916,  0.57059867,  0.04808495],
        [-0.03471177, -1.23116192,  0.53980966, -0.19236096, -0.90938745,
          0.75193303, -0.50347565, 

In [33]:
x.shape, y.shape

((50, 20), (50,))

In [26]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42, stratify=y)

In [27]:
clf = LogisticRegressionClassifier()
clf.fit(x_train, y_train)

In [28]:
clf.score(x_test, y_test)

0.9333333333333333

In [29]:
y_pred = clf.predict(x_test)
y_pred

array([9.43382182e-01, 2.91036661e-01, 3.90989794e-03, 1.02629357e-02,
       9.95846110e-01, 8.81864497e-03, 8.89760201e-01, 1.10555102e-06,
       9.97706832e-01, 9.91682410e-01, 5.38348080e-01, 9.99987753e-01,
       7.23468727e-05, 9.91579454e-01, 4.77507768e-02])

In [30]:
y_pred = [1 if i > 0.5 else 0 for i in y_pred]
y_pred, y_test

([1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0],
 array([1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0]))

In [31]:
clf.score(x_train, y_train)

1.0

In [32]:
clf.score(x_test, y_test)

0.9333333333333333