In [2]:
import pandas as pd
import numpy as np
import sklearn
import random
import sys

# Seed Python's built-in `random` module so any later use of it is repeatable.
# NOTE(review): this does not seed NumPy's generator (np.random) — confirm
# whether NumPy randomness is used elsewhere and needs its own seed.
random.seed(18)

## LogisticRegression without vectorization

In [3]:
def cal_z(arr, w, b):
    """Compute the linear score z = arr . w + b that is fed into the sigmoid.

    arr -- feature values (first operand of the dot product)
    w   -- weights (second operand of the dot product)
    b   -- bias added to the dot product
    """
    weighted_sum = np.dot(arr, w)
    return weighted_sum + b

def sigmoid(z):
    """Apply the logistic (sigmoid) function 1 / (1 + e^(-z)) to z.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def cal_dz(a, y):
    """Return the derivative of the loss with respect to z, computed as a - y.

    a -- activation (sigmoid output)
    y -- target label
    """
    error = a - y
    return error
    
def LogisticRegression(data, Y, learning_rate = 0.0001, loops=100):
    """Fit logistic regression with batch gradient descent, using explicit loops.

    This is the deliberately non-vectorized version: gradients are accumulated
    sample by sample and feature by feature.

    Parameters
    ----------
    data : 2-D array-like
        One row per sample, one column per feature.
    Y : array-like
        One label per sample, indexed in the same order as the rows of `data`.
    learning_rate : float
        Step size used for every parameter update.
    loops : int
        Number of full passes (gradient steps) over the dataset.

    Returns
    -------
    (b, w)
        `w` has shape (n_features, 1). `b` is the bias; after at least one
        update it is a 1-element array (it stays the integer 0 when loops == 0).
    """
    # Coerce to ndarrays so that positional indexing below is well defined.
    data = np.asarray(data)
    Y = np.asarray(Y)

    dimension = data.shape[1]   # number of features per sample
    samplesize = data.shape[0]  # number of samples

    b = 0
    w = np.zeros((dimension, 1))

    for _ in range(loops):  # one gradient-descent step per pass
        db = 0
        dw = np.zeros((dimension, 1))

        # Without vectorization every sample is processed individually.
        for ind_d in range(samplesize):
            z = cal_z(data[ind_d], w, b)
            a = sigmoid(z)
            dz = cal_dz(a, Y[ind_d])

            # The bias gradient receives exactly one contribution per sample.
            # (Accumulating it inside the feature loop would scale it by the
            # number of features.)
            db += dz

            for ind_w in range(dimension):  # one contribution per feature
                dw[ind_w] += data[ind_d][ind_w] * dz

        # Average the accumulated gradients over the dataset.
        db = db / samplesize
        dw = dw / samplesize

        # Gradient-descent update of the parameters.
        b = b - learning_rate * db
        w = w - learning_rate * dw

    return b, w

In [4]:
# Toy dataset: three samples (rows) with five features each,
# and one binary label per sample.
X = np.array([
    [1, 2, 3, 4, 5],
    [2, 3, 4, 6, 7],
    [4, 6, 8, 9, 0],
])
Y = np.array([0, 1, 1])

# Optional cross-check against scikit-learn (left disabled):
# lr = sklearn.linear_model.LogisticRegression(penalty="l2")
# lr.fit(X, Y)
# lr.coef_
# lr.intercept_

## LogisticRegression applying vectorization

In [5]:
def cal_z_vec(X, W, b):
    """Compute the linear scores Z = X . W + b for all samples in one call."""
    linear_part = np.dot(X, W)
    return linear_part + b

def sigmoid_vec(Z):
    """Apply the logistic function 1 / (1 + e^(-Z)) element-wise to Z."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def cal_dz_vec(A, Y):
    """Return A - Y after reshaping Y into a single column.

    The reshape to (-1, 1) lines the labels up with a column of activations.
    """
    label_column = Y.reshape(-1, 1)
    return A - label_column

def LogisticRegression_vec(X, Y, learning_rate = 0.0001, loops=100):
    """Fit logistic regression with batch gradient descent, fully vectorized.

    X holds one sample per row; Y holds one label per sample. Starting from
    zero weights and zero bias, `loops` gradient steps of size `learning_rate`
    are taken. Returns (b, W) with W of shape (n_features, 1).
    """
    n_features = X.shape[1]
    n_samples = X.shape[0]

    b = 0
    W = np.zeros((n_features, 1))

    for _ in range(loops):
        # Forward pass for every sample at once.
        activations = sigmoid_vec(cal_z_vec(X, W, b))
        residuals = cal_dz_vec(activations, Y)

        # Gradients averaged over the dataset.
        grad_b = np.sum(residuals) / n_samples
        grad_W = np.dot(X.T, residuals) / n_samples

        # Gradient-descent update.
        W = W - learning_rate * grad_W
        b = b - learning_rate * grad_b

    return b, W

In [6]:
# Example usage (uncomment to run):
# LogisticRegression_vec(X, Y)