In [165]:
import pandas as pd
import numpy as np
import sklearn
import random
import sys

random.seed(18)

## LogisticRegression without vectorization

In [219]:
# To calculate the input value of sigmoid function
def cal_z(arr, w, b):
    return np.dot(arr, w.T) + b

# To calculate the input value for derivative of loss function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# To calculate derivetive of loss function
def cal_dz(a, y):
    return a - y
    
# Main function of Logistic regression (Uisng row vectors)
def LogisticRegression(data, Y, learning_rate = 0.0001, loops=100):

    dimension = data.shape[1] # Get the dimension of each data
    samplesize = data.shape[0] # Get the size of dataset
    
    b = 0
    w = np.zeros((1, dimension))
    
    for loop in range(loops): # Loop dataset to convergence
        db = 0
        dw = np.zeros((1, dimension))
        
        for ind_d in range(samplesize): # Without vectorization, each instance in dataset should be take into consideration
            z = cal_z(data[ind_d], w, b)
            a = sigmoid(z)
            dz = cal_dz(a, Y[ind_d]) # Calculate dz for every instance in dataset
            for ind_w in range(dimension): # calculate dw for each variable
                dw[0][ind_w] += data[ind_d][ind_w] * dz
                db += dz
                
        # Take average of the whole derivatives
        db = db / samplesize
        dw = dw / samplesize
    
        # Change the b and w
        b = b - learning_rate * db
        w = w - learning_rate * dw
        
    return b, w

In [220]:
X = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 6, 7], [4, 6, 8, 9, 0]])
Y = np.array([0, 1, 1])

In [221]:
LogisticRegression(X, Y)

(array([ 0.00678334]),
 array([[ 0.00747909,  0.01035258,  0.01322608,  0.01615468,  0.00234304]]))

In [222]:
lr = sklearn.linear_model.LogisticRegression(penalty="l2")
lr.fit(X, Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [223]:
lr.coef_

array([[ 0.27450593,  0.13979879,  0.00509164,  0.20360991, -0.20971867]])

In [224]:
lr.intercept_

array([-0.16003637])

## LogisticRegression applying vectorization

In [226]:
def cal_z_vec(X, W, b):
    return np.dot(W, X.T) + b

def sigmoid_vec(Z):
    return 1 / (1 + np.exp(-Z))

def cal_dz_vec(A, Y):
    return A - Y

def LogisticRegression_vec(X, Y, learning_rate = 0.0001, loops=100): # Using row vectors
    dimension = X.shape[1]
    samplesize = X.shape[0]
    
    W = np.zeros((1, dimension))
    b = 0
    
    for i in range(loops):
        Z = cal_z_vec(X, W, b)
        A = sigmoid_vec(Z)
        dz = cal_dz_vec(A, Y)

        dw = np.dot(dz, X) / samplesize
        db = np.sum(dz) / samplesize
        
        b = b - learning_rate * db
        W = W - learning_rate * dw
        
    return b, W

In [227]:
LogisticRegression_vec(X, Y)

(0.0013632543438255387,
 array([[ 0.00749425,  0.01037646,  0.01325868,  0.01619606,  0.00236973]]))