In [87]:
import pandas as pd
import numpy as np
import sklearn
import sys

### LogisticRegression without vectorization

In [88]:
# To calculate the input value of sigmoid function
def cal_z(arr, w, b):
    return np.dot(arr, w) + b

# To calculate the input value for derivative of loss function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# To calculate derivetive of loss function
def cal_dz(arr, y, w, b):
    return sigmoid(cal_z(arr, w, b)) - y
    
# Main function of Logistic regression (Uisng row vectors)
def LogisticRegression(X, Y, learning_rate = 0.0001, loops=1000):

    dimension = X.shape[1] # Get the dimension of each data
    samplesize = X.shape[0] # Get the size of dataset
    data = X
    
    b = 0
    w = np.array([0 for i in range(dimension)])
    
    for loop in range(loops): # Loop dataset to convergence
        db = 0
        dw = np.array([0 for i in range(dimension)])
        
        for ind_d in range(samplesize): # Without vectorization, each instance in dataset should be take into consideration
            dz = cal_dz(data[ind_d], Y[ind_d], w, b) # Calculate dz for every instance in dataset
            for ind_w in range(dimension): # calculate dw for every variable
                dw[ind_w] += data[ind_d][ind_w] * dz
                db += dz
        # Take average of the whole derivatives
        db = db / samplesize
        dw = dw / samplesize
        
        # change the b and w
        b = b - learning_rate * db
        w = w - learning_rate * dw
    
    return b, w

In [89]:
X = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 6, 7], [4, 6, 8, 9, 0]])
Y = np.array([0, 1, 1])

In [90]:
LogisticRegression(X, Y)

(0.010811150446888736,
 array([ 0.0193    ,  0.04346667,  0.05966667,  0.07043333,  0.00576667]))

In [91]:
lr = sklearn.linear_model.LogisticRegression()
lr.fit(X, Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [92]:
lr.coef_

array([[ 0.27450593,  0.13979879,  0.00509164,  0.20360991, -0.20971867]])

In [93]:
lr.intercept_

array([-0.16003637])