# Logistic Regression From Scratch
Alice Liu<br>
Jiahua Liang

In [None]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

## 1. Implementation of Logistic Regression

### 1.1 Math Functions

In [None]:
# define sigmoid function
def sigmoid(x): 
    """Calculates Sigmoid value"""
    
    sig = 1./(1+np.exp(-x))
    return sig

# define cost function:
def cost_fn(x, y, b):
    """Calculates cost"""
    
    z = np.dot(x,b)
    sig = sigmoid(z)
    J = - np.dot(y.T,np.log(sig+1e-6)) - np.dot((1-y).T,np.log(1e-6-sig)) # add a small constant to avoid numerical issue
    return J

# define function to compute gradient
def gradient(x, y, b):
    """Calculates gradient vector"""
    
    sig = sigmoid(np.dot(x, b))        
    grad = np.dot(x.T, sig - y) 
    return grad 

# define function to compute Hessian matrix
def hessian(x, y, b):
    """Calculates Hessian matrix"""
    
    N,D = x.shape
    R = np.zeros((N,N))                     # initialize diagonal matrix
    for i in range(N):
        sig = sigmoid(np.dot(x[i], b))
        R[i,i] = sig*(1-sig)          # fill diagonal entries
    H = x.T@R@x                             # Hessain matrix
    return H

### 1.2 Optimization Methods

Note: the `gradient_descent` function is a 3-in-1 universal function for batch (ordinary) gradient descent, stochastic gradient descent, and mini-batch gradient descent. User can choose which one to use by inputting the proper `batch_size` and `epochs`.

For batch (ordinary) gradient descent: 
- `batch_size` = the number of observations in the dataset 
- `epochs` is the same as the maximum number of iterations

For stochastic gradient descent: 
- `batch_size` = 1
- the maximum number of iterations = `epochs` $\times$ the number of observations

For mini-batch gradient descent: 
- `batch_size` = any valid value
- the maximum number of iterations = `epochs` $\times$ the number of batches (the number of observations / `batch_size`)

In [None]:
# define generalized gradient descent function
def gradient_descent(x, y, b, learning_rate, decay_rate, epsilon, epochs, batch_size, momentum):
    """Perform generalized gradient descent algorithm, including ordinary, stochastic, mini-batch versions

    Parameters
    ----------
    x : array-like
        The input matrix with rows as observations and columns as features
    y : array-like
        The labels for observations  
    b : array-like
        The initial model parameters
    learning_rate : float
        Determines the step size at each iteration, must be > 0, typically > 0 and < 1      
    decay_rate : float
        The decay rate for inversely decaying learning rate, must be >=0, typically >= 0 and < 1     
    epsilon : float
        The precision tolerance of cost function value as stopping criteria
    epochs : int
        The number of times that the entire dataset is traversed 
    batch_size : int
        The the size of a batch of the input data
    momentum : float
        The coefficient of the momentum term

    Returns
    -------
    J_new : float
        The final cost function value
    b : array-like
        The optimized model parameters
    iteration : int
        The total number of iterations run
    """
    
    x = np.array(x, dtype=np.dtype("float64"))
    y = np.array(y, dtype=np.dtype("float64")).flatten()
    J = np.inf    
    num_obs = len(x)
    num_batch = num_obs // batch_size 
    iteration = 0 
    change  =0
    
    # arguement value check
    if not 0 < batch_size <= num_obs:
        raise ValueError(
            "'batch_size' must be greater than zero and less than "
            "or equal to the number of observations")
        
    if num_obs != y.shape[0]:
        raise ValueError("'x' and 'y' lengths do not match")
        
    if x.shape[1] != len(b):
        raise ValueError("incorrect number of parameters")
    
    # algorithem starts
    for epoch in range(epochs):
        x, y = shuffle(x, y, random_state=0)                        # shuffle x and y for a new epoch
        for start in range(0, num_obs, batch_size):
            end = start + batch_size
            x_sub = x[start:end,:]
            y_sub = y[start:end]
            J_new = cost_fn(x, y, b)                                # Compute cost
            grad = gradient(x_sub, y_sub, b)                        # compute gradient
            new_change = learning_rate*grad + momentum*change       # calculate the update of parameters 
            b = b - new_change                                      # update parameters
            learning_rate = learning_rate/(1+decay_rate*iteration)  # inverse decay of learning rate
            change = new_change
            iteration += 1
            if np.abs(J_new - J) < epsilon:  # terminate if the difference in cost function is less than the precision
                break
            else:
                J = J_new
        if num_obs % batch_size != 0:    # if batch size is not divisible by the number of observations
            x_sub = x[num_obs-(num_obs%batch_size):num_obs,:]  # use the rest of the data to perform one more iteration
            y_sub = y[num_obs-(num_obs%batch_size):num_obs]
            J_new = cost_fn(x, y, b)                                # Compute cost
            grad = gradient(x_sub, y_sub, b)                        # compute gradient
            new_change = learning_rate*grad + momentum*change       # calculate the update of parameters 
            b = b - new_change                                      # update parameters
            learning_rate = learning_rate/(1+decay_rate*iteration)  # inverse decay of learning rate
            change = new_change
            iteration += 1
            if np.abs(J_new - J) < epsilon:  # terminate if the difference in cost function is less than the precision
                break
            else:
                J = J_new
            
    return J_new, b, iteration

# define function for Newton Method
def newton(x, y, b, epsilon, max_iters):
    """Perform generalized gradient descent algorithm, including ordinary, stochastic, mini-batch stochastic versions

    Parameters
    ----------
    x : array-like
        The input matrix with rows as observations and columns as features
    y : array-like
        The labels for observations
    b : array-like
        The initial model parameters
    epsilon : float
        The precision tolerance of cost function value as stopping criteria
    max_iters : int
        The maximum number of iterations to be run

    Returns
    -------
    J_new : float
        The final cost function value
    b : array-like
        The optimized model parameters
    iteration : int
        The total number of iterations run
    """
    
    x = np.array(x, dtype=np.dtype("float64"))
    y = np.array(y, dtype=np.dtype("float64")).flatten()
    J = 1e9
    iteration = 0
    for i in range(0, max_iters):
        J_new = cost_fn(x, y, b)             # Compute cost
        grad = gradient(x, y, b)             # compute gradient
        H = hessian(x, y, b)                 # compute Hessian matrix
        b = b - np.linalg.inv(H+np.identity(len(b))*1e-6)@grad # update parameters, add an indentity matrix with small constant to avoid numerical issue
        iteration += 1
        if np.abs(J_new - J) < epsilon:      # terminate if the difference in cost function is less than the precision
            break
        else:
            J = J_new
            
    return J_new, b, iteration

### 1.3 Logistic Regression Class

In [None]:
class Logistic_Regression:
    """
    A class used to represent a logistic regression model

    Attributes
    ----------
    order : int (either 1 or 2)
        Indicates the use of first-order (1) or second-order (2) optimization method, default value = 1
    learning_rate : float
        Determines the step size at each iteration, must be > 0, typically > 0 and < 1, default value = 1e-3      
    decay_rate : float
        The decay rate for inversely decaying learning rate, must be >=0, typically >= 0 and < 1, default value = 0     
    epsilon : float
        The precision tolerance of cost function value as stopping criteria, default value = 1e-4
    epochs : int
        The number of times that the entire dataset is traversed, default value = 32
    batch_size : int
        The the size of a batch of the input data, default value = 1
    momentum : float
        The coefficient of the momentum term, default value = 0
    threshold : float
        The probability threshold for class assignment, default value = 0.5
    verbose : boolean
        Indicates whether to show model output or not, default value = True
    b : array-like
        Initial model parameters
    param : array-like
        The optimized model parameters
    coef_ : array-like (2D)
        The optimized model coefficients
    intercept_ : array-like (2D)
        The optimized model intercept

    Methods
    -------
    fit(x, y)
        Trains the model with input data x and true label y
    predict(x)
        Predicts class labels for samples in x
    predict_proba(x)
        Predicts probability estimates for samples in x
    score(x, y)
        Calculates classification accuracy
    """
    
    def __init__(self, order=1, learning_rate=1e-3, decay_rate=0, epsilon=1e-4, epochs=32, batch_size=1, 
                 momentum=0.0, threshold=0.5, verbose=True):
        """Initialize an object of the class"""
        
        self.order = int(order)              # the order of optimization method (1 for first order, 2 for second order)
        self.learning_rate = learning_rate   # initial learning rate for gradient descent
        self.decay_rate = decay_rate         # decay rate of inversely decaying learning rate
        self.epsilon = epsilon               # the precision tolerance of cost function value as stopping criteria       
        self.epochs = int(epochs)            # the number of epochs to iterate
        self.batch_size = int(batch_size)    # the size of batch for general stochastic gradient descent
        self.momentum = momentum             # the momentum coefficient for general stochastic gradient descent
        self.threshold = threshold           # the probability threshold for class assignment
        self.verbose = verbose               # whether to show results or not
        
        # argument value check
        if self.order != 1 and self.order != 2:
            raise ValueError("only first or second order methods are supported, "
                             "please type in 1 for first order, 2 for second order")
        
        if self.learning_rate <= 0:
            raise ValueError("'learning_rate' must be greater than zero")
      
        if self.decay_rate < 0:
            raise ValueError("'decay_rate' must be greater or equal to zero")
        
        if self.epsilon <= 0:
            raise ValueError("'tolerance' must be greater than zero")
        
        if self.epochs <= 0:
            raise ValueError("'epochs' must be greater than zero")
        
        if self.batch_size <= 0:
            raise ValueError("'batch_size' must be greater than zero")
            
        if self.momentum < 0 or self.momentum > 1:
            raise ValueError("'momentum' must be between zero and one")
     
    # fit function for training the model
    def fit(self, x, y):
        """Trains the model with input data x and true label y

        Parameters
        ----------
        x : array-like
            The input matrix with rows as observations and columns as features
        y : array-like
            The true labels for observations
        
        Returns
        -------
        self : object
            The object of the class
        """
        
        if x.ndim == 1:                      # if input matrix has only one dimension
            x = x[:, None]
            
        N = x.shape[0]
        x = np.column_stack([np.ones(N), x]) # add a column of ones for intercept of the model
        N,D = x.shape
        self.b = np.zeros(D)                 # initialize parameters
        
        if self.order == 1: 
            # use gradient descent 
            cost, self.param, iters = gradient_descent(x, y, self.b, self.learning_rate, self.decay_rate, 
                                                       self.epsilon, self.epochs, self.batch_size, self.momentum)
        else:
            # use Newton method
            cost, self.param, iters = newton(x, y, self.b, self.epsilon, self.epochs)
        
        if self.verbose:
            print(f'terminated after {iters} iterations, with cost equal to {cost}')
            print(f'the coefficients found: {self.param}')
        
        self.coef_ = np.array([self.param[1:]])
        self.intercept_ = np.array([self.param[0]])
        
        return self
    
    # function for predicting class labels for samples in x
    def predict(self, x):
        """Predicts class labels for samples in x

        Parameters
        ----------
        x : array-like
            The input matrix with rows as observations and columns as features
        
        Returns
        -------
        y_pred : array
            The array of predicted class labels
        """
        
        if x.ndim == 1:
            x = x[:, None]
        Nt = x.shape[0]
        x = np.column_stack([np.ones(Nt), x])      # add a column of ones for intercept of the model
        yh = sigmoid(np.dot(x, self.param))            # predict output probability
        y_pred = [1 if x>self.threshold else 0 for x in yh]   # assign class labels with threshold
        
        return np.array(y_pred)
    
    # function for predicting probability estimates for samples in x
    def predict_proba(self, x):
        """Predicts probability estimates for samples in x

        Parameters
        ----------
        x : array-like
            The input matrix with rows as observations and columns as features
        
        Returns
        -------
        yh : array
            The array of predicted probability estimates
        """
        
        if x.ndim == 1:
            x = x[:, None]
        Nt = x.shape[0]
        x = np.column_stack([np.ones(Nt), x])      # add a column of ones for intercept of the model
        yh = sigmoid(np.dot(x, self.param))            # predict output probability
        
        return np.array(yh)
    
    # function for calculating classification accuracy
    def score(self, x, y):
        """Calculates classification accuracy

        Parameters
        ----------
        x : array-like
            The input matrix with rows as observations and columns as features
        y : array-like
            The true labels for observations
        
        Returns
        -------
        accuracy : float
            The classification accuracy
        """
        
        y_pred = self.predict(x)                   # predicted labels
        accuracy = 1 - np.mean(abs(y - y_pred))    # classification accuracy
        
        return accuracy
    

## 2. Use Case Application

### 2.1 Import Data

In [None]:
url = 'https://raw.githubusercontent.com/alicekejialiu/datasets/main/Kickstarter.csv'
df = pd.read_csv(url,index_col=0,encoding = 'unicode_escape')

In [None]:
df

Unnamed: 0_level_0,name,goal,pledged,state,disable_communication,country,currency,deadline,state_changed_at,created_at,launched_at,staff_pick,backers_count,static_usd_rate,usd_pledged,category,spotlight,name_len,name_len_clean,blurb_len,blurb_len_clean,deadline_weekday,state_changed_at_weekday,created_at_weekday,launched_at_weekday,deadline_month,deadline_day,deadline_yr,deadline_hr,state_changed_at_month,state_changed_at_day,state_changed_at_yr,state_changed_at_hr,created_at_month,created_at_day,created_at_yr,created_at_hr,launched_at_month,launched_at_day,launched_at_yr,launched_at_hr,create_to_launch_days,launch_to_deadline_days,launch_to_state_change_days
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
14042,Elysian Tuned Aperture Pickups for Electric Gu...,15000.0,4257.00,failed,False,US,USD,2015-04-05T12:42:40,2015-04-05T12:42:41,2015-03-01T10:36:46,2015-03-06T12:42:40,False,35,1.000000,4257.000000,Hardware,False,9.0,8.0,21.0,17.0,Sunday,Sunday,Sunday,Friday,4,5,2015,12,4,5,2015,12,3,1,2015,10,3,6,2015,12,5,30,
767,Linker,4000.0,51.00,failed,False,CA,CAD,2014-12-26T08:45:09,2014-12-26T08:45:25,2014-08-28T09:45:56,2014-10-27T08:45:09,False,2,0.890047,45.392404,Apps,False,1.0,1.0,3.0,3.0,Friday,Friday,Thursday,Monday,12,26,2014,8,12,26,2014,8,8,28,2014,9,10,27,2014,8,59,60,
2128,Making the Move--Edinburgh Fringe 2014,7750.0,7860.00,successful,False,US,USD,2014-08-19T11:00:00,2014-08-19T11:00:10,2014-07-30T02:32:04,2014-07-31T13:30:45,False,26,1.000000,7860.000000,Plays,True,5.0,4.0,25.0,17.0,Tuesday,Tuesday,Wednesday,Thursday,8,19,2014,11,8,19,2014,11,7,30,2014,2,7,31,2014,13,1,18,18.0
17449,Apple Usb Cable Protector,5000.0,1633.00,failed,False,FR,EUR,2015-10-18T16:00:20,2015-10-18T16:00:20,2015-08-16T12:24:07,2015-08-19T16:00:20,False,46,1.107810,1809.053469,Gadgets,False,4.0,4.0,21.0,15.0,Sunday,Sunday,Sunday,Wednesday,10,18,2015,16,10,18,2015,16,8,16,2015,12,8,19,2015,16,3,60,
10959,Simple Work Attendance Sheet (S.work.a.S.),1300.0,16.00,failed,False,IT,EUR,2015-11-06T08:49:35,2015-11-06T08:49:35,2015-09-15T11:36:44,2015-10-12T08:49:35,False,1,1.135892,18.174280,Software,False,5.0,5.0,23.0,12.0,Friday,Friday,Tuesday,Monday,11,6,2015,8,11,6,2015,8,9,15,2015,11,10,12,2015,8,26,25,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17006,The Adventure Field Journal,12000.0,100.00,failed,False,US,USD,2016-07-04T22:59:00,2016-07-04T22:59:02,2016-06-07T14:08:47,2016-06-08T17:38:07,False,2,1.000000,100.000000,Gadgets,False,4.0,4.0,14.0,10.0,Monday,Monday,Tuesday,Wednesday,7,4,2016,22,7,4,2016,22,6,7,2016,14,6,8,2016,17,1,26,
17425,rolling shoulder,40000.0,2.00,failed,False,IT,EUR,2016-01-20T09:21:52,2016-01-20T09:21:52,2015-12-18T07:39:43,2015-12-21T09:21:52,False,2,1.079775,2.159550,Gadgets,False,2.0,2.0,13.0,10.0,Wednesday,Wednesday,Friday,Monday,1,20,2016,9,1,20,2016,9,12,18,2015,7,12,21,2015,9,3,30,
9324,Hypothes.is - Taking peer review to the Internet.,100000.0,105786.99,successful,False,US,USD,2011-11-13T23:00:00,2011-11-13T23:00:30,2011-09-17T15:59:13,2011-09-30T17:57:43,True,791,1.000000,105786.990000,Software,True,8.0,6.0,10.0,8.0,Sunday,Sunday,Saturday,Friday,11,13,2011,23,11,13,2011,23,9,17,2011,15,9,30,2011,17,13,44,44.0
18410,Do it Yourself Garage,70000.0,0.00,failed,False,US,USD,2015-05-28T07:07:16,2015-05-28T07:07:20,2015-04-25T04:47:12,2015-04-28T07:07:16,False,0,1.000000,0.000000,,False,4.0,3.0,15.0,7.0,Thursday,Thursday,Saturday,Tuesday,5,28,2015,7,5,28,2015,7,4,25,2015,4,4,28,2015,7,3,30,


### 2.2 Set predictors and outcome variable

In [None]:
df.groupby('state')['state'].count()

state
canceled       2214
failed        10299
live            462
successful     5386
suspended       207
Name: state, dtype: int64

In [None]:
df=df[~df['state'].isin(['canceled','live','suspended'])]

In [None]:
# df['state'] = np.where(df['state'] == 'successful', 1, df['state'])
# df['state'] = np.where(df['state'] == 'failed', 0, df['state'])
df['state'] = np.where(df['state'] == 'successful', 1, 0)

In [None]:
df['state'].value_counts()

0    10299
1     5386
Name: state, dtype: int64

In [None]:
df=df.rename(columns = {'state':'success'})

In [None]:
df = df[['success','goal','name_len_clean','create_to_launch_days','launch_to_deadline_days']]

In [None]:
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15685 entries, 14042 to 146
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   success                  15685 non-null  int64  
 1   goal                     15685 non-null  float64
 2   name_len_clean           15685 non-null  float64
 3   create_to_launch_days    15685 non-null  int64  
 4   launch_to_deadline_days  15685 non-null  int64  
dtypes: float64(2), int64(3)
memory usage: 735.2 KB


In [None]:
df

Unnamed: 0_level_0,success,goal,name_len_clean,create_to_launch_days,launch_to_deadline_days
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14042,0,15000.0,8.0,5,30
767,0,4000.0,1.0,59,60
2128,1,7750.0,4.0,1,18
17449,0,5000.0,4.0,3,60
10959,0,1300.0,5.0,26,25
...,...,...,...,...,...
17006,0,12000.0,4.0,1,26
17425,0,40000.0,2.0,3,30
9324,1,100000.0,6.0,13,44
18410,0,70000.0,3.0,3,30


### 2.3 Data Pre-processing

In [None]:
X = df.iloc[:,1:]
y = df['success']

In [None]:
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_std, np.array(y), test_size=0.33, random_state=42)

### 2.4 Applying Logistic Regression with sklearn 

In [None]:
lr = LogisticRegression()
model = lr.fit(X_train, y_train)
scikit_learn = []
print('The results for logistic regression with sklearn: ')
print('Intercept b0 =', round(model.intercept_[0],4))
scikit_learn.append(round(model.intercept_[0],4))
for i in range(len(model.coef_[0])):
    print('b'+str(i+1)+' =', round(model.coef_[0][i],4))
    scikit_learn.append(round(model.coef_[0][i],4))

The results for logistic regression with sklearn: 
Intercept b0 = -0.9696
b1 = -7.2199
b2 = 0.4086
b3 = 0.0296
b4 = -0.2146


In [None]:
y_pred = model.predict(X_test)
print('The accuracy score of sklearn logistic regression model: ', 
      round(accuracy_score(y_test,y_pred),3))
scikit_learn.append(round(accuracy_score(y_test,y_pred),3))

The accuracy score of sklearn logistic regression model:  0.663


In [None]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix:')
print(confusion_matrix(y_test,y_pred))
print('F1 score:')
print(f1_score(y_test,y_pred))
scikit_learn.append(f1_score(y_test,y_pred))

Confusion matrix:
[[3167  224]
 [1519  267]]
F1 score:
0.23451910408432147


### 2.5 Applying Home-Made Logistic Regression

#### 2.5.1 Batch (Ordinary) Gradient Descent

In [None]:
logit1 = Logistic_Regression(order=1, learning_rate=1e-4, decay_rate=0, epsilon=1e-6, epochs= 5000, 
                             batch_size=len(X_train), momentum=0, threshold=0.5, verbose=True)
model1 = logit1.fit(X_train, y_train)
ordinary_gradient_descent = []
print('The results for self-implemented logistic regression with ordinary gradient descent algorithm: ')
print('Intercept b0 =', round(model1.intercept_[0],4))
ordinary_gradient_descent.append(round(model1.intercept_[0],4))
for i in range(len(model1.coef_[0])):
    print('b'+str(i+1)+' =', round(model1.coef_[0][i],4))
    ordinary_gradient_descent.append(round(model1.coef_[0][i],4))

terminated after 5000 iterations, with cost equal to 6331.270373556473
the coefficients found: [-0.98733465 -7.61820293  0.40947663  0.0304532  -0.21325399]
The results for self-implemented logistic regression with ordinary gradient descent algorithm: 
Intercept b0 = -0.9873
b1 = -7.6182
b2 = 0.4095
b3 = 0.0305
b4 = -0.2133


In [None]:
y_pred1 = model1.predict(X_test)
print('The accuracy score of self-implemented logistic regression with ordinary gradient descent algorithm: ', 
      round(accuracy_score(y_test,y_pred1),3))
ordinary_gradient_descent.append(round(accuracy_score(y_test,y_pred1),3))

The accuracy score of self-implemented logistic regression with ordinary gradient descent algorithm:  0.664


In [None]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix:')
print(confusion_matrix(y_test,y_pred1))
print('F1 score:')
print(f1_score(y_test,y_pred1))
ordinary_gradient_descent.append(f1_score(y_test,y_pred1))

Confusion matrix:
[[3164  227]
 [1513  273]]
F1 score:
0.23884514435695536


#### 2.5.2 Stochastic Gradient Descent

In [None]:
logit2 = Logistic_Regression(order=1, learning_rate=1e-3, decay_rate=0, epsilon=1e-6, epochs = 32, 
                             batch_size=1, momentum=0.2, threshold=0.5, verbose=True)
model2 = logit2.fit(X_train, y_train)
stochastic_gradient_descent = []
print('The results for self-implemented logistic regression with stochastic gradient descent algorithm: ')
print('Intercept b0 =', round(model2.intercept_[0],4))
stochastic_gradient_descent.append(round(model2.intercept_[0],4))
for i in range(len(model2.coef_[0])):
    print('b'+str(i+1)+' =', round(model2.coef_[0][i],4))
    stochastic_gradient_descent.append(round(model2.coef_[0][i],4))

terminated after 298350 iterations, with cost equal to 6427.971219266905
the coefficients found: [-0.75008861 -1.55325984  0.40274267  0.00405048 -0.23940403]
The results for self-implemented logistic regression with stochastic gradient descent algorithm: 
Intercept b0 = -0.7501
b1 = -1.5533
b2 = 0.4027
b3 = 0.0041
b4 = -0.2394


In [None]:
y_pred2 = model2.predict(X_test)
print('The accuracy score of self-implemented logistic regression with stochastic gradient descent algorithm: ', 
      round(accuracy_score(y_test, y_pred2), 3))
stochastic_gradient_descent.append(round(accuracy_score(y_test, y_pred2), 3))

The accuracy score of self-implemented logistic regression with stochastic gradient descent algorithm:  0.658


In [None]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix:')
print(confusion_matrix(y_test, y_pred2))
print('F1 score:')
print(f1_score(y_test,y_pred2))
stochastic_gradient_descent.append(f1_score(y_test, y_pred2))

Confusion matrix:
[[3220  171]
 [1598  188]]
F1 score:
0.17529137529137528


#### 2.5.3 Mini-Batch Gradient Descent

In [None]:
logit3 = Logistic_Regression(order=1, learning_rate=1e-4, decay_rate=0, epsilon=1e-6, epochs=5000, 
                             batch_size=64, momentum=0, threshold=0.5, verbose=True)
model3 = logit3.fit(X_train, y_train)
mini_batch_gradient_descent = []
print('The results for self-implemented logistic regression with mini-batch gradient descent algorithm: ')
print('Intercept b0 =', round(model3.intercept_[0],4))
mini_batch_gradient_descent.append(round(model3.intercept_[0],4))
for i in range(len(model3.coef_[0])):
    print('b'+str(i+1)+' =', round(model3.coef_[0][i],4))
    mini_batch_gradient_descent.append(round(model3.coef_[0][i],4))

terminated after 823901 iterations, with cost equal to 6336.150875071815
the coefficients found: [-9.67267847e-01 -7.58640181e+00  4.28038638e-01  3.79958715e-03
 -1.55101259e-01]
The results for self-implemented logistic regression with mini-batch gradient descent algorithm: 
Intercept b0 = -0.9673
b1 = -7.5864
b2 = 0.428
b3 = 0.0038
b4 = -0.1551


In [None]:
y_pred3 = model3.predict(X_test)
print('The accuracy score of self-implemented logistic regression with mini-batch gradient descent algorithm: ', 
      round(accuracy_score(y_test,y_pred3),3))
mini_batch_gradient_descent.append(round(accuracy_score(y_test,y_pred3),3))

The accuracy score of self-implemented logistic regression with mini-batch gradient descent algorithm:  0.662


In [None]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix:')
print(confusion_matrix(y_test,y_pred3))
print('F1 score:')
print(f1_score(y_test,y_pred3))
mini_batch_gradient_descent.append(f1_score(y_test,y_pred3))

Confusion matrix:
[[3148  243]
 [1505  281]]
F1 score:
0.24329004329004328


#### 2.5.4 Newton Method

In [None]:
logit4 = Logistic_Regression(order=2, epsilon=1e-6, epochs= 5000, threshold=0.5, verbose=True)
model4 = logit4.fit(X_train, y_train)
newton_method = []
print('The results for self-implemented logistic regression with Newton method: ')
print('Intercept b0 =', round(model4.intercept_[0],4))
newton_method.append(round(model4.intercept_[0],4))
for i in range(len(model4.coef_[0])):
    print('b'+str(i+1)+' =', round(model4.coef_[0][i],4))
    newton_method.append(round(model4.coef_[0][i],4))

terminated after 11 iterations, with cost equal to 6320.521857482465
the coefficients found: [ -1.14728632 -11.11084946   0.41624442   0.03780027  -0.20133573]
The results for self-implemented logistic regression with Newton method: 
Intercept b0 = -1.1473
b1 = -11.1108
b2 = 0.4162
b3 = 0.0378
b4 = -0.2013


In [None]:
y_pred4 = model4.predict(X_test)
print('The accuracy score of self-implemented logistic regression with Newton method: ', 
      round(accuracy_score(y_test,y_pred4),3))
newton_method.append(round(accuracy_score(y_test,y_pred4),3))

The accuracy score of self-implemented logistic regression with Newton method:  0.664


In [None]:
from sklearn.metrics import confusion_matrix
print('Confusion matrix:')
print(confusion_matrix(y_test,y_pred4))
print('F1 score:')
print(f1_score(y_test,y_pred4))
newton_method.append(f1_score(y_test,y_pred4))

Confusion matrix:
[[3138  253]
 [1487  299]]
F1 score:
0.25577416595380664


### 2.6 Overall Test Result

In [None]:
df_result = pd.DataFrame([scikit_learn,ordinary_gradient_descent,stochastic_gradient_descent,mini_batch_gradient_descent,newton_method],
             index=['Scikit Learn','Ordinary Gradient Descent','Stochastic Gradient Descent','Mini-batch Gradient Descent',"Newton's Method"],
             columns = ['b0','b1','b2','b3','b4','Accuracy Score','F1 Score'])
df_result

Unnamed: 0,b0,b1,b2,b3,b4,Accuracy Score,F1 Score
Scikit Learn,-0.9696,-7.2199,0.4086,0.0296,-0.2146,0.663,0.234519
Ordinary Gradient Descent,-0.9873,-7.6182,0.4095,0.0305,-0.2133,0.664,0.238845
Stochastic Gradient Descent,-0.7501,-1.5533,0.4027,0.0041,-0.2394,0.658,0.175291
Mini-batch Gradient Descent,-0.9673,-7.5864,0.428,0.0038,-0.1551,0.662,0.24329
Newton's Method,-1.1473,-11.1108,0.4162,0.0378,-0.2013,0.664,0.255774
