In [15]:
import pandas as pd
import numpy as np
#importing necessary libraries
from sklearn.datasets import make_classification

In [18]:
x, y=make_classification(n_samples=1000, n_features=5, n_informative=5, n_redundant=0, n_classes=2)
#make a faux classification problem with 1000 rows, 5 informative features, to classify into 2 classes
#NOTE(review): no random_state is passed here, so presumably a different dataset is generated on every run
#and the outputs recorded further down will not be reproduced exactly - consider fixing a seed

In [21]:
from sklearn.model_selection import train_test_split

In [23]:
#hold out part of the data for testing; random_state=5 is passed so the shuffle is presumably repeatable between runs
#NOTE(review): no test_size is given, so sklearn's default split ratio applies
x_train, x_test, y_train, y_test=train_test_split(x,y, random_state=5)

In [24]:
#now to start building the logistic regression model

In [82]:
class log_regr():
    '''
    Binary logistic regression fitted with batch gradient descent.

    lr: learning rate (step size of every gradient descent update)
    n_iterations: number of gradient descent steps that fit() performs
    fit_intercept: when True a column of 1's is prepended to x, so weights[0] acts as the intercept
    verbose: when True fit() prints the log loss and the mean gradient every 20 iterations

    '''

    def __init__(self, lr=0.1, n_iterations=100000, fit_intercept=True, verbose=False):

        self.lr=lr #learning rate (step size to take in gradient descent)
        self.n_iterations=n_iterations #number of learning steps in gradient descent
        self.fit_intercept=fit_intercept #whether or not to include a column of 1's to use as y-intercept in our hypothesis func
        self.verbose=verbose #whether or not to report progress while fitting

    def __make_intercept(self, x):

        '''

        helper function to prepend a column of 1's to x (the intercept term)

        '''

        intercept=np.ones((x.shape[0],1)) #make column of ones equal to length of x
        return np.concatenate((intercept, x), axis=1) #join the intercept column

    def __sigmoid(self, z):

        '''

        helper function to return the sigmoid function

        '''

        return 1/(1+np.exp(-z))

    def __log_loss(self, y, predictions):

        '''

        helper function to return the average log loss:

        -1/m*(sum(y*log(h(x))+((1-y)*log(1-h(x)))))

        '''

        eps=1e-15
        clipped=np.clip(predictions, eps, 1-eps) #keep log() away from 0 when predictions saturate
        return -np.mean(y*np.log(clipped)+(1-y)*np.log(1-clipped))

    def fit(self, x, y):

        '''
        find optimal weights for the given predictors and targets

        x: 2D array-like of predictors, one row per sample
        y: 1D array-like of 0/1 targets, one entry per row of x

        stores the learned weights in self.weights and the number of steps taken in self.count
        returns a status string with the number of iterations performed
        raises ValueError if x is not 2D, is empty, or does not have one row per target

        '''
        x=np.asarray(x, dtype=float)
        y=np.asarray(y, dtype=float).ravel()

        if x.ndim!=2:
            raise ValueError('x must be a 2D array of shape (n_samples, n_features)')
        if x.shape[0]==0:
            raise ValueError('x must contain at least one sample')
        if x.shape[0]!=y.shape[0]:
            raise ValueError('x and y must have the same number of samples')

        #make intercept if necessary
        if self.fit_intercept:
            x=self.__make_intercept(x)

        n_samples=x.shape[0]

        #initialize the weights with 1's (one weight per column of x)
        weights=np.ones(x.shape[1])

        count=0

        '''
        the loss function for logistic regression is calculated from Maximum Likelihood estimation and is as follows:

        z=theta_0 + theta_1(x1) + theta_2(x2) + ...
        h=hypothesis function=sigmoid(z)

        -1/m*(sum(y*log(h(x))+((1-y)*log(1-h(x)))))

        '''

        for i in range(self.n_iterations):

            z=np.dot(x, weights)

            predictions=self.__sigmoid(z) #squeezes the predictions to be between 0 and 1

            '''Basic steps for Gradient Descent

                1) Take the partial derivative with respect to each weight of the loss equation
                2) Calculate the gradient for each sample and divide by total number of samples
                3) Update each weight as follows: weight=weight-learning_rate*avg_gradient
                4) Repeat for n_iterations steps (no convergence threshold is applied)'''

            '''partial_derivative(x1) = (prediction(x1)-y1)*(x1)
               avg_gradient(x1)=1/m*sum((predictions(x1)-y1)*x1)'''

            errors=predictions-y
            gradient=np.dot(x.T, errors)/n_samples

            weights-=gradient*self.lr
            count+=1

            if self.verbose and count%20==0:
                # every 20 iterations report on the learning
                # the loss is only needed for this report, so it is only computed here
                # (it is measured on the predictions made just before this step's weight update)
                loss=self.__log_loss(y, predictions)
                print(f'log loss: {round(loss,4)} | iteration: {count} | avg_gradient {round(np.mean(gradient),3)}')

        self.weights=weights
        self.count=count

        return f'Fit complete: {count} iterations'

    def predict_proba(self, x):

        '''
        return the sigmoid of the linear score for every row of x, using the weights stored by fit()

        '''

        x=np.asarray(x, dtype=float)

        if self.fit_intercept:
            x=self.__make_intercept(x)

        return self.__sigmoid(np.dot(x, self.weights))

    def predict(self, x, threshold=0.5):

        '''
        return a boolean array: True where predict_proba(x) >= threshold, False otherwise

        '''

        return self.predict_proba(x) >= threshold #False if below threshold, True if at or above

    def score(self, x, y):

        '''
        return the accuracy of predict(x) at threshold 0.5 against the labels y

        '''

        predictions=self.predict(x, threshold=0.5)
        return (predictions==np.asarray(y).ravel()).mean() #what ratio of classification labels were correct

    def coef(self):

        '''
        return the weights stored by fit(); when fit_intercept is True, index 0 is the intercept weight

        '''

        return self.weights
        
        
        

In [83]:
#instantiate our model with its default hyperparameters (lr=0.1, n_iterations=100000)
log=log_regr()

In [84]:
#train on the training split; fit() returns a status string, which shows up as this cell's output
log.fit(x_train, y_train)

'Fit complete: 100000 iterations'

In [85]:
#accuracy on the held-out test split, using the 0.5 decision threshold that score() passes to predict()
log.score(x_test,y_test)

0.764

In [89]:
#sigmoid output of the fitted model for each test row (a value between 0 and 1)
log.predict_proba(x_test)

array([0.06128942, 0.81072998, 0.86686304, 0.84581439, 0.55354991,
       0.4466992 , 0.01524554, 0.02224619, 0.42960909, 0.30841236,
       0.71351839, 0.88889877, 0.8166099 , 0.71515382, 0.21636823,
       0.61010688, 0.9761873 , 0.15449802, 0.39787268, 0.41401609,
       0.97356467, 0.97051623, 0.32664249, 0.13393816, 0.60496815,
       0.26594431, 0.99048351, 0.9009892 , 0.11116134, 0.21201318,
       0.1104605 , 0.8025685 , 0.11537804, 0.55640202, 0.29968555,
       0.96543288, 0.0087408 , 0.60381635, 0.9033691 , 0.74134486,
       0.05915062, 0.05939162, 0.94223685, 0.84211325, 0.82276411,
       0.07642754, 0.70345924, 0.35126759, 0.89940217, 0.10311292,
       0.02447092, 0.01609298, 0.75067306, 0.81027762, 0.09994359,
       0.0114412 , 0.56957   , 0.12443021, 0.64487422, 0.7769182 ,
       0.05659   , 0.02659902, 0.3993717 , 0.86749681, 0.01023101,
       0.12489605, 0.2088482 , 0.85107546, 0.17503032, 0.54283339,
       0.03099943, 0.04927709, 0.16051284, 0.71476147, 0.82634

In [90]:
#boolean predictions: True where the predicted probability is >= 0.5
#NOTE(review): this predicts on the full dataset x (training and test rows together), not on x_test
log.predict(x)

array([ True,  True,  True,  True, False, False, False, False, False,
       False,  True,  True, False,  True, False,  True,  True, False,
        True, False,  True,  True, False, False, False, False,  True,
        True,  True, False, False, False, False, False,  True, False,
       False,  True, False,  True,  True, False, False, False,  True,
        True, False,  True, False,  True,  True,  True,  True, False,
        True, False, False,  True,  True,  True,  True,  True,  True,
        True, False, False,  True,  True, False, False,  True, False,
       False, False,  True,  True,  True, False, False,  True,  True,
        True,  True, False,  True,  True,  True,  True, False,  True,
       False,  True,  True,  True,  True, False,  True,  True,  True,
        True, False, False,  True,  True, False,  True,  True,  True,
       False,  True, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False, False,  True, False,  True,
        True,  True,

In [92]:
#same call as the earlier score cell; nothing was refit in between, so the accuracy is unchanged
log.score(x_test,y_test)

0.764

In [94]:
from sklearn.linear_model import LogisticRegression

In [95]:
#sklearn's implementation with its default settings, used as a reference to compare against
sk_log=LogisticRegression()

In [96]:
#fit on the same training split as our model; the cell output is the estimator's repr listing its settings
sk_log.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [97]:
sk_log.score(x_test,y_test) #sklearn's accuracy on the same test split; our model scored 0.764, so the two perform similarly

0.76

In [99]:
#weights sklearn learned for the 5 features (one row of 5 values in the output; no intercept entry)
sk_log.coef_

array([[ 0.06550428,  0.02378677, -0.1592897 , -0.25692392, -1.21171222]])

In [101]:
sk_log.coef_-log.coef()[1:] #drop our intercept weight (index 0): sklearn also fits an intercept but stores it in intercept_, not in coef_

array([[-0.00095993, -0.00016801, -0.00080718,  0.00363154,  0.01196109]])

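Overall, we were able to implement the Logistic Regression classification model from scratch using batch gradient descent. We achieved similar performance to sklearn's implementation: 0.764 test accuracy versus 0.76 for sklearn, with the learned feature coefficients differing from sklearn's by at most about 0.012. Yay!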