# Logistic Regression from scratch, Iris

Adapted from Pellarolo, Martin.  ["Logistic Regression from scratch in Python."](https://medium.com/@martinpella/logistic-regression-from-scratch-in-python-124c5636b8ac) Medium. Feb. 22, 2018.

### Data

In [1]:
from sklearn import datasets

# Load the Iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Design matrix: every feature column is used.
X = iris.data

# Binary target: class 0 stays 0, and the two remaining classes (which are not
# linearly separable from each other) are merged into a single positive class 1.
y = (iris.target != 0).astype(int)

### Equations

$ h_\theta(x) = g(\theta^Tx) $

$ z = \theta^Tx$

Sigmoid

$g(z) = \frac{1}{1+e^{-z}}$

Loss Function

$h = g(X\theta)$

$J(\theta) = \frac{1}{m}\left(-y^T \log(h) - (1-y)^T \log(1-h)\right)$

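Gradient Descent (gradient of the loss function with respect to the weights, then the weight update with learning rate $\alpha$)

$\nabla_\theta J(\theta) = \frac{1}{m}X^T(g(X\theta)-y)$

$\theta := \theta - \alpha \nabla_\theta J(\theta)$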

In [2]:
import numpy as np
from typing import Union

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The weights are stored in ``self.theta`` after calling :meth:`fit`.  When
    ``fit_intercept`` is True a column of ones is prepended to the inputs, so
    ``theta[0]`` is the intercept and the remaining entries are the feature
    weights.
    """

    # Number of iterations between progress reports when ``verbose`` is set.
    _LOG_EVERY = 10_000

    def __init__(self, lr: float = 0.01, num_iter: Union[float, int] = 1e5,
                 fit_intercept: bool = True, verbose: bool = False):
        """Instantiate logistic regression model

        Arguments:
            lr (float): learning rate, i.e. the step size applied to every
                gradient update
            num_iter Union[float, int]: number of gradient-descent iterations;
                converted with ``int()`` so values such as ``1e5`` are accepted
            fit_intercept (bool): if True, prepend a column of ones to X so
                that an intercept term is learned
            verbose (bool): if True, print the loss and the weights every
                10,000 iterations while fitting
        """
        self.lr = lr
        self.num_iter = int(num_iter)
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    def _add_intercept(self, X):
        """Add intercept column of ones

        Arguments:
            X (np.array): 2-D array of independent variables

        Returns (np.array): X with a leading column of ones
        """
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def _sigmoid(self, z):
        """logistic function, aka sigmoid.  Gives outputs between 0 and 1 for all values of z.

        Arguments:
            z: matrix of inputs dot multiplied by weights

        Returns (np.array): element-wise 1 / (1 + exp(-z))
        """
        z = np.asarray(z, dtype=float)
        # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
        # exp(-z) does for large negative z.  Both branches below are
        # algebraically equal to 1 / (1 + exp(-z)).
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    def _gradient_descent(self, X, y):
        """Minimize the loss function to 'fit' weights through the use of gradient descent

        Performs a single update step and stores the result in ``self.theta``.

        Arguments:
            X (np.array): independent variables
            y (np.array): target variable

        Returns (np.array): updated weights
        """
        z = np.dot(X, self.theta)  # X\theta
        h = self._sigmoid(z)

        # derivative of the loss function with respect to each weight
        gradient = np.dot(X.T, (h - y)) / y.shape[0]
        self.theta -= self.lr * gradient
        return self.theta

    def _loss(self, h, y):
        """Measure how parameters/weights (theta) perform against actual targets

        Arguments:
            h (np.array): independent variables transformed by weights and sigmoid
            y (np.array): target variable

        Returns (float): mean cross-entropy loss of predicted vs actual values
        """
        # Keep probabilities strictly inside (0, 1): a saturated sigmoid yields
        # exactly 0 or 1, and 0 * log(0) would otherwise turn the loss into nan.
        eps = np.finfo(float).eps
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Adjust weights through the use of gradient descent to minimize loss function

        Arguments:
            X (np.array): independent variables, one row per sample
            y (np.array): target variable with values 0/1; a column vector of
                shape (m, 1) is flattened to shape (m,)

        Returns (np.array): final weights (the same array as ``self.theta``)

        Raises:
            ValueError: if X and y contain a different number of samples
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so that (h - y) stays one-dimensional instead of
        # broadcasting against a column vector.
        y = np.asarray(y, dtype=float).ravel()
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]}"
            )

        if self.fit_intercept:
            X = self._add_intercept(X)

        # weights initialization
        self.theta = np.zeros(X.shape[1])

        # gradient descent
        for i in range(self.num_iter):
            self.theta = self._gradient_descent(X, y)

            if self.verbose and i % self._LOG_EVERY == 0:
                z = np.dot(X, self.theta)
                h = self._sigmoid(z)
                loss = self._loss(h, y)
                print(f"loss: {loss}\ttheta: {self.theta}\t")

        return self.theta

    def predict_prob(self, X):
        """Use trained weights to predict probability of an array of independent variables

        Arguments:
            X (np.array): independent variables

        Returns (np.array): predicted probability of the positive class per row

        Raises:
            AttributeError: if the model has not been fitted yet
        """
        if not hasattr(self, "theta"):
            raise AttributeError(
                "LogisticRegression has no attribute 'theta'; call fit(X, y) first"
            )

        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self._add_intercept(X)

        return self._sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Determine prediction based on probability vs a threshold

        Arguments:
            X (np.array): independent variables
            threshold (float): Threshold at which classification is positive vs negative

        Returns (np.array): boolean predictions, True where the predicted
            probability is greater than or equal to ``threshold``
        """
        return self.predict_prob(X) >= threshold

In [3]:
# Train the from-scratch model on the full data set; verbose=True makes fit()
# print the loss and weights at regular intervals.
model = LogisticRegression(
    lr=0.1,
    num_iter=3e5,
    fit_intercept=True,
    verbose=True,
)
model.fit(X, y)

loss: 0.5518148780651356	theta: [0.01666667 0.1253     0.0386     0.13916667 0.05176667]	
loss: 0.0009260487284893174	theta: [-0.50737031 -0.80046713 -2.79745681  4.35579803  2.00574432]	
loss: 0.0004964360803264601	theta: [-0.56140932 -0.88897972 -3.11101408  4.85051629  2.25210803]	
loss: 0.00034433226750420003	theta: [-0.59392067 -0.94300082 -3.29764476  5.14829545  2.40300338]	
loss: 0.00026543657803717035	theta: [-0.61746509 -0.9824939  -3.43135472  5.36342423  2.51316817]	
loss: 0.000216826367785434	theta: [-0.63602952 -1.01385351 -3.53574369  5.53252175  2.60040554]	
loss: 0.0001837400049202278	theta: [-0.65140793 -1.03997669 -3.62143955  5.67214256  2.67284486]	
loss: 0.00015970148734077025	theta: [-0.66456568 -1.06243101 -3.69415725  5.79121832  2.73490567]	
loss: 0.0001414114307579239	theta: [-0.67608352 -1.0821639  -3.75732965  5.8951302   2.789267  ]	
loss: 0.00012700778779256526	theta: [-0.6863387  -1.09979333 -3.81318244  5.98737661  2.83767951]	
loss: 0.00011535810515011

In [4]:
# Boolean class predictions for every sample in X, using the default 0.5 threshold
preds = model.predict(X)
preds

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [5]:
# Ground-truth binary labels, shown for visual comparison with preds
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [6]:
# Accuracy on the data the model was fit on: fraction of predictions equal to the labels
np.mean(preds == y)

1.0

In [7]:
# Learned weights; with fit_intercept=True the first entry is the intercept
model.theta

array([-0.79687435, -1.29314673, -4.38930778,  6.96197446,  3.35690585])

### Scikit-learn Implementation

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [9]:
model = LogisticRegression()
model.fit(X,y)
predicted_classes = model.predict(X)
accuracy = accuracy_score(y, predicted_classes)
print(f"Accuracy: {accuracy}")
parameters = model.coef_
print(f"Parameters: {parameters}")

Accuracy: 1.0
Parameters: [[ 0.44501376 -0.89999242  2.32353827  0.97345836]]
