# Logistic Regression from scratch
### Using the iris dataset

### The sigmoid function:  $\sigma(z)$
$$\sigma(z)=\frac{1}{1+e^{-z}}$$

$$\sigma'(z)= \sigma(z)*(1- \sigma(z))$$

The logistic regression model used here is:
$$y=\sigma(\mathbf{X}\theta)$$

In [None]:
import numpy as np
import sklearn
from sklearn import datasets

# Load iris and reduce it to a two-feature, binary problem.
iris = datasets.load_iris()
X = iris.data[:, 0:2]  # keep only the first two feature columns
y = 1 * (iris.target != 0)  # boolean mask times 1 gives integer labels: class 0 vs. the rest
lr = 0.01  # learning rate

In [None]:
# In case you want to try each function one at a time to see the results
def add_intercept(X):
    """Return a new array equal to ``X`` with a column of ones prepended.

    The ones column is what lets the first weight act as the bias term.
    """
    n_rows = X.shape[0]
    ones_column = np.ones((n_rows, 1))
    return np.concatenate((ones_column, X), axis=1)
    
def sigmoid(z):
    """Logistic function ``1 / (1 + e**-z)``, applied elementwise to ``z``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def loss(h, y):
    """Return the elementwise binary cross-entropy of predictions ``h``.

    Args:
        h: Predicted probabilities, expected to lie in [0, 1].
        y: Labels (0 or 1), broadcastable against ``h``.

    Returns:
        ``-(y * log(h) + (1 - y) * log(1 - h))`` for each element; no
        averaging is applied.
    """
    # Keep h strictly inside (0, 1): log(0) is -inf and 0 * -inf is nan,
    # which would otherwise poison even a perfectly correct prediction.
    eps = 1e-15
    h = np.clip(h, eps, 1 - eps)
    return -(y * np.log(h) + (1 - y) * np.log(1 - h))

def fit(X, y, lr=0.01, iterations=10000):
    """Fit logistic-regression weights with batch gradient descent.

    Args:
        X: 2-D feature array with one row per sample and no intercept column.
        y: 1-D array of 0/1 labels, one per row of ``X``.
        lr: Step size applied to the gradient at each update.
        iterations: Number of gradient-descent steps to run.

    Returns:
        The learned weight vector ``theta``. It has one more entry than ``X``
        has columns; the first entry is the intercept weight.
    """
    X = add_intercept(X)
    theta = np.zeros(X.shape[1])

    for _ in range(iterations):
        # Clamp extreme logits before applying the sigmoid.
        z = np.dot(X, theta).clip(-1e4, 1e4)
        h = sigmoid(z)

        # Mean gradient of the cross-entropy loss with respect to theta.
        gradient = np.dot(X.T, h - y) / y.shape[0]
        # Step against the gradient, scaled by the learning rate.
        theta = theta - lr * gradient

    return theta

def predict_probs(X, theta):
    """Return the predicted probability of class 1 for each row of ``X``.

    Args:
        X: 2-D feature array with one row per sample and no intercept column.
        theta: Weight vector whose first entry is the intercept weight, so it
            has one more entry than ``X`` has columns.

    Returns:
        1-D array of sigmoid outputs, one per row of ``X``.
    """
    # theta carries an intercept weight, so X needs the matching ones column.
    X = add_intercept(X)
    return sigmoid(np.dot(X, theta))

def predict(X, theta, threshold=0.5):
    """Return a boolean mask that is True where the predicted probability
    from ``predict_probs(X, theta)`` is at least ``threshold``.
    """
    probabilities = predict_probs(X, theta)
    is_positive = probabilities >= threshold
    return is_positive


In [3]:
class LogisticRegression(object):
    """Binary logistic regression trained with batch gradient descent.

    Attributes:
        lr: Step size applied to the gradient at each update.
        iterations: Number of gradient-descent steps run by ``fit``.
        theta: Learned weight vector (intercept weight first); it only
            exists after ``fit`` has been called.
    """

    def __init__(self, lr=0.01, iterations=10000):
        self.lr = lr
        self.iterations = iterations

    # Names with two leading underscores are name-mangled by Python
    # (e.g. ``_LogisticRegression__sigmoid``), which keeps these helpers
    # from being called by their plain name from outside the class.
    def __add_intercept(self, X):
        """Return ``X`` with a column of ones prepended for the bias term."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Logistic function ``1 / (1 + e**-z)``, applied elementwise."""
        return 1 / (1 + np.exp(-z))

    def __loss(self, h, y):
        """Return the elementwise binary cross-entropy of predictions ``h``."""
        # Keep h strictly inside (0, 1): log(0) is -inf and 0 * -inf is nan.
        eps = 1e-15
        h = np.clip(h, eps, 1 - eps)
        return -(y * np.log(h) + (1 - y) * np.log(1 - h))

    def fit(self, X, y):
        """Learn ``self.theta`` from features ``X`` and 0/1 labels ``y``.

        ``X`` is a 2-D array without an intercept column; one is added here,
        so ``self.theta`` ends up with ``X.shape[1] + 1`` entries.
        """
        X = self.__add_intercept(X)
        self.theta = np.zeros(X.shape[1])

        for _ in range(self.iterations):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)

            # Mean gradient of the cross-entropy loss with respect to theta.
            gradient = np.dot(X.T, (h - y)) / y.shape[0]
            self.theta = self.theta - self.lr * gradient

    def predict_probs(self, X, theta=None):
        """Return the predicted probability of class 1 for each row of ``X``.

        Args:
            X: 2-D feature array without an intercept column.
            theta: Weight vector to use; defaults to the fitted ``self.theta``.
        """
        if theta is None:
            theta = self.theta
        X = self.__add_intercept(X)
        return self.__sigmoid(np.dot(X, theta))

    def predict(self, X, theta=None, threshold=0.5):
        """Return integer labels: 1 where the probability is >= ``threshold``.

        Args:
            X: 2-D feature array without an intercept column.
            theta: Weight vector to use; defaults to the fitted ``self.theta``.
            threshold: Probability cut-off for predicting class 1.
        """
        h = self.predict_probs(X, theta)
        return (h >= threshold) * 1
 

In [4]:
# Train on the full dataset, then score the model on that same data.
# decrease learning rate to avoid vanishing gradient?
model = LogisticRegression(iterations=10000, lr=0.01)
model.fit(X, y)
predictions = model.predict(X, model.theta)


# training accuracy: the fraction of predictions that equal the labels
acc = np.mean(predictions == y)
print(acc)


0.9933333333333333


Generic ML algo:

for i in number of iterations: <br/>
    -train <br/>
    -loss <br/>
    -gradient (RMSE) <br/>
    -gradient descent (update weights) <br/>
    -validation on the validation set <br/>
    
    
After all of that, you evaluate the model on the test data to see how well it has done.
