<a href="https://colab.research.google.com/github/dropthejase/ml_training/blob/main/ml_from_scratch/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
class LogisticRegression():
  """Binary logistic regression trained with full-batch gradient descent.

  Parameters
  ----------
  iter : int
      Number of gradient-descent iterations performed by ``fit``.
  lr : float
      Learning rate (step size) applied to every parameter update.
  reg : {None, 'l1', 'l2'}
      Penalty applied to the weights. Any other value disables the penalty.
  reg_lambda : float
      Strength of the penalty; has no effect when no penalty is selected.

  Attributes
  ----------
  weight : numpy.ndarray or None
      Learned coefficients, one per feature; ``None`` until ``fit`` is called.
  bias : float or None
      Learned intercept; ``None`` until ``fit`` is called.
  """

  def __init__(self, iter=1000, lr=0.001, reg=None, reg_lambda=0.01):
    self.iter = iter
    self.lr = lr

    self.reg = reg
    self.reg_lambda = reg_lambda

    self.weight = None
    self.bias = None

  def _penalty_gradient(self):
    """Return the gradient of the penalty term at the current weights."""
    if self.reg == 'l1':
      # np.sign is the subgradient of |w|; unlike w / |w| it yields 0 rather
      # than NaN when a weight is exactly zero.
      return self.reg_lambda * np.sign(self.weight)
    if self.reg == 'l2':
      return self.reg_lambda * self.weight
    return 0

  def fit(self, X_train, y_train):
    """Learn ``weight`` and ``bias`` from the training data.

    Parameters
    ----------
    X_train : array-like of shape (N, D)
        Feature matrix.
    y_train : array-like of N labels in {0, 1}
        Target labels; a column-shaped input is flattened.

    Raises
    ------
    ValueError
        If ``X_train`` contains no rows.
    """
    X = np.asarray(X_train, dtype=float)
    # Flatten so an (N, 1) label column cannot broadcast against the (N,)
    # prediction vector and silently produce an (N, N) residual.
    y = np.asarray(y_train, dtype=float).ravel()

    N, D = X.shape
    if N == 0:
      raise ValueError("X_train must contain at least one sample")

    # initialise w and b
    self.weight = np.ones(D)
    self.bias = 0.0

    # gradient descent
    for _ in range(self.iter):
      residual = self.predict(X) - y

      # The penalty gradient depends on the current weights, so it has to be
      # re-evaluated on every iteration rather than once before the loop.
      dw = (1/N) * np.dot(X.T, residual) + self._penalty_gradient()
      db = (1/N) * np.sum(residual)

      self.weight -= self.lr * dw
      self.bias -= self.lr * db

  def predict(self, X):
    """Return the predicted probability of class 1 for every row of ``X``.

    The values are sigmoid outputs, not hard labels; round them to obtain
    0/1 class predictions.
    """
    a = np.dot(X, self.weight) + self.bias
    y_pred = 1 / (1 + np.exp(-a))
    return y_pred

  def score(self, X_test, y_test, print=False):
    """Return classification accuracy on ``(X_test, y_test)``, rounded to 2 decimals.

    ``print`` is accepted only for backward compatibility; it is not used.
    """
    N = np.shape(X_test)[0]
    y_pred = np.round(self.predict(X_test)).astype(int)
    y_true = np.asarray(y_test).ravel()
    accuracy = np.sum(y_pred == y_true) / N

    return np.round(accuracy, 2)


**LOAD SAMPLE DATA**

In [None]:
from sklearn import datasets

# Load scikit-learn's bundled Iris dataset; its .data and .target attributes are used below
iris = datasets.load_iris()

In [None]:
# Tabulate the four measurements under short snake_case names and attach the class label
df = pd.DataFrame(
    data=iris.data,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
).assign(target=iris.target)

In [None]:
# Keep only the rows labelled 0 or 1 so that two classes remain
df = df.loc[df['target'] < 2]

In [None]:
# Shuffle the rows (frac=1 samples every row); no random_state is given, so the order differs between runs
df = df.sample(frac = 1)

In [None]:
# Renumber the shuffled rows 0..n-1; drop=True discards the old index
# instead of materialising it as an 'index' column that must then be removed
df = df.reset_index(drop=True)

**TRAIN TEST SPLIT**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Separate the label column from the feature columns
y = df['target']
X = df.drop(columns='target')

In [None]:
# Hold out 33% of the rows for testing; random_state pins the split for a given row order
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

**TEST THE MODEL**

In [None]:
# Instantiate the from-scratch model with its default hyperparameters
reg = LogisticRegression()

In [None]:
# Run gradient descent on the training split; fit() stores the learned weight and bias on reg
reg.fit(X_train, y_train)

In [None]:
# Inspect the learned parameters, rounded to 2 decimals for display
{'weights': np.round(reg.weight,2), 'biases': np.round(reg.bias,2)}

{'weights': array([-0.39,  0.01,  0.68,  0.97]), 'biases': -0.28}

In [None]:
# Hard class labels: round each predicted probability to the nearest integer
np.round(reg.predict(X_test)).astype(int)

array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1])

In [None]:
# Accuracy on the held-out split (score() returns it rounded to 2 decimals)
reg.score(X_test, y_test)

1.0

In [None]:
# Side-by-side table of the true labels and the model's rounded predictions.
# NOTE(review): the name `eval` shadows Python's builtin eval() for the rest of the session.
eval = pd.DataFrame(y_test).assign(
    predicted=np.round(reg.predict(X_test)).astype(int)
)
eval

**Compare with Scikit Learn**

In [None]:
from sklearn import linear_model

In [None]:
# Reference model: scikit-learn's LogisticRegression with its default settings
reg_scikit = linear_model.LogisticRegression()

In [None]:
# Fit the reference model on the same training split
reg_scikit.fit(X_train, y_train)

LogisticRegression()

In [None]:
# Same parameter summary for the scikit-learn model (coef_ / intercept_), rounded to 2 decimals
{'weights': np.round(reg_scikit.coef_,2), 'biases': np.round(reg_scikit.intercept_,2)}

{'weights': array([[ 0.37, -0.77,  2.13,  0.9 ]]), 'biases': array([-5.96])}

In [None]:
# Predictions of the scikit-learn model on the test split.
# NOTE(review): the recorded output is already integer labels, so the rounding looks redundant — confirm
np.round(reg_scikit.predict(X_test),0)

array([0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1])

In [None]:
# Score of the scikit-learn model on the same held-out split, for comparison
reg_scikit.score(X_test, y_test)

1.0