# Simple Logistic Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [2]:
data = pd.read_csv("./datasets/loan_data.csv", usecols=["person_income", "loan_status"])
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45000 entries, 0 to 44999
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   person_income  45000 non-null  float64
 1   loan_status    45000 non-null  int64  
dtypes: float64(1), int64(1)
memory usage: 703.3 KB


In [3]:
# Created dummy data for testing, concept: if income > 25 => loan approved 1 else not approved 0
data = pd.DataFrame({
  "person_income": [12, 25, 56, 26, 29, 8, 9, 17, 28, 22, 29, 9, 67, 33, 32, 21],
  "loan_status":   [ 0,  0,  1,  1,  1, 0, 0,  0,  1,  0,  1, 0,  1,  1,  1, 0]
})

In [None]:
def gradient_descent(w_now, b_now, x, y_tar, lr=0.0001):
  ''' 
  m : no.of records, a column matrix of shape (m, 1)
  x : feature matrix, a column matrix of shape (m, 1)
  y_tar : target matrix, a column matrix of order (m, 1)
  y_hat : predicted metrix, a column matrix of order (m, 1)
  w_now : current weight, a real number
  w_new : new weight, a real number
  dw    : weight gradient, a real number
  b_now : current bias, a real number
  b_new : new bias, a real number
  db    : bias gradient
  '''
  m = x.shape[0]
  y_hat = 1 / (1 + np.exp(-(w_now * x + b_now)))
  dw = (1 / m) * (y_hat.T @ ((y_hat - y_tar) * (1 - y_hat) * x))
  db = (1 / m) * (y_hat.T @ ((y_hat - y_tar) * (1 - y_hat)))
  w_new = w_now - lr * dw.item()
  b_new = b_now - lr * db.item()
  print(w_new, b_new)
  return w_new, b_new

class SimpleLogisticRegression():
  def __init__(self):
    self.b = 0
    self.w = 0
  def fit(self, x, y, epochs=50):

    # make sure both tare numpy arrays and do not modify original data.
    if not isinstance(x, np.ndarray):
      x = x.values.copy().reshape(-1, 1)
    if not isinstance(y, np.ndarray):
      y = y.values.copy().reshape(-1, 1)

    # feature scaling, MinMaxScaler
    # NOTE: needed due to resolution of floating-values.
    min = x.min()
    max = x.max()
    for val in x:
      val[0] = (val[0] - min) / (max - min)

    for _ in range(epochs):
      self.w, self.b = gradient_descent(self.w, self.b, x, y)

    self.yfit = self.w * x + self.b
    print(self.w) # For debugging

model1d = SimpleLogisticRegression()
model1d.fit(data["person_income"], data["loan_status"])

pd.DataFrame({
  "person_income": data["person_income"],
  "loan_status": data["loan_status"],
  "predict_status": model1d.yfit.reshape(-1),
})

7.8125e-07 0.0
1.5624996948240998e-06 -3.051759005223698e-13
2.3437490844722987e-06 -9.15525913761095e-13
3.1249981689445974e-06 -1.8310482494815486e-12
3.906246948240996e-06 -3.0517411226532736e-12
4.6874954223614935e-06 -4.577602743388587e-12
5.468743591306091e-06 -6.408631323881475e-12
6.249991455074787e-06 -8.544825076325923e-12
7.031239013667584e-06 -1.0986182213262862e-11
7.81248626708448e-06 -1.3732700946886278e-11
8.593733215325474e-06 -1.6784379484532928e-11
9.37497985839057e-06 -2.0141216043600973e-11
1.0156226196279764e-05 -2.380320883385578e-11
1.0937472228993058e-05 -2.7770356067838284e-11
1.1718717956530451e-05 -3.2042655955313857e-11
1.2499963378891945e-05 -3.662010671125204e-11
1.3281208496077539e-05 -4.150270654784682e-11
1.4062453308087231e-05 -4.6690453674863573e-11
1.4843697814921024e-05 -5.2183346304843224e-11
1.5624942016578917e-05 -5.7981382652408373e-11
1.6406185913060908e-05 -6.408456092767134e-11
1.7187429504367e-05 -7.04928793403975e-11
1.7968672790497187e-05

Unnamed: 0,person_income,loan_status,predict_status
0,12,0,-3.738054e-10
1,25,0,-3.738054e-10
2,56,1,-3.738054e-10
3,26,1,-3.738054e-10
4,29,1,-3.738054e-10
5,8,0,-3.738054e-10
6,9,0,-3.738054e-10
7,17,0,-3.738054e-10
8,28,1,-3.738054e-10
9,22,0,-3.738054e-10
