<a href="https://colab.research.google.com/github/himanshugaur17/machine-learning-algos/blob/main/multi_class_logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import seaborn as sns
import scipy.sparse as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
    # Five 2-D sample points, one per row, with their binary labels.
    X = np.array([
        [-2, 4],
        [4, 1],
        [1, 6],
        [2, 4],
        [6, 2],
    ])
    y = np.array([0] * 2 + [1] * 3)
    # Transposing a 1-D array is a no-op, so this still shows (5,).
    y.T.shape

(5,)

In [40]:
class LRClassifier:
    """Binary logistic regression with an L2 penalty, fitted by full-batch gradient descent.

    After ``fit``, ``theta`` holds one weight per feature column of ``X`` and
    ``b`` the intercept. The objective reported by ``loss`` is

        lam * (theta . theta) / (2n) - [y . log(h) + (1 - y) . log(1 - h)] / (2n)

    where ``h = sigmoid(X theta + b)`` and ``n`` is the number of samples.
    """

    # Largest integer exponent for which np.exp is still finite in float64.
    _EXP_LIMIT = 709.0
    # Smallest positive normal float64; used as a floor before taking logs so
    # that a saturated probability yields a finite log instead of -inf / nan.
    _LOG_FLOOR = np.finfo(float).tiny

    def __init__(self, lam):
        """Store the regularisation weight ``lam``; parameters stay unset until ``fit``."""
        self.regularizer = lam
        self.theta = None
        self.b = None

    def loss(self, h, y):
        """Return the regularised loss for predicted probabilities ``h`` and 0/1 labels ``y``.

        Both the penalty and the log-likelihood are divided by ``2 * n`` where
        ``n = len(y)``. Probabilities that are exactly 0 or 1 are floored
        before the logarithm, so the result is finite rather than nan/inf.
        Reads ``self.theta`` and ``self.regularizer``.
        """
        h = np.asarray(h, dtype=float)
        y = np.asarray(y)
        n = y.shape[0]
        penalty = (self.regularizer * np.dot(self.theta, self.theta)) / (2 * n)
        # Flooring only alters entries that would otherwise give log(0).
        log_h = np.log(np.maximum(h, self._LOG_FLOOR))
        log_not_h = np.log(np.maximum(1.0 - h, self._LOG_FLOOR))
        log_likelihood = (np.dot(y, log_h) + np.dot(1 - y, log_not_h)) / (2 * n)
        return penalty - log_likelihood

    def fit(self, X, y, n_iters = 100, alpha = 1):
        """Run ``n_iters`` gradient-descent steps starting from zero parameters.

        Parameters
        ----------
        X : array-like of shape (n, k)
        y : array-like of shape (n,) holding 0/1 labels
        n_iters : number of update steps (0 leaves the parameters at zero)
        alpha : learning rate; each step is scaled by ``alpha / (2 * n)``

        Returns
        -------
        list
            Loss before training followed by the loss after each step
            (``n_iters + 1`` entries).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n, k = X.shape
        self.theta = np.zeros((k,))
        self.b = 0
        h = self.decision_function(X)
        loss_list = [self.loss(h, y)]
        step = alpha / (2 * n)
        for _ in range(n_iters):
            # `h` from the end of the previous step is still current here, so
            # no extra forward pass is needed at the top of the loop.
            error = h - y
            grad_theta = np.dot(X.T, error) + 2 * self.regularizer * self.theta
            self.b = self.b - step * np.sum(error, axis=0)
            self.theta = self.theta - step * grad_theta
            h = self.decision_function(X)
            loss_list.append(self.loss(h, y))
        return loss_list

    def get_params(self):
        """Return the tuple ``(theta, b)``."""
        return self.theta, self.b

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``.

        Inputs below ``-_EXP_LIMIT`` are raised to that floor so ``exp`` never
        overflows; for all other inputs the arithmetic is unchanged.
        """
        x = np.maximum(x, -self._EXP_LIMIT)
        return 1.0 / (1.0 + np.exp(-x))

    def decision_function(self, X):
        """Return ``sigmoid(X theta + b)``, one probability per row of ``X``."""
        return self.sigmoid(np.dot(X, self.theta) + self.b)

    def predict(self, X):
        """Return 1 where the predicted probability is >= 0.5, otherwise 0."""
        h = self.decision_function(X)
        return np.where(h >= 0.5, 1, 0)

def binary_lr_classifier(lam = 1e-4):
    """Factory for the binary model: build an LRClassifier with regularisation weight `lam`."""
    classifier = LRClassifier(lam)
    return classifier

In [4]:
def test_binary_lr_classifier():
    """Check the binary classifier on a five-point data set against reference values.

    Fits once with zero iterations (parameters must remain zero) and once with
    1000 iterations, comparing parameters, final loss, probabilities and
    predicted labels with the expected numbers.
    """
    features = np.array([[-2, 4], [4, 1], [1, 6], [2, 4], [6, 2]])
    labels = np.array([0, 0, 1, 1, 1])
    clf = binary_lr_classifier(lam = 1e-4)

    # No gradient steps: only the initial loss is recorded.
    loss_history = clf.fit(features, labels, n_iters = 0)
    weights, bias = clf.get_params()
    print(loss_history)
    assert np.allclose(weights, [0, 0])
    assert bias == 0

    # Full training run.
    loss_history = clf.fit(features, labels, n_iters = 1000)
    weights, bias = clf.get_params()
    expected_weights = [1.62475335, 2.97699553]
    expected_bias = -12.016701793625622
    expected_final_loss = 0.0178892651602277
    expected_probabilities = [
        0.0336268115487116,
        0.07305423924580728,
        0.9994304104089492,
        0.9585441655688948,
        0.9755365947084815,
    ]
    assert np.allclose(weights, expected_weights)
    assert np.allclose(bias, expected_bias)
    assert np.allclose(loss_history[-1], expected_final_loss)
    assert np.allclose(clf.decision_function(features), expected_probabilities)
    assert clf.predict(features).tolist() == [0, 0, 1, 1, 1]
    print("All tests passed!")


test_binary_lr_classifier()

[0.5 0.5 0.5 0.5 0.5]
[0.3465735902799726]
[0.03362681 0.07305424 0.99943041 0.95854417 0.97553659]
All tests passed!


In [57]:
class MultiClassLRClassifier:
    """Multi-class classifier built from one binary logistic regression per class.

    ``fit`` trains a separate binary model for every class label (that class
    against all others) and stores the resulting weights column-wise in
    ``theta`` (shape ``(d, k)``) and the intercepts in ``b`` (shape ``(k,)``).
    ``decision_function`` applies a softmax across the k per-class scores.
    """

    # Smallest positive normal float64; floor applied before log so that a
    # probability of exactly 0 gives a finite value instead of -inf / nan.
    _LOG_FLOOR = np.finfo(float).tiny

    def __init__(self, lam):
        """Store the regularisation weight ``lam``; parameters stay unset until ``fit``."""
        self.regularizer = lam
        self.theta = None
        self.b = None

    def _new_binary_classifier(self, lam):
        """Create the binary model used for each one-vs-rest sub-problem."""
        return binary_lr_classifier(lam = lam)

    def generate_label_matrix(self, Y, k):
        """Return the ``(n, k)`` one-hot matrix for integer labels ``Y`` in ``[0, k)``."""
        Y = np.asarray(Y)
        n = Y.shape[0]
        y = np.zeros((n, k))
        y[np.arange(n), Y] = 1
        return y

    def loss(self, h, y):
        """Return ``(lam * sum(theta**2) - sum(y * log(h))) / (2 * n)``.

        ``h`` is the ``(n, k)`` probability matrix and ``y`` the matching
        one-hot labels. Zero probabilities are floored before the logarithm
        so the result stays finite.
        """
        h = np.asarray(h, dtype=float)
        n = h.shape[0]
        penalty = self.regularizer * np.sum(np.square(self.theta))
        cross_entropy = -np.sum(y * np.log(np.maximum(h, self._LOG_FLOOR)))
        return (penalty + cross_entropy) / (2 * n)

    def fit(self, X, Y, n_iters = 100, alpha = 1):
        """Train one binary classifier per class and return the final softmax loss.

        Parameters
        ----------
        X : array-like of shape (n, d)
        Y : array-like of shape (n,) with integer labels ``0 .. Y.max()``
        n_iters : gradient steps for each binary fit
        alpha : learning rate forwarded to each binary fit

        The binary models use this instance's ``regularizer`` and the given
        ``alpha``, so non-default settings take effect.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        k = Y.max() + 1
        n, d = X.shape
        self.theta = np.zeros((d, k))
        self.b = np.zeros((k,))
        y = self.generate_label_matrix(Y, k)
        lr = self._new_binary_classifier(self.regularizer)
        for i in range(k):
            # Column i of the one-hot matrix is the "class i vs rest" target.
            lr.fit(X, y[:, i], n_iters = n_iters, alpha = alpha)
            self.theta[:, i], self.b[i] = lr.get_params()
        return self.loss(self.decision_function(X), y)

    def get_params(self):
        """Return the tuple ``(theta, b)``."""
        return self.theta, self.b

    def decision_function(self, X):
        """Return the row-wise softmax of ``X theta + b`` as an ``(n, k)`` matrix."""
        scores = np.dot(X, self.theta) + self.b
        # Subtracting each row's maximum leaves the softmax unchanged but
        # keeps exp() from overflowing on large scores.
        scores = scores - np.max(scores, axis=1, keepdims=True)
        fx = np.exp(scores)
        return fx / np.sum(fx, axis=1, keepdims=True)

    def predict(self, X):
        """Return, for each row of ``X``, the index of the most probable class."""
        h = self.decision_function(X)
        return np.argmax(h, axis=1)

# do not modify this function
def multiclass_lr_classifier(lam = 1e-4):
    """Return a new MultiClassLRClassifier constructed with `lam`.

    `lam` defaults to 1e-4 and is passed unchanged as the constructor's only
    argument.
    """
    return MultiClassLRClassifier(lam)

In [None]:
def test_multiclass_lr_classifier(ref_dir = "/content/drive/MyDrive/ApNotebook/machine_learning_v2_2/local_test_refs"):
    """Check the multi-class classifier against reference values.

    The classifier is refitted with 0, 1, 2 and 1000 iterations on a 23-point,
    4-class data set. After each fit the parameters, returned loss, class
    probabilities and predicted labels are compared with expected values.

    Parameters
    ----------
    ref_dir : str
        Directory holding the reference probability files
        ``multi_lr_decision_function_<n_iters>.npy`` (read with ``np.loadtxt``).
        Defaults to the Google Drive location the notebook was written against.
    """
    X = np.array([
        [1, 6], [1, 7], [2, 5], [2, 8],
        [4, 2], [4, 3], [5, 1], [5, 2],
        [5, 3], [6, 1], [6, 2], [9, 4],
        [9, 7], [10, 5], [10, 6], [11, 6],
        [5, 9], [5, 10], [5, 11], [6, 9],
        [6, 10], [7, 10], [8, 11]
    ])
    y = np.array([
        0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3
    ])
    multi_lr = multiclass_lr_classifier(lam = 1e-4)

    def load_reference(n_iters):
        # One reference file per iteration count, all under ref_dir.
        return np.loadtxt("%s/multi_lr_decision_function_%d.npy" % (ref_dir, n_iters))

    def check(n_iters, thetas_ref, bs_ref, loss_ref, labels_ref, probs_ref = None):
        # Refit from scratch and compare every observable with its reference.
        final_loss = multi_lr.fit(X, y, n_iters = n_iters)
        thetas, bs = multi_lr.get_params()
        assert np.allclose(thetas, thetas_ref), "thetas mismatch at n_iters=%d" % n_iters
        assert np.allclose(bs, bs_ref), "bs mismatch at n_iters=%d" % n_iters
        assert np.allclose(final_loss, loss_ref), "loss mismatch at n_iters=%d" % n_iters
        # The reference file is read only after the cheaper checks pass.
        if probs_ref is None:
            probs_ref = load_reference(n_iters)
        assert np.allclose(multi_lr.decision_function(X), probs_ref), "probabilities mismatch at n_iters=%d" % n_iters
        assert list(multi_lr.predict(X)) == labels_ref, "predictions mismatch at n_iters=%d" % n_iters

    # before gradient descent
    check(
        0,
        thetas_ref = np.zeros((2, 4)),
        bs_ref = np.zeros(4),
        loss_ref = 0.6931471805599453,
        labels_ref = [0] * len(y),
        probs_ref = np.full((len(y), 4), 0.25),
    )

    # gradient descent 1 iter
    check(
        1,
        thetas_ref = np.array(
            [[-1.3043478260869565, -0.6739130434782609, -0.3695652173913043, -0.5217391304347826],
             [-0.9347826086956521, -1.1956521739130435, -0.8913043478260869, 0.021739130434782608]]
        ),
        bs_ref = [-0.16304347826086957, -0.09782608695652174, -0.14130434782608695, -0.09782608695652174],
        loss_ref = 1.5948661586516544,
        labels_ref = [3] * len(y),
    )

    # gradient descent 2 iter
    check(
        2,
        thetas_ref = np.array(
            [[-1.1741525763887666, 0.08405329650269378, 0.6749300069596299, 0.250023946810188],
             [-0.3698696640278152, -0.8923347376289046, -0.2904085511104315, 1.252575005389721]]
        ),
        bs_ref = [-0.07616801209977848, 0.05374654543619424, -0.03683221319180832, 0.006708777994691106],
        loss_ref = 2.4507247738274804,
        labels_ref = [3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
    )

    # gradient descent 1000 iter
    check(
        1000,
        thetas_ref = np.array(
            [[-4.43691485302322, -0.15895242949854863, 1.8314459978612432, 0.11428400803141586],
             [1.4462434773266433, -2.243038284524103, -0.4123296983423064, 1.8358466248685217]]
        ),
        bs_ref = [3.3982221320824473, 8.9389563873553, -12.268402031207522, -15.67416835171075],
        loss_ref = 0.0020306591987543157,
        labels_ref = list(y),
    )

    print("All tests passed!")

test_multiclass_lr_classifier()

In [12]:
# 23 two-dimensional sample points, listed class by class.
X = np.array([
    # class 0 (4 points)
    [1, 6], [1, 7], [2, 5], [2, 8],
    # class 1 (7 points)
    [4, 2], [4, 3], [5, 1], [5, 2], [5, 3], [6, 1], [6, 2],
    # class 2 (5 points)
    [9, 4], [9, 7], [10, 5], [10, 6], [11, 6],
    # class 3 (7 points)
    [5, 9], [5, 10], [5, 11], [6, 9], [6, 10], [7, 10], [8, 11],
])
# Labels follow the same grouping: 4 zeros, 7 ones, 5 twos, 7 threes.
Y = np.repeat([0, 1, 2, 3], [4, 7, 5, 7])

In [13]:
      # Problem dimensions: n samples, d features, k classes (largest label + 1).
      n, d = X.shape
      k = classes_count = Y.max() + 1

In [14]:
# Zero-initialised parameters: a (d, k) weight matrix and a length-k bias vector.
theta = np.zeros(shape=(d, k))
b = np.zeros(shape=k)

In [15]:
# Display the current weight matrix.
theta

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [18]:
# Display the current bias vector.
b

array([0., 0., 0., 0.])

In [24]:
# Row-wise softmax of the linear scores X @ theta + b.
res = np.exp(X @ theta + b)
res = res / res.sum(axis=1, keepdims=True)
# Not the last expression of the cell, so this line displays nothing.
res

# Most probable class per row; ties resolve to the lowest index.
res.argmax(axis=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0])

In [46]:
# Read the 1000-iteration reference file with np.loadtxt and display the resulting array.
np.loadtxt("/content/drive/MyDrive/ApNotebook/machine_learning_v2_2/local_test_refs/multi_lr_decision_function_1000.npy")

array([[9.99990410e-01, 4.47500324e-06, 1.18952478e-09, 5.11372566e-06],
       [9.99992338e-01, 1.11830297e-07, 1.85441211e-10, 7.54989236e-06],
       [9.86930825e-01, 1.27413199e-02, 3.97325990e-06, 3.23881661e-04],
       [9.98944793e-01, 2.01263517e-07, 1.52369318e-08, 1.05499068e-03],
       [2.32566492e-07, 9.99930762e-01, 6.87924349e-05, 2.12892518e-07],
       [9.30285119e-06, 9.99549140e-01, 4.28984439e-04, 1.25727948e-05],
       [8.06142791e-11, 9.99919292e-01, 8.07027097e-05, 4.73569021e-09],
       [3.22450465e-09, 9.99496482e-01, 5.03235275e-04, 2.79664161e-07],
       [1.28690738e-07, 9.96852362e-01, 3.13103041e-03, 1.64787246e-05],
       [1.11762241e-12, 9.99409676e-01, 5.90317656e-04, 6.22052392e-09],
       [4.45850005e-11, 9.96328405e-01, 3.67122813e-03, 3.66372725e-07],
       [3.34263719e-15, 1.74791366e-02, 9.82469941e-01, 5.09225622e-05],
       [8.60067764e-13, 7.01874190e-05, 9.57763933e-01, 4.21658800e-02],
       [4.13443382e-17, 3.89500073e-04, 9.99522395e