<a href="https://colab.research.google.com/github/himanshugaur17/machine-learning-algos/blob/main/multi_class_svm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import seaborn as sns
import scipy.sparse as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
    # Toy dataset: 5 samples with 2 features each, and one 0/1 label per sample.
    X = np.array([[-2, 4], [4, 1], [1, 6], [2, 4], [6, 2]])
    y = np.array([0, 0, 1, 1, 1])
    # Displays the shape of the label vector; y is 1-D, so transposing it
    # changes nothing and the result is (5,).
    np.shape(y.T)

(5,)

In [64]:
class SVMClassifier:
    """Linear binary classifier fitted by full-batch subgradient descent.

    The objective is a hinge loss plus an L2 penalty on the weight vector
    (the intercept is not penalised), both divided by ``2 * n``.  The hinge
    term ``1 - y * h`` presumes labels encoded as -1 / +1, which is also what
    ``predict`` emits.
    """

    def __init__(self, lam):
        """Remember the regularisation strength; parameters are unset until ``fit``."""
        self.regularizer = lam
        self.theta = None
        self.b = None

    def loss(self, h, y):
        """Return ``(sum(max(0, 1 - h*y)) + lam * theta.theta) / (2 * n)``.

        ``h`` holds the decision values and ``y`` the matching labels; ``n`` is
        taken from ``y.shape[0]``.
        """
        n_samples = y.shape[0]
        penalty = self.regularizer * np.dot(self.theta, self.theta.T)
        hinge_total = np.sum((1 - h * y).clip(min=0))
        return (hinge_total + penalty) / (2 * n_samples)

    def fit(self, X, y, n_iters = 100, alpha = 1):
        """Reset the parameters to zero and run ``n_iters`` descent steps.

        Returns a list with ``n_iters + 1`` loss values: the loss at the zero
        initialisation followed by the loss after every update.
        """
        n_samples, n_features = X.shape
        self.theta = np.zeros((n_features,))
        self.b = 0

        history = [self.loss(self.decision_function(X), y)]
        step = alpha / (2 * n_samples)

        for _ in range(n_iters):
            scores = self.decision_function(X)
            # Only samples on or inside the margin contribute to the subgradient.
            active = y * scores <= 1

            grad_b = -1 * (y[active].sum())
            masked_yx = active.astype(int).reshape(-1, 1) * (y.reshape(-1, 1) * X)
            grad_theta = -1 * masked_yx.sum(axis=0) + 2 * self.regularizer * self.theta

            self.b = self.b - step * grad_b
            self.theta = self.theta - step * grad_theta

            history.append(self.loss(self.decision_function(X), y))
        return history

    def get_params(self):
        """Return the pair ``(theta, b)``."""
        return self.theta, self.b

    def decision_function(self, X):
        """Return the raw scores ``X . theta + b``."""
        return np.dot(X, self.theta.T) + self.b

    def predict(self, X):
        """Return +1 where the decision value is >= 0 and -1 elsewhere."""
        scores = self.decision_function(X)
        return np.where(scores >= 0, 1, -1)

def binary_svm_classifier(lam = 1e-4):
    """Build an unfitted ``SVMClassifier`` with regularisation strength ``lam``."""
    return SVMClassifier(lam=lam)

In [75]:
class MultiClassSVMClassifier:
    """One-vs-rest multi-class linear SVM assembled from binary SVMs.

    ``fit`` trains one binary classifier per class (that class labelled +1,
    every other class -1).  The learned weight vectors become the columns of
    ``theta`` and the intercepts the entries of ``b``; ``predict`` returns the
    class whose decision value is largest.
    """

    def __init__(self, lam):
        """Store the regularisation strength; parameters are unset until ``fit``."""
        self.regularizer = lam
        self.theta = None
        self.b = None

    def generate_label_matrix(self, Y, k):
        """Return an ``(n, k)`` float matrix with +1 at ``[i, Y[i]]`` and -1 elsewhere.

        ``Y`` must hold integer class indices in ``range(k)``.
        """
        n = Y.shape[0]
        y = np.full((n, k), -1.0)
        y[np.arange(n), Y] = 1
        return y

    def loss(self, h, y):
        """Multi-class hinge loss of decision values ``h`` against label matrix ``y``.

        For each sample the true-class score is subtracted from every class
        score and a margin of 1 is added; negative values are clipped to 0.
        The true-class column always contributes exactly 1 per sample, which
        the ``- n`` term removes.  The L2 penalty on ``theta`` is added and the
        total is divided by ``2 * n``.
        """
        n = h.shape[0]
        penalty = (self.regularizer) * (np.sum(np.square(self.theta)))
        # Zeroing the -1 entries leaves only each row's true-class score.
        true_class_score = (h * np.where(y < 0, 0, y)).sum(axis=1).reshape(-1, 1)
        margins = (h - true_class_score + 1).clip(min=0)
        return (margins.sum() - n + penalty) / (2 * n)

    def fit(self, X, Y, n_iters = 100, alpha = 1):
        """Train one binary SVM per class and return the final multi-class loss.

        ``Y`` holds integer class indices; the number of classes is taken as
        ``Y.max() + 1``.  ``n_iters`` and ``alpha`` are forwarded to every
        binary fit, and the binary learners use this instance's regularisation
        strength so that training and ``loss`` agree on it.
        """
        k = Y.max() + 1
        n, d = X.shape
        self.theta = np.zeros((d, k))
        self.b = np.zeros((k,))
        y = self.generate_label_matrix(Y, k)
        # A single binary learner is reused; its fit() restarts from zero each call.
        lr = binary_svm_classifier(lam = self.regularizer)
        for i in range(k):
            lr.fit(X, y[:, i], n_iters, alpha = alpha)
            self.theta[:, i], self.b[i] = lr.get_params()
        return self.loss(self.decision_function(X), y)

    def get_params(self):
        """Return the pair ``(theta, b)``."""
        return self.theta, self.b

    def decision_function(self, X):
        """Return the per-class scores ``X . theta + b``."""
        return np.dot(X, self.theta) + self.b

    def predict(self, X):
        """Return, for every row of ``X``, the index of the highest-scoring class."""
        return np.argmax(self.decision_function(X), axis=1)

# do not modify this function
def multiclass_svm_classifier(lam = 1e-4):
    """Return a new, unfitted ``MultiClassSVMClassifier`` constructed with ``lam``."""
    return MultiClassSVMClassifier(lam)

In [63]:
    # Toy dataset: 23 two-feature points with integer class labels 0..3.
    # The same arrays are rebuilt inside test_multiclass_svm_classifier; these
    # cell-level copies are presumably kept for interactive experimentation.
    X = np.array([
        [1, 6], [1, 7], [2, 5], [2, 8],
        [4, 2], [4, 3], [5, 1], [5, 2],
        [5, 3], [6, 1], [6, 2], [9, 4],
        [9, 7], [10, 5], [10, 6], [11, 6],
        [5, 9], [5, 10], [5, 11], [6, 9],
        [6, 10], [7, 10], [8, 11]
    ])
    y = np.array([
        0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3
    ])


In [77]:
def test_multiclass_svm_classifier(ref_dir = "/content/drive/MyDrive/ApNotebook/machine_learning_v2_2/local_test_refs"):
    """Regression test for the multi-class SVM on a 23-point, 4-class toy set.

    The classifier is refitted with 0, 1, 2 and 1000 iterations; after each fit
    the parameters, the returned loss, the decision values and the predictions
    are compared with stored expectations.  Raises ``AssertionError`` on the
    first mismatch and prints a confirmation when everything matches.

    ``ref_dir`` is the directory holding the reference decision-value files
    ``multi_svm_decision_function_<n_iters>.npy``.  It defaults to the original
    Colab Drive location, so calling the test without arguments behaves as
    before; pass another directory to run it elsewhere.
    """
    def load_reference(n_iters):
        # Read with np.loadtxt exactly as before; despite the .npy suffix the
        # files are presumably plain text -- TODO confirm.
        return np.loadtxt("{}/multi_svm_decision_function_{}.npy".format(ref_dir, n_iters))

    X = np.array([
        [1, 6], [1, 7], [2, 5], [2, 8],
        [4, 2], [4, 3], [5, 1], [5, 2],
        [5, 3], [6, 1], [6, 2], [9, 4],
        [9, 7], [10, 5], [10, 6], [11, 6],
        [5, 9], [5, 10], [5, 11], [6, 9],
        [6, 10], [7, 10], [8, 11]
    ])
    y = np.array([
        0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3
    ])
    multi_svm = multiclass_svm_classifier(lam = 1e-4)

    # before gradient descent
    final_loss = multi_svm.fit(X, y, n_iters = 0)
    thetas, bs = multi_svm.get_params()
    assert np.allclose(thetas, np.zeros((2, 4)))
    assert np.allclose(bs, np.zeros(4))
    assert np.allclose(final_loss, 1.5)
    assert np.allclose(multi_svm.decision_function(X), np.full((len(y), 4), 0))
    # All scores tie at zero, so argmax falls back to the first class.
    assert list(multi_svm.predict(X)) == [0] * len(y)

    # gradient descent 1 iter
    final_loss = multi_svm.fit(X, y, n_iters = 1)
    thetas, bs = multi_svm.get_params()
    assert np.allclose(thetas, np.array(
        [[-2.60869565, -1.34782609, -0.73913043, -1.04347826],
         [-1.86956522, -2.39130435, -1.7826087,   0.04347826]]
    ))
    assert np.allclose(bs, [-0.32608696, -0.19565217, -0.2826087, -0.19565217])
    assert np.allclose(final_loss, 4.607799712336648)
    assert np.allclose(multi_svm.decision_function(X), load_reference(1))
    assert list(multi_svm.predict(X)) == [3] * len(y)

    # gradient descent 2 iters
    final_loss = multi_svm.fit(X, y, n_iters = 2)
    thetas, bs = multi_svm.get_params()
    assert np.allclose(thetas, np.array(
        [[-2.478249527410208, -0.5869506616257089, 0.32609017013232533, -0.173908506616257],
         [-1.3043396975425332, -2.086946124763705, -1.1739052930056708, 1.282608506616257]]
    ))
    assert np.allclose(bs, [-0.2391304347826087, -0.043478260869565216, -0.17391304347826086, -0.08695652173913045])
    assert np.allclose(final_loss, 6.08933628122314)
    assert np.allclose(multi_svm.decision_function(X), load_reference(2))
    assert list(multi_svm.predict(X)) == [3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

    # gradient descent 1000 iters
    final_loss = multi_svm.fit(X, y, n_iters = 1000)
    thetas, bs = multi_svm.get_params()
    assert np.allclose(thetas, np.array(
        [[-3.0304888496823272, -0.173522930862437, 1.8837726999313502, -0.1064134037200739],
         [1.1689677242800083, -1.5149193085489543, -0.5185826453689177, 2.3815731713004644]]
    ))
    assert np.allclose(bs, [1.260869565217392, 6.56521739130436, -12.260869565217392, -20.695652173913004])
    assert np.allclose(final_loss, 4.864387185921901e-05)
    assert np.allclose(multi_svm.decision_function(X), load_reference(1000))
    assert list(multi_svm.predict(X)) == list(y)
    print("All tests passed!")


# Run the regression test: it raises AssertionError on the first mismatch and
# prints "All tests passed!" when every check succeeds.
test_multiclass_svm_classifier()

All tests passed!


In [70]:
# Display the stored reference decision values that the test compares against
# after the 1000-iteration fit.  The absolute path presumably requires Google
# Drive to be mounted in Colab -- TODO confirm before running elsewhere.
np.loadtxt("/content/drive/MyDrive/ApNotebook/machine_learning_v2_2/local_test_refs/multi_svm_decision_function_1000.npy")

array([[  5.24418706,  -2.69782139, -13.48859274,  -6.51262655],
       [  6.41315479,  -4.2127407 , -14.00717538,  -4.13105338],
       [  1.04473049,  -1.35642501, -11.08623739,  -9.00061312],
       [  4.55163366,  -5.90118294, -12.64198533,  -1.85589361],
       [ -8.52315038,   2.84128705,  -5.76294406, -16.35815945],
       [ -7.35418266,   1.32636774,  -6.2815267 , -13.97658627],
       [-12.72260696,   4.18268343,  -3.36058871, -18.84614602],
       [-11.55363923,   2.66776412,  -3.87917136, -16.46457285],
       [-10.38467151,   1.15284481,  -4.397754  , -14.08299968],
       [-15.75309581,   4.0091605 ,  -1.47681601, -18.95255942],
       [-14.58412808,   2.49424119,  -1.99539866, -16.57098625],
       [-21.33765918,  -1.05616622,   2.61875415, -12.12708012],
       [-17.83075601,  -5.60092415,   1.06300622,  -4.98236061],
       [-23.19918031,  -2.74460846,   3.98394421,  -9.85192035],
       [-22.03021259,  -4.25952777,   3.46536156,  -7.47034718],
       [-25.06070144,  -4