In [None]:
# General SVM Class that will be a base for all the other SVM Implementations
import torch
import numpy as np

class SVM(object):
    """
        Base class for SVM models trained with minibatch stochastic gradient descent.

        Data layout (the one `predict` relies on): samples are stored column-wise,
        so a data matrix has shape (D, N) for N points of dimension D. The weight
        `self.W` has D + 1 rows; row 0 multiplies the constant-1 feature (the bias).
        Subclasses provide the objective by overriding `loss`.
    """

    def __init__(self):
        # Filled in by `train` (or assigned directly); `predict` needs it to be set.
        self.W = None

    def train(self, X_train, y_train, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
            Fit `self.W` with minibatch stochastic gradient descent.

            Inputs:
            - X_train: NumPy array of shape (D, N); one training point per column.
            - y_train: NumPy array with N labels, shape (N,) or (1, N).
            - learning_rate: (float) step size of each gradient update.
            - reg: (float) regularization strength, forwarded to `loss`.
            - num_iters: (int) number of SGD steps.
            - batch_size: (int) number of points sampled (with replacement) per step.
            - verbose: (bool) if True, print the loss every 100 iterations.

            Returns: a list with the loss value of every iteration.

            Raises: ValueError if the inputs are empty or their sizes disagree.
        """
        X_train = np.asarray(X_train)
        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (D, N)")
        dim, num_train = X_train.shape
        # Labels are kept as a (1, N) row so a batch can be sliced like X_train.
        y_train = np.asarray(y_train).reshape(1, -1)

        if num_train == 0:
            raise ValueError("X_train must contain at least one data point")
        if y_train.shape[1] != num_train:
            raise ValueError("y_train must provide one label per column of X_train")
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        if self.W is None:
            # Small random start; an existing W is kept so training can be resumed.
            self.W = 0.001 * np.random.randn(dim + 1, 1)

        loss_history = []
        for it in range(num_iters):
            # Sampling with replacement keeps this valid when batch_size > N.
            batch_idx = np.random.choice(num_train, batch_size)
            X_batch = X_train[:, batch_idx]
            y_batch = y_train[:, batch_idx]

            loss, grad = self.loss(self.W, X_batch, y_batch, reg)
            loss_history.append(loss)

            # Rebind instead of updating in place so a caller-supplied W array
            # is never mutated behind the caller's back.
            self.W = self.W - learning_rate * grad

            if verbose and it % 100 == 0:
                print("iteration %d / %d: loss %f" % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
            Compute the raw decision scores for the columns of X.

            Inputs:
            - X: NumPy array of shape (D, N); one data point per column.

            Returns: W.T applied to [1; X], i.e. one column of scores per data
            point. The scores are not thresholded into class labels.
        """
        n = X.shape[1]
        # Prepend a row of ones so that row 0 of W acts as the bias term.
        X_til = np.vstack([np.ones((1, n)), X])
        pred = np.dot(self.W.T, X_til)
        return pred

    def loss(self, W, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs (as passed by `train`):
        - W: NumPy array with D + 1 rows holding the current weights (bias in row 0).
        - X_batch: NumPy array of shape (D, B) containing a minibatch of B
          data points, one per column.
        - y_batch: NumPy array of shape (1, B) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to W; an array of the same shape as W
        """
        raise NotImplementedError
    
    
class LinearSVM(SVM):
    """
        Binary linear SVM (class labels -1 / +1) using the L2-regularized hinge loss.
    """

    def loss(self, W, X_batch, y_batch, reg):
        """
        Compute the regularized hinge-loss objective and a subgradient of it.

            J(W) = (1/N) * sum_i max{0, 1 - y_i (w.T x_i + b)} + (reg/2) * ||w||^2

        where W = [b, w_1, ..., w_D].T. The bias b is not regularized, so the
        regularization term contributes nothing to the bias entry of the gradient.

        Inputs:
        - W: NumPy array of shape (D+1, 1) (a flat (D+1,) array is also accepted)
          holding the bias in entry 0 followed by the D feature weights.
        - X_batch: NumPy array of shape (D, N) containing a minibatch of N
          data points, one per column.
        - y_batch: NumPy array with the N labels (-1 or +1), shape (1, N) or (N,).
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - J: the objective value as a single float
        - dJ: subgradient of J with respect to W; an array of the same shape as W

        Raises: ValueError if the batch is empty or the label count differs from N.
        """
        # Work on a column vector / row vector internally, whatever rank was given.
        W_col = W.reshape(-1, 1)
        y_row = y_batch.reshape(1, -1)

        n = X_batch.shape[1]
        if n == 0:
            raise ValueError("X_batch must contain at least one data point")
        if y_row.shape[1] != n:
            raise ValueError("y_batch must provide one label per column of X_batch")

        # Prepend a row of ones so that W_col[0] acts as the bias.
        X = np.vstack([np.ones((1, n)), X_batch])

        # Per-sample hinge terms, shape (1, N); points beyond the margin give 0.
        hinge = 1 - y_row * np.dot(W_col.T, X)
        inactive = hinge < 0
        hinge[inactive] = 0.0

        # For the subgradient only points on or inside the margin contribute
        # -y_i * x_i / N; zeroing the label removes the others from the sum.
        y_active = y_row.copy()
        y_active[inactive] = 0

        # Scalar reductions keep J a true scalar (no 1x1 array to convert).
        J = float(np.sum(hinge) / n + 0.5 * reg * np.sum(W_col[1:, :] ** 2))

        # Gradient of the penalty: reg * w for the feature weights, 0 for the
        # bias, matching the objective above which leaves the bias unpenalized.
        penalty_grad = reg * W_col
        penalty_grad[0, :] = 0.0

        dJ = -np.dot(X, y_active.T) / n + penalty_grad

        return J, dJ.reshape(W.shape)
    