In [1]:
# General SVM Class that will be a base for all the other SVM Implementations
import torch
import numpy as np

class SVM(object):
    def __init__(self):
        self.W = None

        
    def train(self, X_train, y_train, learning_rate=1e-3, reg=1e-5, num_iters=100,
            batch_size=200, print_progress=False, exit_diff=0.0005):
        """
            Takes in the training data and labels as well as training parameters.
            Updates the weights using stochastic gradient descent

            Inputs:
            - X_train: A PyTorch tensor of shape (N, D) containing training data; there are N
            training samples each of dimension D.
            - y_train: A PyTorch tensor of shape (N,) containing training labels; y[i] = c
            means that X[i] has label 0 <= c < C for C classes.
            - learning_rate: (float) learning rate for optimization.
            - reg: (float) regularization strength. (ie. lambda)
            - num_iters: (integer) number of steps to take when optimizing
            - batch_size: (integer) number of training examples to use at each step.
            - print_progress: (boolean) If true, print progress during optimization.
            - exit_diff: (float) condition to stop the gradient descent algorithm if the
            change in loss is too low.

            Returns: A tuple of:
            - W:        A PyTorch tensor giving the weight of SVM predictor
            - loss_all: A PyTorch tensor giving the values of the loss at each
                training iteration.
        """
        self.W, loss_history = train_linear_classifier(self.loss, self.W, X_train, y_train, learning_rate, reg, num_iters, batch_size, print_progress, exit_diff)
        
        return loss_history
        
    def predict(self, X):
        """
            Takes in the test data and outputs a prediction torch
        """
        # t: (N,C) class probability
        t = torch.matmul(X,W)

        y_pred = torch.sign(t,1)

        return y_pred

        
    def loss(self, W, X_batch, y_batch, reg):
    """
    Compute the loss function and its derivative.
    Subclasses will override this.

    Inputs:
    - W: A PyTorch tensor of shape (D, C) containing (trained) weight of a model.
    - X_batch: A PyTorch tensor of shape (N, D) containing a minibatch of N
      data points; each point has dimension D.
    - y_batch: A PyTorch tensor of shape (N,) containing labels for the minibatch.
    - reg: (float) regularization strength.

    Returns: A tuple containing:
    - loss as a single float
    - gradient with respect to self.W; an tensor of the same shape as W
    """
    raise NotImplementedError

    def sample_batch(X, y, num_train, batch_size):
        # randomly sample "batch_size" (Xi,yi) in (X,y)

        inx = torch.randint(0, num_train, (batch_size))

        X_batch = X[inx, :]
        y_batch = y[inx]

        return X_batch, y_batch

    def train_linear_classifier(loss_fun, W, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
            batch_size=200, print_progress=False, exit_diff=0.0005):
        N, D = X.shape

        # initialize weight
        if W is None:
            # number of classes
            C = torch.max(y)+1

            W = torch.zeros([D, C], dtype=X.dtype, device=X.device)
        else:
            C = W.shape[1]

        # Stochastic Gradient Descent
        loss_history = torch.zeros(num_iters, dtype=X.dtype, device=X.device)
        for it in xrange(num_iters):
            # sample batch
            X_batch, y_batch = sample_batch(X, y, N, batch_size)

            # compute loss and gradient
            loss, grad = loss_fun(X_batch, y_batch, reg)
            loss_history.append(loss)

            # update weight
            W = W - learning_rate*grad

            # early stopping
            if i > 0:
                if loss_history[i] - loss_history[i-1] < exit_diff:
                    break

            if print_progress and i % 100 == 0:
                print('iteration %d / %d: loss %f' % (i, num_iters, loss_history[i]))

        # return 
        return W, loss_history
    
class LinearSVM(SVM):
    def loss(self, W, X_batch, y_batch, reg):
    """
    Compute the loss function and its derivative.
    Subclasses will override this.

    Inputs:
    - W: A PyTorch tensor of shape (D, C) containing (trained) weight of a model.
    - X_batch: A PyTorch tensor of shape (N, D) containing a minibatch of N
      data points; each point has dimension D.
    - y_batch: A PyTorch tensor of shape (N,) containing labels for the minibatch.
    - reg: (float) regularization strength.

    Returns: A tuple containing:
    - loss as a single float
    - gradient with respect to self.W; an tensor of the same shape as W
    """
    

SyntaxError: invalid syntax (<ipython-input-1-9014a466bc9d>, line 16)

# Note

* Do we use bias? If so, where does the bias is append?
* Not sure why the template seperate train into train_linear_classifier