In [1]:
"""
Train model and eval model helpers.
"""
from __future__ import print_function

import numpy as np
#import cvxopt
#import cvxopt.solvers


def train_model(data, model, learning_rate=0.001, batch_size=16,
                num_steps=1000, shuffle=True):
    """Runs mini-batch stochastic gradient descent on a linear model.

    Each of the num_steps outer iterations is one full pass (epoch) over the
    data, split into consecutive mini-batches of batch_size examples. After
    every epoch the total loss over the whole data set is printed.

    If shuffle is true:
        Shuffle data at every epoch, including the 0th epoch.

    If the number of examples is not divisible by batch_size, the last batch
    will simply be the remaining examples.

    Args:
        data(dict): Data loaded from io_tools. Must provide the numpy arrays
            data['image'] and data['label'].
        model(LinearModel): Initialized linear model.
        learning_rate(float): Learning rate of your choice.
        batch_size(int): Batch size of your choice.
        num_steps(int): Number of epochs (full passes over the data) to run.
        shuffle(bool): Whether to shuffle data at every epoch.

    Returns:
        model(LinearModel): Returns a trained model.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive batch size would otherwise silently train nothing
        # (or divide by zero), so fail loudly instead.
        raise ValueError("batch_size must be a positive integer")

    x = data['image']
    # Labels are used as a column vector so they line up with the (N, 1)
    # scores returned by model.forward.
    y = data['label'].reshape(-1, 1)
    num_examples = len(x)

    for _ in range(num_steps):
        if shuffle:
            # The same permutation is applied to both arrays to keep every
            # example paired with its label. Fancy indexing already copies.
            permutation = np.random.permutation(num_examples)
            x = x[permutation]
            y = y[permutation]

        # Stepping by batch_size yields ceil(N / batch_size) batches without
        # relying on true division; slicing past the end is clamped, so the
        # final batch holds whatever examples remain.
        for batch_start in range(0, num_examples, batch_size):
            batch_end = batch_start + batch_size
            update_step(x[batch_start:batch_end], y[batch_start:batch_end],
                        model, learning_rate)

        # Report the loss on the full data set once per epoch.
        print(model.total_loss(model.forward(x), y))

    return model



def update_step(x_batch, y_batch, model, learning_rate):
    """Applies one gradient-descent update (forward, backward, step).

    The model's weights are replaced by the current weights minus the
    learning rate times the gradient returned by model.backward.

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model; model.w is reassigned.
        learning_rate(float): Step size multiplying the gradient.
    """
    scores = model.forward(x_batch)
    gradient = model.backward(scores, y_batch)
    step = learning_rate * gradient
    # Assign a new array instead of updating in place, so any outside
    # reference to the previous weights is left untouched.
    model.w = model.w - step

def train_model_qp(data, model):
    """Computes and sets the optimal model weights (model.w) using a QP solver.

    Obtains the QP arguments from qp_helper, converts each of them to a
    double-precision cvxopt matrix, runs cvxopt.solvers.qp and reads the
    solution vector sol['x'] into a numpy array.

    NOTE(review): this function is an unfinished skeleton. The solution `z`
    is never used and model.w is set to None at the end, so calling it
    discards any existing weights.

    Args:
        data(dict): Data from utils.data_tools.preprocess_data.
        model(SupportVectorMachine): Support vector machine model.
    """
    P, q, G, h = qp_helper(data, model)
    # NOTE(review): the `import cvxopt` / `import cvxopt.solvers` lines at the
    # top of this file are commented out, so `cvxopt` looks undefined here
    # unless it is brought into scope elsewhere — confirm.
    P = cvxopt.matrix(P, P.shape, 'd')
    q = cvxopt.matrix(q, q.shape, 'd')
    G = cvxopt.matrix(G, G.shape, 'd')
    h = cvxopt.matrix(h, h.shape, 'd')
    sol = cvxopt.solvers.qp(P, q, G, h)
    # Solver solution as a numpy array; its layout depends on how qp_helper
    # formulates the problem.
    z = np.array(sol['x'])
    # Implementation here (do not modify the code above)
    pass
    # Set model.w
    # NOTE(review): placeholder; presumably this should be derived from `z`.
    model.w = None


def qp_helper(data, model):
    """Builds the arguments handed to the QP solver.

    This is currently a placeholder: neither argument is read and every
    returned entry is None.

    Args:
        data(dict): Data from utils.data_tools.preprocess_data.
        model(SupportVectorMachine): Support vector machine model.

    Returns:
        P(numpy.ndarray): P matrix in the qp program.
        q(numpy.ndarray): q matrix in the qp program.
        G(numpy.ndarray): G matrix in the qp program.
        h(numpy.ndarray): h matrix in the qp program.
    """
    # Implementation here.
    P = q = G = h = None
    return (P, q, G, h)


def eval_model(data, model):
    """Performs evaluation on a dataset.

    The loss is exactly model.total_loss on the full data set. No extra
    regularisation term is added here, because total_loss (see
    SupportVectorMachine.total_loss) already includes the L2 penalty, and
    this keeps the value comparable to the loss printed by train_model.

    Args:
        data(dict): Data loaded from io_tools. Must provide the numpy arrays
            data['image'] and data['label'].
        model(LinearModel): Initialized linear model.

    Returns:
        loss(float): model loss on data.
        acc(float): model accuracy on data, the fraction of examples whose
            integer-cast prediction equals the integer-cast label.
    """
    f = model.forward(data['image'])

    # Use column vectors for both labels and predictions: a 1-D label array
    # would broadcast against (N, 1) scores into an (N, N) matrix and
    # corrupt both the loss and the comparison below.
    y = data['label'].astype(int).reshape(-1, 1)
    pred = model.predict(f).astype(int).reshape(-1, 1)

    loss = model.total_loss(f, y)
    # The mean of the boolean matches is the accuracy; it is always a true
    # (floating point) division.
    acc = float(np.mean(pred == y))
    return loss, acc


In [2]:
"""Implements support vector machine."""

from __future__ import print_function
from __future__ import absolute_import

import numpy as np
from models.linear_model import LinearModel



class SupportVectorMachine(LinearModel):
    """Implements a linear support vector machine.

    The model is trained with the hinge loss plus an L2 penalty on the
    weights, scaled by self.w_decay_factor.
    """

    def backward(self, f, y):
        """Performs the backward operation based on the loss in total_loss.

        By backward operation, it means to compute the gradient of the loss
        w.r.t w. The hinge term uses a subgradient: examples with y*f < 1
        contribute -y*x, all other examples contribute nothing.

        Args:
            f(numpy.ndarray): Output of forward operation, dimension (N,1).
            y(numpy.ndarray): Ground truth label, dimension (N,1).
        Returns:
            total_grad(numpy.ndarray): Gradient of L w.r.t to self.w,
              dimension (ndims+1, 1).
        """
        # Force column vectors so the element-wise products below cannot
        # broadcast into an (N, N) matrix when a 1-D array is passed.
        f = np.reshape(f, (-1, 1))
        y = np.reshape(y, (-1, 1))

        reg_grad = self.w_decay_factor * self.w

        # 1.0 for examples that violate the margin (1 - y*f > 0), else 0.0.
        violated = (y * f < 1).astype(float)

        # assumes self.x is the (N, ndims+1) input cached by forward
        # — TODO confirm against LinearModel.forward.
        # x^T (ndims+1, N) times (N, 1) yields an (ndims+1, 1) column, the
        # same layout as reg_grad, so the sum below is element-wise.
        loss_grad = -np.dot(np.transpose(self.x), y * violated)

        total_grad = reg_grad + loss_grad
        return total_grad

    def total_loss(self, f, y):
        """The sum of the loss across batch examples + L2 regularization.
        Total loss is hinge_loss + w_decay_factor/2*||w||^2

        Args:
            f(numpy.ndarray): Output of forward operation, dimension (N,1).
            y(numpy.ndarray): Ground truth label, dimension (N,1).
        Returns:
            total_loss (float): sum hinge loss + regularization.
        """
        # Column vectors keep y*f element-wise even for 1-D inputs.
        f = np.reshape(f, (-1, 1))
        y = np.reshape(y, (-1, 1))

        hinge_loss = np.sum(np.maximum(0., 1. - y * f))
        l2_loss = 0.5 * self.w_decay_factor * np.sum(np.power(self.w, 2))
        return hinge_loss + l2_loss

    def predict(self, f):
        """Converts score to prediction.

        Args:
            f(numpy.ndarray): Output of forward operation, dimension (N,1).
        Returns:
            (numpy.ndarray): Hard predictions from the score, f,
              dimension (N,1). Tie break 0 to 1.0.
        """
        f = np.reshape(f, (-1, 1))
        # np.sign would map a score of exactly 0 to 0; the contract above
        # requires such ties to be predicted as 1.0.
        return np.where(f >= 0, 1.0, -1.0)


In [None]:
# NOTE(review): leftover scratch/debug cell. It uses `y`, `f`, `self` and
# `loss_grad`, none of which are defined at module level in the visible
# source, and it calls `np.transposet`, which looks like a typo of
# `np.transpose` — confirm, then fix or remove this cell.
print(np.maximum(0,np.sign(1 - y*f)).shape, loss_grad.shape, (self.x*y * (np.maximum(0,np.sign(1 - y*f)))).shape)
loss_grad = np.sum( - np.dot(np.transposet( y * (np.maximum(0,np.sign(1 - y*f)))), self.x), axis=0)

