In [1]:
# EECS545 Hw3 (b)
# Subgradient.py
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so repeated runs start from the same state.
np.random.seed(0)

# Load the data set; samples are stored column-wise (one column per sample).
nuclear = sio.loadmat('nuclear.mat')
x, y = nuclear['x'], nuclear['y']

# Number of training samples (= number of columns of x).
n = x.shape[1]

# Stack a row of ones on top of the features so that theta[0] plays the
# role of the bias term in x_i.T.dot(theta).
ones_x = np.ones((1, n))
x_with_ones = np.concatenate((ones_x, x), axis=0)

# Per-sample regularization weight: lambda / n with lambda = 0.001.
reg = 0.001 / n

def get_L(theta, x_i, y_i):
    """Hinge loss max(0, 1 - y_i * x_i^T theta) of a single sample.

    theta and x_i are column vectors of the same length (x_i includes the
    leading 1 for the bias) and y_i is the sample's label.

    Returns the positive slack (as the array produced by the dot product)
    when the margin is violated, and the plain integer 0 otherwise.
    """
    margin = y_i * x_i.T.dot(theta)
    slack = 1 - margin
    return slack if slack > 0 else 0
    

def get_J_i(theta, x_i, y_i):
    """Per-sample objective: hinge loss / n plus the L2 penalty on the weights.

    J_i = get_L(theta, x_i, y_i) / n + reg / 2 * (||theta||^2 - theta[0]^2)

    The squared bias theta[0]^2 is subtracted so that only the non-bias
    entries of theta are penalized. `n` and `reg` are read from module scope.

    Returns a NumPy array of shape (1,).
    """
    hinge_share = get_L(theta, x_i, y_i) / n
    bias_sq = theta[0] ** 2
    weights_sq = theta.T.dot(theta) - bias_sq
    penalty = reg * weights_sq / 2
    return np.reshape(hinge_share + penalty, (1,))

def get_u_i(theta, x_i, y_i):
    """Subgradient of the per-sample objective J_i with respect to theta.

    J_i = get_L(theta, x_i, y_i) / n + reg / 2 * (||theta||^2 - theta[0]^2)

    * Penalty part: reg * theta with the bias entry (index 0) set to zero,
      because the bias is excluded from the penalty.
    * Hinge part: -(y_i / n) * x_i, but only when the hinge is active
      (get_L(...) > 0). When the margin is satisfied the hinge contributes
      nothing; at the kink itself 0 is a valid subgradient.

    theta and x_i are column vectors of equal length, y_i is the label.
    `n` and `reg` are read from module scope. theta is not modified.

    Returns a 1-D array with theta.shape[0] entries.
    """
    # reg * theta allocates a new array, so zeroing the bias slot below
    # does not touch the caller's theta.
    u_i = reg * np.asarray(theta, dtype=float)
    u_i[0] = 0

    # The data term only enters while the sample violates the margin.
    if get_L(theta, x_i, y_i) > 0:
        u_i = u_i - (y_i / n) * x_i

    return np.reshape(u_i, (theta.shape[0],))


# main code:
# Batch subgradient descent on J(theta) = sum_i J_i(theta) with a
# diminishing step size 100 / j.

# initialize theta (one entry per row of x_with_ones, bias first)
theta = np.zeros((x_with_ones.shape[0],1))
max_iter = 100
sum_J_i = 0.0
exit_diff = 0.0005
J = np.zeros((max_iter,))          # objective value recorded per iteration
num_iters = np.zeros((max_iter,))  # matching iteration numbers for plotting

for j in range(1,max_iter):
    # u[:, i] holds the subgradient contribution of sample i.
    u = np.zeros((theta.shape[0],n))
    prev_J = sum_J_i
    sum_J_i = 0.0
    for i in range(n):
        # Column i as a column vector; the length follows theta instead of
        # being hard-coded so the loop works for any feature dimension.
        x_i = np.reshape(x_with_ones[:,i],(theta.shape[0],1))
        y_i = y[:,i]

        # get_J_i returns a shape-(1,) array; take the scalar explicitly so
        # J[j-1] below is assigned a real scalar rather than relying on the
        # deprecated implicit 1-element-array -> scalar conversion.
        sum_J_i += get_J_i(theta, x_i, y_i).item()
        u[:,i] = get_u_i(theta, x_i, y_i)
    sum_u_i = np.sum(u,axis=1)
    J[j-1] = sum_J_i
    num_iters[j-1] = j

    # stopping condition: objective changed by less than exit_diff
    diff = np.abs(prev_J - sum_J_i)
    if(diff < exit_diff):
        break
    step = 100/j
    theta -= step * np.reshape(sum_u_i, (theta.shape[0],1))

print("theta is: ", theta)
print("last objective value is: ", J[j-1])


# first plot code:
# Select figure 1 *before* drawing so that all artists, the savefig call
# and plt.show() refer to the same, explicitly numbered figure.
plt.figure(1)
negInd = y == -1
posInd = y == 1
plt.scatter(x[0, negInd[0, :]], x[1, negInd[0, :]], color='b')
plt.scatter(x[0, posInd[0, :]], x[1, posInd[0, :]], color='r')

# code to plot the separating line  b + w1*x1 + w2*x2 = 0:
b = theta[0]
w1 = theta[1]
w2 = theta[2]
# A straight line only needs its two end points; use the range of the first
# feature instead of passing every (unsorted) sample abscissa to plt.plot.
x_min = x[0].min()
x_max = x[0].max()
x1 = np.array([x_min, x_max])
x2 = (-b-w1*x1)/w2
plt.plot(x1,x2, color='k')

plt.savefig("problem 3_b_1")
plt.show()



# second plot code:
# Objective value versus iteration number for the iterations actually run.
# The figure is selected before plotting; calling plt.figure(2) afterwards
# would open an additional, empty figure that plt.show() also displays.
plt.figure(2)
plt.plot(num_iters[:j],J[:j])
plt.savefig("problem 3_b_2")
plt.show()

tensor([1, 2])


In [None]:
# EECS 598 Hw2 SVM Implementation
import random
from abc import abstractmethod

import numpy as np
import torch

class SVMLinearClassifier(object):
  """
  Linear classifier that stores a weight matrix W and trains it with SGD.

  Training is delegated to train_linear_classifier and prediction to
  predict_linear_classifier; the loss itself is supplied by overriding
  `loss` in a subclass.
  """

  def __init__(self):
    # Seed both Python's and PyTorch's RNGs so runs are repeatable.
    random.seed(0)
    torch.manual_seed(0)
    # Weights are created lazily by the first call to train().
    self.W = None

  def train(self, X_train, y_train, learning_rate=1e-3, reg=1e-5, num_iters=100,
            batch_size=200, verbose=False):
    """
    Fit the weights with train_linear_classifier using self.loss.

    The returned weight matrix is stored in self.W.

    Returns: the loss history produced by train_linear_classifier.
    """
    train_args = (self.loss, self.W, X_train, y_train, learning_rate, reg,
                  num_iters, batch_size, verbose)
    self.W, loss_history = train_linear_classifier(*train_args)
    return loss_history

  def predict(self, X):
    """Return predict_linear_classifier(self.W, X)."""
    return predict_linear_classifier(self.W, X)

  @abstractmethod
  def loss(self, W, X_batch, y_batch, reg):
    """
    Compute the loss function and its derivative.
    Subclasses will override this.

    Inputs:
    - W: A PyTorch tensor of shape (D, C) containing (trained) weight of a model.
    - X_batch: A PyTorch tensor of shape (N, D) containing a minibatch of N
      data points; each point has dimension D.
    - y_batch: A PyTorch tensor of shape (N,) containing labels for the minibatch.
    - reg: (float) regularization strength.

    Returns: A tuple containing:
    - loss as a single float
    - gradient with respect to self.W; a tensor of the same shape as W
    """
    raise NotImplementedError

  def _loss(self, X_batch, y_batch, reg):
    """
    Evaluate self.loss at the current weights self.W.

    Returns: whatever self.loss returns (the (loss, gradient) tuple).
    """
    # The result must be returned; otherwise callers always receive None.
    return self.loss(self.W, X_batch, y_batch, reg)

  def save(self, path):
    """Write {'W': self.W} to `path` with torch.save."""
    torch.save({'W': self.W}, path)
    print("Saved in {}".format(path))

  def load(self, path):
    """Read a checkpoint written by save() and restore self.W (mapped to CPU)."""
    W_dict = torch.load(path, map_location='cpu')
    self.W = W_dict['W']
    print("load checkpoint file: {}".format(path))



def svm_loss_vectorized(W, X, y, reg):
  """
  Multiclass (structured) SVM hinge loss and its gradient, fully vectorized.

  The regularization term is reg * sum(W * W), i.e. it is NOT multiplied by
  1/2, so its gradient is 2 * reg * W.

  Inputs:
  - W: A PyTorch tensor of shape (D, C) containing weights.
  - X: A PyTorch tensor of shape (N, D) containing a minibatch of data.
  - y: A PyTorch tensor of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as torch scalar
  - gradient of loss with respect to weights W; a tensor of same shape as W
  """
  # Class scores laid out as (C, N): one column per sample.
  class_scores = (X.matmul(W)).t()
  num_samples = class_scores.shape[1]
  sample_idx = torch.arange(num_samples)

  # Score of the true class of every sample, shape (N,).
  true_scores = class_scores[y, sample_idx]

  # Hinge margins max(0, s_j - s_y + 1) for every (class, sample) pair.
  hinge = class_scores - true_scores + torch.ones_like(class_scores)
  negative = hinge < 0
  hinge[negative] = 0

  # The j == y_i entry of every column equals exactly 1, so the mean over
  # samples is one too large; subtract that constant.
  data_loss = hinge.sum()/num_samples - 1
  loss = data_loss + reg * torch.sum(W * W)

  # Gradient: reuse `hinge` as the coefficient matrix. Every violated margin
  # contributes +x_i to its class; the true class receives -x_i once per
  # violated margin of that sample.
  violated = hinge > 0
  hinge[violated] = 1
  hinge[y, sample_idx] = 0
  violations_per_sample = hinge.sum(dim=0)
  hinge[y, sample_idx] = -violations_per_sample

  # (C, N) x (N, D) -> (C, D); transpose back to W's (D, C) layout.
  dW = (hinge.matmul(X)).t()/num_samples
  dW = dW + 2*reg*W

  return loss, dW

def sample_batch(X, y, num_train, batch_size):
  """
  Draw a random minibatch for one step of gradient descent.

  batch_size row indices are drawn uniformly from [0, num_train) with
  torch.randint (so an index may repeat), and the same indices select rows
  of X and entries of y.

  Returns a tuple (X_batch, y_batch) with shapes (batch_size, dim) and
  (batch_size,).
  """
  picks = torch.randint(0, num_train, [batch_size])
  return X[picks, :], y[picks]


def train_linear_classifier(loss_func, W, X, y, learning_rate=1e-3,
                            reg=1e-5, num_iters=100, batch_size=200,
                            verbose=False):
  """
  Optimize a linear classifier's weights with stochastic gradient descent.

  Inputs:
  - loss_func: callable taking (W, X_batch, y_batch, reg) and returning a
    tuple (loss, dW).
  - W: A PyTorch tensor of shape (D, C) with the initial weights, or None to
    have small random weights created here. A given W is updated in place.
  - X: A PyTorch tensor of shape (N, D) containing training data; there are N
    training samples each of dimension D.
  - y: A PyTorch tensor of shape (N,) containing training labels; y[i] = c
    means that X[i] has label 0 <= c < C for C classes.
  - learning_rate: (float) step size of each update.
  - reg: (float) regularization strength, forwarded to loss_func.
  - num_iters: (integer) number of SGD steps.
  - batch_size: (integer) number of training examples sampled per step.
  - verbose: (boolean) If true, print the loss every 100 iterations.

  Returns: A tuple of:
  - W: The weight matrix after the last update.
  - loss_history: A list of Python scalars, the loss at each iteration.
  """
  num_train, dim = X.shape

  # Labels are assumed to run over 0...K-1, so K = max(y) + 1.
  if W is not None:
    num_classes = W.shape[1]
  else:
    num_classes = torch.max(y) + 1
    W = 0.000001 * torch.randn(dim, num_classes, device=X.device, dtype=X.dtype)

  loss_history = []
  for it in range(num_iters):
    # Fresh random minibatch for this step.
    X_batch, y_batch = sample_batch(X, y, num_train, batch_size)

    # Loss and gradient at the current weights.
    loss, grad = loss_func(W, X_batch, y_batch, reg)
    loss_history.append(loss.item())

    # Plain gradient step, applied in place.
    W -= learning_rate * grad

    if it % 100 == 0 and verbose:
      print('iteration %d / %d: loss %f' % (it, num_iters, loss))

  return W, loss_history