In [2]:
import random
import numpy as np
from data_process import get_CIFAR10_data
import math
from scipy.spatial import distance
#from models import KNN, Perceptron, SVM, Softmax
from kaggle_submission import output_submission_csv
%matplotlib inline

# Loading CIFAR-10

In the following cells we determine the number of images for each split and load the images.

In [3]:
# You can change these numbers for experimentation
# For submission we will use the default values 
TRAIN_IMAGES = 49000
VAL_IMAGES = 1000
TEST_IMAGES = 5000

In [4]:
data = get_CIFAR10_data(TRAIN_IMAGES, VAL_IMAGES, TEST_IMAGES)
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']

Convert the sets of images from dimensions of **(N, 3, 32, 32) -> (N, 3072)** where N is the number of images so that each **3x32x32** image is represented by a single vector.

In [5]:
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape)

if False:
    X_train = X_train / 135

    X_test = X_test / 135

    X_val = X_val / 135


(49000, 3072) (49000,) (1000, 3072) (1000,) (5000, 3072) (5000,)


### Get Accuracy

This function computes how well your model performs using accuracy as a metric.

In [6]:
def get_acc(pred, y_test):
    return np.sum(y_test==pred)/len(y_test)*100

# SVM Class

In [19]:
import numpy as np


class SVM():
    def __init__(self,alpha=.01, epochs=100, learning_rate = .001, reg_const= .001, minibatch_sz = 256):
        """
        Initialises Softmax classifier with initializing 
        weights, alpha(learning rate), number of epochs
        and regularization constant.
        """
        self.w = None
        self.alpha = alpha
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.reg_const = reg_const
        ## adding
        np.random.seed(90)
        self.minibatch_sz = 256
        self.debug=True
        

    """
    https://stats.stackexchange.com/questions/155088/gradient-for-hinge-loss-multiclass
    http://cs231n.github.io/optimization-1/#analytic
    """
    def calc_gradient(self, X: np.ndarray, y:np.ndarray, epoch=0) -> ( np.ndarray, float ): 
        
        """
            24:48 of video https://www.youtube.com/watch?v=h7iBpEHGVNc&t=2330s
            csc231 2017 lecture 3        
            Calculate gradient of the svm hinge loss.

            Inputs have dimension D, there are C classes, and we operate on minibatches
            of N examples.

            Inputs:
            - X_train: A numpy array of shape (N, D) containing a minibatch of data.
            - y_train: A numpy array of shape (N,) containing training labels; y[i] = c means
            that X[i] has label c, where 0 <= c < C.

            Returns:
            - gradient with respect to weights W; an array of same shape as W
        """
        this_loss = 0.0
        grad_w = np.zeros(self.w.shape) # initialize the gradient as zero
        assert(grad_w.shape == self.w.shape)
        N = X.shape[0]
        
        #regularization = self.l2_regularization_euclidean(self.w ,reg_const=self.reg_const)
   
        ### <!---- vectorized hinge loss
    
        ## calculate data loss
        a_scores = X.dot(self.w.T) #[a]
        a_s_y_i = np.reshape(a_scores[0][y],( a_scores[0][y].size, 1))
        a_margins = np.maximum(0,a_scores - a_s_y_i + .87)  ## calculate the pairwise margins in scores
        a_margins[y] = 0  ## zero out the margins for the correct classes
        a_data_loss = np.sum(a_margins) /  a_margins.shape[0] ## normalize by counts of training size (batch)
        if epoch % 50 == 0 : print("my", a_data_loss)
        ## calculate regularization cost
        a_l2_regularization_loss =  self.l2_regularization_euclidean(self.w, self.reg_const)
        a_loss_i = a_data_loss + a_l2_regularization_loss
   
            
    
    
    
    
        ### --->
        
        scores = X.dot(self.w.T) ##[a]
        
        true_class_scores = scores[np.arange(scores.shape[0]),y] # 
        # calculate pairwose margins
        margins = np.maximum(0, scores - np.transpose(np.matrix(true_class_scores)) + 1)  #
        # set the true class margin to zero
        margins[np.arange(N),y] = 0 #
        
        #if self.debug: print('margins/grad_w', margins.shape, grad_w.shape)
        
        
        this_loss = np.mean(np.sum(margins, axis=1))  ##calculate data loss
        this_loss += 1 * self.reg_const * np.linalg.norm(self.w)**2 ##np.sum(self.w*self.w)**2
        
        if self.debug and epoch % 50 == 0: print('loss at {}th'.format(epoch+1), this_loss)
        
        #print("reg ",  0.5 * self.l2_regularization_euclidean(self.w ,reg_const=self.reg_const))

        
        ##margins is used to seed the gradient calculation
        binarized = margins
        assert(binarized.shape==margins.shape)
        ## the derivative of the hinge function is the indicator function
        ## called on each margin calculation - where a non-zero value results in a 1 
        ## a 1 indicates a violation of the true class and incurs a penalty
        binarized[margins > 0 ] = 1
        ## binarized sum  
        row_sum = np.sum(binarized, axis=1)
        #if self.debug: print("row_sum", row_sum.shape)
        assert(grad_w.shape == self.w.shape)
        binarized[np.arange(N), y] = - np.transpose(row_sum)
        
        assert(binarized.shape == margins.shape)
        
        grad_w = np.dot(np.transpose(binarized), X)
        assert(grad_w.shape == self.w.shape)
        
        # average gradient
        grad_w = grad_w / N
        assert(grad_w.shape == self.w.shape)

        # apply regularization to gradient vector
        
        
        assert(grad_w.shape == self.w.shape)
        
        grad_w += self.alpha*self.w
        
        
        assert(grad_w.shape == self.w.shape)
        return grad_w, this_loss
    ########  

        
    def train(self, X_train : np.ndarray, y_train : np.ndarray):
        if self.debug: print(np.min(X_train[0]), np.max(X_train[0]), np.mean(X_train[0]))
        if self.debug: print("y_train", y_train.shape)
        current_epoch = 0
        """
        Train SVM classifier using stochastic gradient descent.

        Inputs:
        - X_train: A numpy array of shape (N, D) containing training data;
        N examples with D dimensions
        - y_train: A numpy array of shape (N,) containing training labels;
        
        Hint : Operate with Minibatches of the data for SGD
        """
        # initialize weight
        self._initialize_weights(X_train, y_train)
        loss_i = 99999999
        
        for X_batch, y_batch in self._minibatches(X_train, y_train, self.minibatch_sz): # [1]
            gradient_w, this_loss = self.calc_gradient( X_batch, y_batch, epoch=current_epoch) #[2]
            if abs(this_loss - loss_i) < .00001:
                break
            loss_i = this_loss
            # update  [3]
            
            self.w += gradient_w * self.learning_rate
            
            current_epoch += 1
            if current_epoch == self.epochs:
                break
                
        #pcode:
        #while True:
        #    [1] data_batch = sample_training_data(data, 256) # sample 256 examples
        #    weights_grad = evaluate_gradient(loss_fun, data_batch, weights)
        #    weights += - step_size * weights_grad # perform parameter update        

    def _minibatches(self, X: np.ndarray ,y: np.ndarray ,batchsize : int = 32, offset : int = 0):
        ## returns a slice of training data (X and y) of specified batchsize and optional offset into the arrays
        start_pos = 0 + offset
        shift_index = 0
        while start_pos < X.shape[0]:
            block = slice(start_pos, start_pos + batchsize)
            yield X[block], y[block]
            if (batchsize + start_pos) > X.shape[0]:
                shift_index += 1
            start_pos = shift_index + ((batchsize + start_pos) % X.shape[0])
        
    def _unique_labels(self, y: np.ndarray):
        """assembles the unique labels in an array, returns count and label vector"""
        labels = np.unique(y)
        return (labels).shape[0], labels

    def _initialize_weights(self, X:np.ndarray, y:np.ndarray):
        """initializes a weight vector for each label """
        label_count, labels = self._unique_labels(y)
        ## initializing with random numbers
        w = np.random.rand(label_count,X.shape[1])
        self.w = w

    def l2_regularization_euclidean(self, w : np.ndarray ,reg_const=.01):
        return reg_const * np.linalg.norm(w) 

    def predict(self, X_test):
        print('pred')
        pred = None
        """
        Use the trained weights of svm classifier to predict labels for
        data points.

        Inputs:
        - X_test: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.

        Returns:
        - pred: Predicted labels for the data in X_test. pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        if self.debug: print('w', self.w.shape, 'xT',np.transpose(X_test).shape,  X_test.shape)
        
        pred = X_test.dot(np.transpose(self.w))
        
        return pred


# Support Vector Machines (with SGD)

### Train SVM

In [20]:

svm = SVM(
    alpha=.01
    , epochs=200
    , learning_rate = .0001
    , reg_const= .05
    , minibatch_sz = 256)

from datetime import datetime 
print(datetime.now())
svm.train(X_train, y_train)

2020-02-12 16:08:28.629205
-134.36308163265306 134.95661224489794 -17.242317429315477
y_train (49000,)
my 322237.40485397715
loss at 1th 5962.889788532857
my 34254.22556773161
loss at 51th 69059.17648930868
my 169963.29476600798
loss at 101th 167988.3763674995
my 1075749.2774581518
loss at 151th 241454.85666675388


In [121]:
print(20599218/100827)

204.30259751852182


Next, you will implement a "soft margin" SVM. In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.

We will optimize the SVM loss using SGD. This means you must compute the loss function with respect to model weights. You will use this gradient to update the model weights.

SVM optimized with SGD has 3 hyperparameters that you can experiment with :
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Epochs** - similar to as defined above in Perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case it is a coefficient on the term which maximizes the margin.

You will implement the SVM using SGD in the **models/SVM.py**

The following code: 
- Creates an instance of the SVM classifier class 
- The train function of the SVM class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

In [None]:
pred_svm = svm.predict(X_train)
print('The training accuracy is given by : %f' % (get_acc(pred_svm, y_train)))

### Validate SVM

In [None]:
pred_svm = svm.predict(X_val)
print('The validation accuracy is given by : %f' % (get_acc(pred_svm, y_val)))

### Test SVM

In [None]:
pred_svm = svm.predict(X_test)
print('The testing accuracy is given by : %f' % (get_acc(pred_svm, y_test)))

### SVM Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 SVM. Use the following code to do so:

In [None]:
output_submission_csv('svm_submission.csv', svm.predict(X_test))

# Softmax Classifier (with SGD)

Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this sotmax output to train the model. 

Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax

Once again we will train the classifier with SGD. This means you need to compute the gradients of the softmax cross-entropy loss function according to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy

The softmax classifier has 3 hyperparameters that you can experiment with :
- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.
- **Number of Epochs** - As described for perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective.

You will implement a softmax classifier using SGD in the **models/Softmax.py**

The following code: 
- Creates an instance of the Softmax classifier class 
- The train function of the Softmax class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

### Train Softmax

In [None]:
softmax = Softmax()
softmax.train(X_train, y_train)

In [None]:
pred_softmax = softmax.predict(X_train)
print('The training accuracy is given by : %f' % (get_acc(pred_softmax, y_train)))

### Validate Softmax

In [None]:
pred_softmax = softmax.predict(X_val)
print('The validation accuracy is given by : %f' % (get_acc(pred_softmax, y_val)))

### Testing Softmax

In [None]:
pred_softmax = softmax.predict(X_test)
print('The testing accuracy is given by : %f' % (get_acc(pred_softmax, y_test)))

### Softmax Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Softmax. Use the following code to do so:

In [None]:
output_submission_csv('softmax_submission.csv', softmax.predict(X_test))