In [2]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

%load_ext autoreload
%autoreload 2


# Notebook-wide plotting defaults: figure size and image rendering options.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
class TwoLayerNN(object):
    """
                       Layer 1                   Layer 2                   Loss
                   ___________________     ______________________   ___________________
    Input (NxD) > {Hidden (DxH) > ReLU} > {Output (HxC) > Softmax} > Cross Entropy Loss
    
    where:
        N: Number of instances of input
        D: Dimensions of the input
        H: Num hidden layer neurons
        C: Num classes
    """
    def __init__(self, input_dim, hidden_dim, output_dim, std=1e-4):
        
        # Set the dimensions
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        
        # Weights initialization (naive)
        self.W1 = np.random.rand((input_dim, hidden_dim)) * std
        self.b1 = np.ones((hidden_dim,)) * 0.001 # Slight positive bias values to help ReLU units
        
        self.W2 = np.random.rand((hidden_dim, output_dim)) * std
        self.b2 = np.zeros((output_dim,))
        
        #######################################################################
        # TODO Use Xavier weights initialiation instead
        #######################################################################
        
    def loss(self, X, y, reg_loss_factor=0.0):
        """
        Given the batch {X, y} and the current set of weights, we calculate the cross entropy 
        loss. We also calculate the gradient of the loss wrt the weights and return them.
        
        We add L2 Regularization loss, with reg_loss_factor acting as the weight of that loss.
        """
        
        num_train = X.shape[0]
        
        # Forward pass
        
        # Layer 1: Calculate the scores and pass it through ReLU
        # Dimension of output will be [num_train x hidden_dim]
        layer1_relu_output = np.maximum(0, X.dot(self.W1) + self.b1)
        
        # Layer 2: Calculate the scores and pass it through Softmax
        # Dimension of output will be [num_train x output_dim]
        layer2_temp = layer1_relu_output.dot(self.W2) + self.b1
        layer2_temp = layer2_temp - np.max(layer2_temp, axis=1).reshape((-1, 1)) # For numerical stability
        layer2_temp_exp = np.exp(layer2_temp)
        layer2_temp_exp_sums = np.sum(layer2_temp_exp, axis=1).reshape((-1, 1))
        layer2_softmax_output = layer2_temp_exp / layer2_temp_exp_sums
        
        # Calculate the cross entropy loss with regularization loss
        data_loss = -np.log(layer2_softmax_output[np.arange(num_train), y])
        data_loss = np.sum(loss) / num_train
        reg_loss  = 0.5 * reg_loss_factor * np.sum(self.W1 * self.W1)
        reg_loss += 0.5 * reg_loss_factor * np.sum(self.W2 * self.W2)
        loss = data_loss + reg_loss
        
        # Backward pass
        
        # Ok, so now we have a single number in 'loss', and we have to find the contribution 
        # of each of the weights and biases on this loss
        
        # Through output layer
        probs = layer2_softmax_output.copy()
        probs[np.arange(num_train), y] -= 1
        probs /= num_train
        dW2 = layer1_relu_output.T.dot(probs)
        dW2 += reg_loss_factor * self.W2
        db2 = np.sum(probs, axis=0)
        
        # Through hidden layer
        