<a href="https://colab.research.google.com/github/jonathanbd135/CNN-MNIST-Tryout/blob/master/Copy_of_DL_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this homework assignment, you are asked to implement the full backpropagation algorithm using only *numpy*.

- We assume sigmoid activation across all layers.
- We assume a single value in the output layer.

In [0]:
import numpy as np

# Seed NumPy's global random generator so the random draws below repeat across runs.
np.random.seed(42)

In [0]:
class MyNN:
  '''
  Fully connected feed-forward network with sigmoid activations in every
  layer and a single output unit, trained by backpropagation of the logistic
  loss using only numpy.

  State kept on the instance:
    model_params: 'W_<l>' with shape (layer_sizes[l], layer_sizes[l - 1]) and
                  'b_<l>' with shape (layer_sizes[l],), for
                  l = 1 .. len(layer_sizes) - 1.
    memory:       activations cached by the most recent forward pass under
                  'a_<l>' ('a_0' is the network input).
    grads:        gradients written by the most recent backward pass under
                  'dW_<l>' and 'dB_<l>', shaped like the matching parameter.
  '''

  # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so that a
  # saturated sigmoid (exactly 0.0 or 1.0 in floating point) cannot yield
  # inf / nan losses.
  _EPS = 1e-12

  def __init__(self, learning_rate, layer_sizes):
    '''
    learning_rate: step size used by update().
    layer_sizes:   number of units per layer, input layer first.
    '''
    self.learning_rate = learning_rate
    self.layer_sizes = layer_sizes
    self.model_params = {}
    self.memory = {}
    self.grads = {}

    # Initializing weights (small random values; W is drawn before b for
    # every layer, so results are reproducible for a fixed seed).
    for layer_index in range(len(layer_sizes) - 1):
      W_input = layer_sizes[layer_index + 1]
      W_output = layer_sizes[layer_index]
      self.model_params['W_' + str(layer_index + 1)] = np.random.randn(W_input, W_output) * 0.1
      self.model_params['b_' + str(layer_index + 1)] = np.random.randn(W_input) * 0.1

  @staticmethod
  def _sigmoid(z):
    '''Element-wise logistic function.'''
    return 1 / (1 + np.exp(-z))

  def forward_single_instance(self, x):
    '''
    Forward pass for one instance.

    x: array of shape (layer_sizes[0],).
    Returns the output activation, shape (layer_sizes[-1],), and caches every
    layer's activation in self.memory for the backward pass.
    '''
    a_i_1 = x
    self.memory['a_0'] = x
    for layer_index in range(len(self.layer_sizes) - 1):
      W_i = self.model_params['W_' + str(layer_index + 1)]
      b_i = self.model_params['b_' + str(layer_index + 1)]
      a_i = self._sigmoid(np.dot(W_i, a_i_1) + b_i)
      self.memory['a_' + str(layer_index + 1)] = a_i
      a_i_1 = a_i
    return a_i_1

  @staticmethod
  def log_loss(y_hat, y):
    '''
    Logistic loss, assuming a single value in y_hat and y.

    Declared as a static method so that both MyNN.log_loss(y_hat, y) and
    nn.log_loss(y_hat, y) work.
    '''
    p = np.clip(y_hat[0], MyNN._EPS, 1 - MyNN._EPS)
    cost = -y[0] * np.log(p) - (1 - y[0]) * np.log(1 - p)
    return cost

  def backward_single_instance(self, y):
    '''
    Backward pass for the instance last given to forward_single_instance.

    y: target for that instance (a single value).
    Fills self.grads with 'dW_<l>' and 'dB_<l>' for every layer.
    '''
    num_layers = len(self.layer_sizes) - 1
    a_output = self.memory['a_' + str(num_layers)]
    # For a sigmoid output combined with the logistic loss, dLoss/dz = a - y.
    dz = (a_output - y).reshape(-1)

    for layer_index in range(num_layers, 0, -1):
      a_l_1 = self.memory['a_' + str(layer_index - 1)]
      self.grads['dW_' + str(layer_index)] = np.outer(dz, a_l_1)
      # The bias gradient is this layer's dz; it has to be stored before dz
      # is propagated to the previous layer.
      self.grads['dB_' + str(layer_index)] = dz
      if layer_index > 1:
        # Propagate through W_l and the sigmoid derivative a * (1 - a) of
        # the previous layer. The input layer has no activation to pass.
        W_l = self.model_params['W_' + str(layer_index)]
        dz = a_l_1 * (1 - a_l_1) * np.dot(W_l.T, dz)

  def update(self):
    '''
    One gradient-descent step: param <- param - learning_rate * grad, using
    the gradients stored by the most recent backward pass.
    '''
    eta = self.learning_rate
    for layer_index in range(1, len(self.layer_sizes)):
      suffix = str(layer_index)
      self.model_params['W_' + suffix] = (
          self.model_params['W_' + suffix] - eta * self.grads['dW_' + suffix])
      self.model_params['b_' + suffix] = (
          self.model_params['b_' + suffix] - eta * self.grads['dB_' + suffix])

  def forward_batch(self, X):
    '''
    Forward pass for a batch.

    X: array of shape (network_input_size, number_of_instances).
    Returns the output activations, shape (layer_sizes[-1],
    number_of_instances), and caches every layer's activations in
    self.memory for backward_batch.
    '''
    A = X
    self.memory['a_0'] = X
    for layer_index in range(1, len(self.layer_sizes)):
      W = self.model_params['W_' + str(layer_index)]
      b = self.model_params['b_' + str(layer_index)]
      # The bias is a column vector so it broadcasts over the instances.
      A = self._sigmoid(np.dot(W, A) + b.reshape(-1, 1))
      self.memory['a_' + str(layer_index)] = A
    return A

  def backward_batch(self, y):
    '''
    Backward pass for the batch last given to forward_batch.

    y: targets of shape (1, number_of_instances).
    Fills self.grads with the gradients averaged over the batch.
    '''
    num_layers = len(self.layer_sizes) - 1
    a_output = self.memory['a_' + str(num_layers)]
    batch_size = a_output.shape[1]
    dZ = a_output - np.reshape(y, (1, -1))

    for layer_index in range(num_layers, 0, -1):
      A_prev = self.memory['a_' + str(layer_index - 1)]
      self.grads['dW_' + str(layer_index)] = np.dot(dZ, A_prev.T) / batch_size
      self.grads['dB_' + str(layer_index)] = dZ.sum(axis=1) / batch_size
      if layer_index > 1:
        W = self.model_params['W_' + str(layer_index)]
        dZ = A_prev * (1 - A_prev) * np.dot(W.T, dZ)

  def log_loss_batch(self, y_hat, y):
    '''
    Mean logistic loss over a batch.

    y_hat, y: arrays of shape (1, number_of_instances).
    Returns a Python float.
    '''
    p = np.clip(np.reshape(y_hat, (1, -1)), self._EPS, 1 - self._EPS)
    t = np.reshape(y, (1, -1))
    losses = -t * np.log(p) - (1 - t) * np.log(1 - p)
    return float(np.mean(losses))

In [0]:
# Small demo network: 3 inputs, one hidden layer of 2 units, 1 output.
nn = MyNN(learning_rate=0.01, layer_sizes=[3, 2, 1])

In [0]:
# Display the randomly initialised weights ('W_<l>') and biases ('b_<l>') of the network.
nn.model_params

{'W_1': array([[-0.1328186 ,  0.01968612,  0.07384666],
        [ 0.01713683, -0.01156483, -0.03011037]]),
 'W_2': array([[-0.04606388,  0.10571222]]),
 'b_1': array([-0.1478522 , -0.07198442]),
 'b_2': array([0.03436183])}

In [0]:
# Draw one random input vector and one random binary label. The logistic loss
# is only meaningful for targets in [0, 1], so the label is sampled from
# {0, 1} rather than from a Gaussian. x is drawn first, so the forward output
# printed below does not depend on how y is sampled.
x = np.random.randn(3)
y = (np.random.rand(1) > 0.5).astype(float)

y_hat = nn.forward_single_instance(x)
print(y_hat)

[0.48860868]


In [0]:
# Backpropagate using the activations cached by the forward pass above; the gradients are stored in nn.grads.
nn.backward_single_instance(y)

In [0]:
def train(X, y, epochs, batch_size, model=None):
  '''
  Mini-batch gradient-descent training loop.

  X:          inputs, shape (network_input_size, number_of_instances).
  y:          targets, shape (1, number_of_instances).
  epochs:     number of full passes over the data.
  batch_size: instances per mini-batch (the last batch may be smaller).
  model:      network to train; it must provide forward_batch,
              log_loss_batch, backward_batch and update. When omitted, the
              module-level `nn` is trained.

  Prints the mean batch loss after every epoch and returns these values as a
  list (one entry per epoch), which can be used to plot the loss per epoch.

  Raises ValueError if batch_size is smaller than 1 or X has no instances.
  '''
  if batch_size < 1:
    raise ValueError('batch_size must be at least 1')
  num_instances = X.shape[1]
  if num_instances == 0:
    raise ValueError('X must contain at least one instance')

  # The global is only looked up when no model is passed explicitly.
  net = nn if model is None else model

  epoch_losses = []
  for e in range(1, epochs + 1):
    epoch_loss = 0
    # Reshuffle every epoch; the same permutation indexes X and y so that
    # inputs and targets stay aligned.
    order = np.random.permutation(num_instances)
    batches = []
    for start in range(0, num_instances, batch_size):
      batch_idx = order[start:start + batch_size]
      batches.append((X[:, batch_idx], y[:, batch_idx]))

    for X_b, y_b in batches:
      y_hat = net.forward_batch(X_b)
      epoch_loss += net.log_loss_batch(y_hat, y_b)
      net.backward_batch(y_b)
      net.update()

    mean_loss = epoch_loss / len(batches)
    epoch_losses.append(mean_loss)
    print(f'Epoch {e}, loss={mean_loss}')
  return epoch_losses


In [0]:
# TODO: Make sure the following network trains properly

nn = MyNN(0.001, [6, 4, 3, 1])

X = np.random.randn(6, 100)
# Binary targets: with a sigmoid output and the logistic loss, targets outside
# [0, 1] make the loss unbounded below, so Gaussian targets cannot be used to
# check that training behaves properly.
y = (np.random.rand(1, 100) > 0.5).astype(float)
batch_size = 8
epochs = 2

train(X, y, epochs, batch_size)

# TODO: Train on an external dataset

Train on the Bike Sharing dataset, using the same split as in *DL Notebook 4 - logistic regression*.
Use the following features from the data:

* temp
* atemp
* hum
* windspeed
* weekday

The response variable is, as in Notebook 4, `raw["success"] = raw["cnt"] > (raw["cnt"].describe()["mean"])`.

The architecture of the network should be: [5, 40, 30, 10, 7, 5, 3, 1].

Use `batch_size=8` and train the network for 100 epochs on the training set (based on the split requested above).

Then, plot loss per epoch.

In [0]:
# TODO: your code goes here