## Machine Learning - Lab 08 - Backpropagation

* Full name: Đinh Anh Huy
* Student ID: 18110103

In [1]:
import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
class Layer:
  '''
    Represents a layer (hidden or output) in our neutal network.
  '''
  def __init__(self, n_input, n_neurons, activation=None, weights=None, bias=None, random_state=0):
    '''
      Initialize class Layer.
      Parameters:
      -------------------------------------------------
        @ n_input: The input size (coming from the input layer or a previous hidden layer)
        @ n_neurons: The number of neurons in this layer.
        @ activation: The activation function to use (if any).
        @ weights: The layer's weights.
        @ bias: The layer's bias.
        @ random_state: use for create random weights and bias if they are None.
    '''
    np.random.seed(random_state)
    self.weights_ = weights if weights is not None else np.random.randn(n_input, n_neurons)
    self.bias_ = bias if bias is not None else np.random.randn(1, n_neurons)
    self.activation = activation
    self.last_activation_ = None
    self.error_ = None
    self.delta_ = None

  def activate(self, X):
    '''
      Calculates the dot product of this layer.
      Parameters:
      -------------------------------------------------
        @ X: The input.
      Return:
      -------------------------------------------------
        The dot product of this layer after applying activation.
    '''
    pre_output = np.dot(X, self.weights_) + self.bias_
    self.last_activation_ = self._apply_activation(pre_output)
    return self.last_activation_

  def _apply_activation(self, X):
    '''
      Applies the chosen activation function (if any).
      Parameters:
      -------------------------------------------------
        @ X: The input.
      Return:
      -------------------------------------------------
        The result after applying chosen activation function for X.
    '''
    if self.activation is None:
      return X

    elif self.activation == 'sigmoid':
      return 1 / (1 + np.exp(-X))

    elif self.activation == 'tanh':
      return np.tanh(X)

    elif self.activation == 'ReLU':
      return np.array([max(0,i) for i in X.reshape(-1)]).reshape(X.shape)

    elif self.activation == 'softmax':
      return np.exp(X)/np.sum(np.exp(X), axis=1).reshape(-1,1)

    else:
      raise Exception('{} activation function is not valid.'.format(self.activation))

  def apply_activation_derivative(self, X):
    '''
      Applies the derivative of activation function (if any).
      Parameters:
      -------------------------------------------------
        @ X: The normal values that are already activated.

      Return:
      -------------------------------------------------
        The derived value.
    '''
    if self.activation is None:
      return X

    elif self.activation == 'sigmoid':
      return X*(1 - X)

    elif self.activation == 'tanh':
      return 1 - X**2

    elif self.activation == 'ReLU':
      return 1.*(X > 0)

    elif self.activation == 'softmax':
      return X*(1 - X)

    else:
      raise Exception('{} activation function is not valid.'.format(self.activation))

In [3]:
class NeuralNetwork:
  '''
    Represents a neural network.
  '''
  def __init__(self):
    self.learning_rate = None
    self._layers = []
 
  def add_layer(self, layer):
    '''
      Adds a layer to the neural network.
      Parameters:
      -------------------------------------------------
        @ layer: A layer to add.
    '''
    self._layers.append(layer)
 
  def feed_forward(self, X):
    '''
      Feeds forward the input through the layers.
      Parameters:
      -------------------------------------------------
        @ X: The input layer.
      Return:
      -------------------------------------------------
        The result.
    '''
    input = X.copy()
    for layer in self._layers:
      input = layer.activate(input)
    return input
 
  def backpropagation(self, forward, y):
    '''
      Performs the backward propagation algorithm and updates the layers weights.
      Parameters:
      -------------------------------------------------
        @ forward: The result of feed forward.
        @ y: The target values.
      
    ''' 
    # Loop over the layers backward
    for i in reversed(range(len(self._layers))):
      layer = self._layers[i]
 
      # If this is the output layer
      if layer == self._layers[-1]:
        layer.error_ = forward  - y 
        layer.delta_ = layer.error_ * layer.apply_activation_derivative(forward)
      else:
        next_layer = self._layers[i+1]
        layer.error_ = np.dot(next_layer.delta_, next_layer.weights_.T)
        layer.delta_ = layer.error_ * layer.apply_activation_derivative(layer.last_activation_)
        
  def update_weights(self, X):
    '''
      Update the weights.
      Parameters:
      -------------------------------------------------
        @ X: The input values.
    '''
    for i in range(len(self._layers)):
      layer = self._layers[i]
      # The input is either the previous layers output or X itself (for the first hidden layer)
      input = np.atleast_2d(X if i == 0 else self._layers[i-1].last_activation_)
      partial_weight = np.dot(input.T, layer.delta_)
      partial_bias = np.sum(layer.delta_, axis=0)
      layer.weights_ -= self.learning_rate * partial_weight
      layer.bias_ -= self.learning_rate * partial_bias
 
  def train(self, X, y, learning_rate=0.01, max_epochs=100, validation_data=None):
    '''
      Trains the neural network using backpropagation.
      Parameters:
      -------------------------------------------------
        @ X: The input values.
        @ y: The target values.
        @ learning_rate: The learning rate.
        @ max_epochs: The maximum number of epochs.
        @ validation_data: The set of validation values and target of them.
    '''
    self.shape_input = X.shape
    self.learning_rate = learning_rate
    start_time = time.time()
    
    for i in range(max_epochs):
      for x, target in zip(X, y):
        x = x.reshape(1,-1)
        target = target.reshape(1,-1)
        forward_output = self.feed_forward(x)
        self.backpropagation(forward_output, target)
        self.update_weights(x)
 
      if validation_data is None:
        raise Exception("Your validation data is empty.")
      else:
        MSE = np.mean(np.square(y - self.feed_forward(X)))
        val_accuracy = self.score(validation_data[0], validation_data[1])
        print("Epoch {0}/{1}\t----------------------\tTime Spent: {2:.2f}s - MSE: {3:.5f} - Val_Acc: {4:.4f}".format(i+1, max_epochs, time.time()-start_time, MSE, val_accuracy))
  
  def predict(self, X):
    '''
      Predicts a class (or classes).
      Parameters:
      -------------------------------------------------
        @ X: The input values.
      Return:
      -------------------------------------------------
        The predictions.
    '''
    ff = self.feed_forward(X)
    if ff.ndim == 1:
      return np.argmax(ff)
    else:
      return np.argmax(ff, axis=1)
 
  def score(self, X, y):
    '''
      Calculates the accuracy between the predicted labels and true labels.
      Parameters:
      -------------------------------------------------
        @ X: The input values.
        @ y: The target values.
      Return:
      -------------------------------------------------
        The calculated accuracy.
    '''
    output = self.feed_forward(X)
    return np.sum(np.argmax(output, axis=1) == np.array(y).flatten())/y.shape[0]
 
  def summary(self):
    '''
      Summaries neural network.
    '''
    from prettytable import PrettyTable
    t = PrettyTable(['Layer', 'Input nodes', 'Output nodes', 'Activation'])
    for i in range(len(self._layers)):
      layer = self._layers[i]
      if layer == self._layers[0]:
        t.add_row(['Input layer', layer.weights_.shape[0], layer.weights_.shape[1], layer.activation])
      elif layer == self._layers[-1]:
        t.add_row(['Output layer', layer.weights_.shape[0], layer.weights_.shape[1], layer.activation])
      else:
        t.add_row(['Hidden layer {}'.format(i), layer.weights_.shape[0], layer.weights_.shape[1], layer.activation])
    print(t)

In [4]:
def one_hot_vector(y):
    out = np.zeros((y.shape[0], max(y)+1))
    for i in range(y.shape[0]):
        out[i, y[i]] = 1
    return out

### Bài tập 1.
**Từ code demo hãy cài đặt thêm một module để chọn ra được bộ weights sao cho accuracy trên tập validation là tốt nhất.**

In [5]:
train = pd.read_csv("https://raw.githubusercontent.com/huynhthanh98/ML/master/lab-08/bt_train.csv")
valid = pd.read_csv("https://raw.githubusercontent.com/huynhthanh98/ML/master/lab-08/bt_valid.csv")
 
x1_train = train["x1"].values
x2_train = train["x2"].values
y_train = train["label"].values
 
x1_valid = valid['x1'].values
x2_valid = valid['x2'].values
y_valid = valid['label'].values
 
# Normalize data
x1_mean = np.mean(x1_train)
x1_std = np.std(x1_train)
x2_mean = np.mean(x2_train)
x2_std = np.std(x2_train)
 
x1_train = (x1_train - x1_mean)/ x1_std
x2_train = (x2_train - x2_mean)/ x2_std
 
x1_valid = (x1_valid - x1_mean)/ x1_std
x2_valid = (x2_valid - x2_mean)/ x2_std
 
X_train = np.concatenate([x1_train.reshape(-1,1), x2_train.reshape(-1,1)], axis=1)
y_train_ = one_hot_vector(y_train) # One-hot encode y_train
 
X_valid = np.concatenate([x1_valid.reshape(-1,1), x2_valid.reshape(-1,1)], axis=1)
print("Shape of X_train: ", X_train.shape)
print("Shape of X_valid: ", X_valid.shape)
print("Shape of y_train: ", y_train_.shape)
print("Shape of y_valid: ", y_valid.shape)

Shape of X_train:  (900, 2)
Shape of X_valid:  (300, 2)
Shape of y_train:  (900, 3)
Shape of y_valid:  (300,)


In [6]:
nn = NeuralNetwork()
nn.add_layer(Layer(2, 5, 'ReLU'))
nn.add_layer(Layer(5, 5, 'ReLU'))
nn.add_layer(Layer(5, 3, 'softmax'))
nn.train(X_train, y_train_, learning_rate=0.001, max_epochs=100, validation_data=(X_valid, y_valid))

Epoch 1/100	----------------------	Time Spent: 0.13s - MSE: 0.40558 - Val_Acc: 0.3100
Epoch 2/100	----------------------	Time Spent: 0.26s - MSE: 0.39523 - Val_Acc: 0.3100
Epoch 3/100	----------------------	Time Spent: 0.37s - MSE: 0.38446 - Val_Acc: 0.3667
Epoch 4/100	----------------------	Time Spent: 0.49s - MSE: 0.37426 - Val_Acc: 0.3667
Epoch 5/100	----------------------	Time Spent: 0.62s - MSE: 0.36581 - Val_Acc: 0.3733
Epoch 6/100	----------------------	Time Spent: 0.75s - MSE: 0.35929 - Val_Acc: 0.3433
Epoch 7/100	----------------------	Time Spent: 0.89s - MSE: 0.35406 - Val_Acc: 0.3433
Epoch 8/100	----------------------	Time Spent: 1.03s - MSE: 0.34983 - Val_Acc: 0.3400
Epoch 9/100	----------------------	Time Spent: 1.17s - MSE: 0.34554 - Val_Acc: 0.3367
Epoch 10/100	----------------------	Time Spent: 1.31s - MSE: 0.34022 - Val_Acc: 0.3367
Epoch 11/100	----------------------	Time Spent: 1.44s - MSE: 0.33112 - Val_Acc: 0.3333
Epoch 12/100	----------------------	Time Spent: 1.58

In [7]:
print("Accuracy on training set: ", nn.score(X_train, y_train))
print("Final accuracy on validation set: ", nn.score(X_valid, y_valid))

Accuracy on training set:  0.8611111111111112
Final accuracy on validation set:  0.6566666666666666


In [8]:
nn.summary()

+----------------+-------------+--------------+------------+
|     Layer      | Input nodes | Output nodes | Activation |
+----------------+-------------+--------------+------------+
|  Input layer   |      2      |      5       |    ReLU    |
| Hidden layer 1 |      5      |      5       |    ReLU    |
|  Output layer  |      5      |      3       |  softmax   |
+----------------+-------------+--------------+------------+


### Bài tập 2.
**Từ bộ dữ liệu bên dưới hãy cài đặt backpropagation cho bài toán phân biệt ung thư vú. Hãy tự chọn số layers và số nodes mà mình cho là thích hợp, cũng như là nêu ra số layers và số nodes của mỗi layer mà mình đã chọn. Tính accuracy trên tập training.**

In [9]:
from sklearn import datasets

breast_cancer = datasets.load_breast_cancer()
X = breast_cancer.data  
y = breast_cancer.target

from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split( X, y, test_size=0.2, random_state=42)

X_mean=np.mean(X_train)
X_std=np.std(X_train)

X_valid=(X_valid-X_mean)/X_std
X_train=(X_train-X_mean)/X_std
y_train_ = one_hot_vector(y_train)
print("Shape of X_train: ", X_train.shape)
print("Shape of y_train: ", y_train_.shape)
print("Shape of X_valid: ", X_valid.shape)
print("Shape of y_valid: ", y_valid.shape)

Shape of X_train:  (455, 30)
Shape of y_train:  (455, 2)
Shape of X_valid:  (114, 30)
Shape of y_valid:  (114,)


In [10]:
nn = NeuralNetwork()
nn.add_layer(Layer(30, 15, 'ReLU'))
nn.add_layer(Layer(15, 15, 'ReLU'))
nn.add_layer(Layer(15, 2, 'softmax'))
nn.train(X_train, y_train_, learning_rate=0.0007, max_epochs=500, validation_data=(X_valid, y_valid))

Epoch 1/500	----------------------	Time Spent: 0.09s - MSE: 0.12810 - Val_Acc: 0.8772
Epoch 2/500	----------------------	Time Spent: 0.17s - MSE: 0.10309 - Val_Acc: 0.9123
Epoch 3/500	----------------------	Time Spent: 0.27s - MSE: 0.09199 - Val_Acc: 0.9211
Epoch 4/500	----------------------	Time Spent: 0.36s - MSE: 0.08128 - Val_Acc: 0.9211
Epoch 5/500	----------------------	Time Spent: 0.46s - MSE: 0.07765 - Val_Acc: 0.9386
Epoch 6/500	----------------------	Time Spent: 0.56s - MSE: 0.07619 - Val_Acc: 0.9386
Epoch 7/500	----------------------	Time Spent: 0.66s - MSE: 0.07556 - Val_Acc: 0.9386
Epoch 8/500	----------------------	Time Spent: 0.74s - MSE: 0.07496 - Val_Acc: 0.9386
Epoch 9/500	----------------------	Time Spent: 0.84s - MSE: 0.07440 - Val_Acc: 0.9386
Epoch 10/500	----------------------	Time Spent: 0.94s - MSE: 0.07403 - Val_Acc: 0.9386
Epoch 11/500	----------------------	Time Spent: 1.02s - MSE: 0.07377 - Val_Acc: 0.9386
Epoch 12/500	----------------------	Time Spent: 1.11

In [11]:
print("Accuracy on training set: ", nn.score(X_train, y_train))
print("Final accuracy on validation set: ", nn.score(X_valid, y_valid))

Accuracy on training set:  0.9252747252747253
Final accuracy on validation set:  0.9824561403508771


In [12]:
nn.summary()

+----------------+-------------+--------------+------------+
|     Layer      | Input nodes | Output nodes | Activation |
+----------------+-------------+--------------+------------+
|  Input layer   |      30     |      15      |    ReLU    |
| Hidden layer 1 |      15     |      15      |    ReLU    |
|  Output layer  |      15     |      2       |  softmax   |
+----------------+-------------+--------------+------------+
