# Backward Propagation with Mini-Batch Stochastic Gradiant Descent

## Spesifikasi
- Jumlah hidden layer maksimal 10
- Jumlah node dalam setiap hidden layer dapat bervariasi
- Fully-connected layer
- Fungsi aktivasi berupa sigmoid untuk semua hidden layer maupun output layer
- Node output berjumlah 1
- Program memberikan pilihan untuk menggunakan momentum atau tidak
- Program mengimplementasikan mini-batch stochastic gradient descent
- Implementasi incremental dengan setting batch-size=1 dan implementasi batch dengan setting batchsize=jumlah
data.

Algoritma ini akan diuji dengan data weather (tennis) yang diambil dari wk

In [7]:
from scipy.io import arff
import numpy as np
import pandas as pd
import math
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [85]:
class NeuralNetwork():
    def __init__(self, 
                 input_nodes,
                 hidden_nodes=[],
                 output_nodes=1,
                 batch_size=4,
                 learning_rate=1e-4,
                 momentum=0):
        assert(input_nodes >= 1)
        assert(0 <= len(hidden_nodes) <= 10)
        assert(batch_size >= 1)
        
        self.layers = self._init_layers(input_nodes, hidden_nodes, output_nodes)
        self.batchSize = batch_size
        self.learning_rate = learning_rate
        self.momentum = momentum
        
        self.params_values = self._init_weights()

        
    def _init_layers(self, input_nodes, hidden_nodes, output_nodes):
        layers = []
        layers.append(input_nodes)
        for hidden_layer in hidden_nodes:
            layers.append(hidden_layer)
        layers.append(output_nodes)
        
        return layers
        
        
    def _init_weights(self):
        """
        Initiate weights and bias weights for the neural network
        """
        params_values = {}
        for idx in range(len(self.layers)-1):
            layer_input_size = self.layers[idx]
            layer_output_size = self.layers[idx+1]
            
            # Weight
            params_values['W' + str(idx+1)] = np.random.randn(layer_output_size, layer_input_size) * 0.1
            
            # Bias Weight
            params_values['b' + str(idx+1)] = np.random.randn(layer_output_size, 1) * 0.1
            
        return params_values
    
    
    def _single_layer_feed_forward(self, A_prev, W_curr, b_curr):
        """
        Feed forward for single layer in neural network
        """
        Z_curr = np.dot(W_curr, A_prev) + b_curr
        return self._sigmoid(Z_curr), Z_curr
        
        
    def _full_feed_forward(self, X):
        memory = {}
        A_curr = X
        
        for idx, layer in enumerate(self.layers):
            print(self.layers[idx])
            A_prev = A_curr
            W_curr = self.params_values['W' + str(idx+1)]
            b_curr = self.params_values['b' + str(idx+1)]
            
            A_curr, Z_curr = self._single_layer_feed_forward(A_prev, W_curr, b_curr)
            
            memory['A' + str(idx)] = A_prev
            memory['Z' + str(idx+1)] = Z_curr
            
        return A_curr, memory
        
    
    def _single_layer_backward_prop(self, dA_curr, W_curr, b_curr, Z_curr, A_prev):
        m = A_prev.shape[1]
        
        dZ_curr = self._sigmoid_backward(dA_curr, Z_curr)
        dW_curr = np.dot(dZ_curr, A_prev) / m
        db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
        dA_prev = np.dot(W_curr.T, dZ_curr)
        
        return dA_prev, dW_curr, db_curr
    
    
    def _full_backward_prop(self, y_hat, y, memory):
        grads_values = {}
        m = y.shape[1]
        y = y.reshape(y_hat.shape)
        
        dA_prev = -(np.divide(Y, y_hat) - np.divide(1-y, 1-y_hat))
        
        for layer_idx_prev, layer in reversed(enumerate(self.layers)):
            layer_idx_curr = layer_idx_prev + 1
            
            dA_curr = dA_prev
            
            A_prev = memory['A' + str(layer_idx_prev)]
            Z_curr = memory['Z' + str(layer_idx_curr)]
            W_curr = self.params_values['W' + str(layer_idx_curr)]
            b_curr = self.params_values['b' + str(layer_idx_curr)]
            
            dA_prev, dW_curr,db_curr = self._single_layer_backward_prop(dA_curr, W_curr, b_curr, Z_curr, A_prev)
            
            grads_values['dW' + str(layer_idx_curr)] = dW_curr
            grads_values['db' + str(layer_idx_curr)] = db_curr
            
        return grads_values
    
    
    def _update(grads_values):
        for layer_idx, layer in enumerate(self.layers):
            self.params_values['W' + str(layer_idx)] -= self.learning_rate * grads_values['dW' + str(layer_idx)]
            self.params_values['b' + str(layer_idx)] -= self.learning_rate * grads_values['db' + str(layer_idx)]
    
    
    def _sigmoid(self, weighted_sum):
        return 1/(1+np.exp(-weighted_sum))
    
    
    def _sigmoid_backward(self, delta, weighted_sum):
        sigmoid = self._sigmoid(weighted_sum)
        return delta * sigmoid * (1 - sigmoid)
    
        
    def _calc_error(self, output, target):
        return math.pow(output-target, 2)/2
    
    
    def _calc_accuracy(self, output, target):
        count_correct = 0
        for i in range(len(output)):
            if output[i] == target[i]:
                count_correct += 1
        return count_correct / len(output)
    
    
    def train(self, X, y, epochs):
        cost_history = []
        accuracy_history = []
        
        for i in range(epochs):
            y_hat, cache = self._full_feed_forward(X)
            cost = self._calc_error(y_hat, y)
            cost_history.append(cost)
            accuracy = self._calc_accuracy(y_hat, y)
            accuracy_history.append(accuracy)
            
            grads_values = self._full_backward_prop(y_hat, y, cache)
            self.params_values = update(grads_values)
            
        return self.params_values, cost_history, accuracy_history

In [88]:
dataset = arff.loadarff('../data/weather.arff')
df = pd.DataFrame(dataset[0])

STR_COLUMNS = ['outlook', 'windy', 'play']

for column in STR_COLUMNS:
    df[column] = df[column].str.decode('utf-8')
    
df_encoded = pd.get_dummies(df, columns=['outlook', 'windy'])
df_encoded = pd.get_dummies(df_encoded, columns=['play'], drop_first=True)

X = df_encoded.iloc[:, :7].values
y = df_encoded.iloc[:, 7:].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=17779)

model = NeuralNetwork(input_nodes=7, hidden_nodes=[8], output_nodes=1)
# print(model.params_values)
param, costs, accs = model.train(X_train, y_train, epochs=1000)

7


ValueError: shapes (8,7) and (12,7) not aligned: 7 (dim 1) != 12 (dim 0)