In [1]:
import math
import numpy as np

class DenseLayer:
    """Fully connected layer of sigmoid units with hand-written backpropagation.

    Weights are stored in ``weight_matrix`` with one row per unit and one
    column per input. When ``use_bias`` is true an extra last column holds the
    bias weight, which is fed by a constant input of 1.

    Attributes written by the methods:
        output_list: activations from the latest ``calculate_output`` call.
        local_gradient: error terms from the latest
            ``calculate_local_gradient_*`` call.
        delta_weight_matrix: weight change applied by ``update_weight``.
        delta_weight_matrix_before: previous weight change, read by the
            momentum branch of ``update_delta_weight``.
    """

    def __init__(self, units, activation, input_dim, init, use_bias=False):
        """Create the layer and initialise its weights.

        Args:
            units: number of neurons (rows of the weight matrix).
            activation: name of the activation; only ``'sigmoid'`` is supported.
            input_dim: number of inputs per neuron, excluding the bias.
            init: ``'uniform'`` or ``'random'``; both draw weights from [0, 1).
            use_bias: when true, append a zero-initialised bias column.

        Raises:
            ValueError: if ``activation`` or ``init`` is not supported.
        """
        self.units = units
        self.input_dim = input_dim

        # Fail fast: continuing with a missing attribute only postpones the
        # failure to an unrelated AttributeError later on.
        if activation == 'sigmoid':
            self.activation_function = self.sigmoid
        else:
            raise ValueError('Activation function not supported: %r' % (activation,))

        if init == 'uniform':
            self.weight_matrix = np.random.uniform(0.0, 1.0, size=(self.units, input_dim))
        elif init == 'random':
            self.weight_matrix = np.random.random(size=(self.units, input_dim))
        else:
            raise ValueError('Init function not supported: %r' % (init,))

        self.use_bias = use_bias
        if self.use_bias:
            # The bias weight is the last column and starts at zero.
            self.weight_matrix = np.hstack((self.weight_matrix, np.zeros((units, 1))))

        # Both delta matrices always share the shape of the weight matrix.
        self.delta_weight_matrix_before = np.zeros(self.weight_matrix.shape)
        self.delta_weight_matrix = np.zeros(self.weight_matrix.shape)

    def _with_bias_input(self, input_list):
        """Return the inputs as a flat float array, plus a trailing 1 if biased."""
        inputs = np.asarray(input_list, dtype=float).ravel()
        if self.use_bias:
            inputs = np.append(inputs, 1.0)
        return inputs

    def calculate_sigma(self, input_list):
        """Return the weighted input sum of every unit as a 1-D array."""
        inputs = self._with_bias_input(input_list)
        return np.dot(np.asarray(self.weight_matrix, dtype=float), inputs)

    def calculate_output(self, input_list):
        """Run the forward pass, store the result in ``output_list`` and return it."""
        sigma = self.calculate_sigma(input_list)
        self.output_list = np.array(
            [self.activation_function(value) for value in sigma], dtype=float
        )
        return self.output_list

    def calculate_local_gradient_output_layer(self, target_list):
        """
        Use this if the layer is output layer

        Computes ``output * (1 - output) * (target - output)`` per unit, stores
        it in ``local_gradient`` and returns it. Targets beyond the number of
        units are ignored; too few targets raise ``IndexError``.
        """
        outputs = np.asarray(self.output_list, dtype=float).ravel()
        targets = np.array(
            [target_list[index] for index in range(outputs.size)], dtype=float
        ).ravel()
        self.local_gradient = outputs * (1 - outputs) * (targets - outputs)
        return self.local_gradient

    def calculate_local_gradient_hidden_layer(self, local_gradient_output_list, output_layer_weight_matrix):
        """
        Use this if the layer is hidden layer

        Args:
            local_gradient_output_list: local gradients of the next layer, one
                per unit of that layer.
            output_layer_weight_matrix: weight matrix of the next layer (rows =
                its units). Columns beyond this layer's unit count, such as a
                bias column, are ignored.

        Returns:
            The local gradient of every unit in this layer (also stored in
            ``local_gradient``).

        Raises:
            IndexError: if the weight matrix is too small for the given
                gradients or for this layer's outputs.
        """
        outputs = np.asarray(self.output_list, dtype=float).ravel()
        downstream_gradient = np.asarray(local_gradient_output_list, dtype=float).ravel()

        if downstream_gradient.size == 0:
            # Nothing to propagate back: every weighted error sum is zero.
            propagated = np.zeros(outputs.size)
        else:
            downstream_weights = np.asarray(output_layer_weight_matrix, dtype=float)
            if (downstream_weights.ndim != 2
                    or downstream_weights.shape[0] < downstream_gradient.size
                    or downstream_weights.shape[1] < outputs.size):
                raise IndexError('output layer weight matrix does not match the gradients/outputs')
            # Column k holds the weights leaving this layer's unit k.
            relevant = downstream_weights[:downstream_gradient.size, :outputs.size]
            propagated = np.dot(downstream_gradient, relevant)

        self.local_gradient = outputs * (1 - outputs) * propagated
        return self.local_gradient

    def update_delta_weight(self, lr, input_list, momentum=None):
        """
        Function to update delta weight

        Computes ``lr * local_gradient[j] * input[i]`` for every weight and
        stores it in ``delta_weight_matrix``. When ``momentum`` is given,
        ``momentum * delta_weight_matrix_before`` is added and the result is
        also stored as the new ``delta_weight_matrix_before``. The weights
        themselves are not changed here; call ``update_weight`` for that.

        Raises:
            IndexError: if the local gradient or the input is shorter than the
                weight matrix requires.
        """
        inputs = self._with_bias_input(input_list)
        gradient = np.asarray(self.local_gradient, dtype=float).ravel()

        rows, columns = np.shape(self.weight_matrix)
        if gradient.size < rows or inputs.size < columns:
            raise IndexError('local gradient or input is shorter than the weight matrix')

        # (lr * gradient) first keeps the same rounding order as the scalar formula.
        step = np.outer(lr * gradient[:rows], inputs[:columns])

        if momentum is None:
            self.delta_weight_matrix = step
        else:
            previous = np.asarray(self.delta_weight_matrix_before, dtype=float)
            delta = step + momentum * previous
            self.delta_weight_matrix_before = delta
            # Separate copy so later changes to one matrix never leak into the other.
            self.delta_weight_matrix = delta.copy()

    def update_weight(self):
        """
        Function to update weight

        Adds ``delta_weight_matrix`` to ``weight_matrix``. The weights are
        converted to float first so an integer matrix assigned by the caller
        does not silently truncate the update.
        """
        self.weight_matrix = np.asarray(self.weight_matrix, dtype=float) + self.delta_weight_matrix

    def sigmoid(self, x):
        """Return the logistic function of the scalar ``x``."""
        try:
            return 1 / (1 + math.exp(-x))
        except OverflowError:
            # exp(-x) overflows only for very negative x, where the limit is 0.
            return 0.0

In [2]:
# Seed NumPy so the randomly initialised weights below are reproducible.
np.random.seed(5)
layer = DenseLayer(units=3, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Weighted input sums of the three units for the input (3, 2).
layer.calculate_sigma(input_list=[3, 2])

array([2.40744413, 2.45737928, 2.68872129])

In [3]:
# Same input as the previous cell, now passed through the sigmoid activation.
layer.calculate_output(input_list=[3, 2])

array([0.9173932 , 0.92109941, 0.93635782])

# Test Feedforward

In [4]:
layer_hidden = DenseLayer(units=2, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Replace the random weights with fixed values; the last column is the bias weight.
layer_hidden.weight_matrix = np.array([
    [-0.2, 0.1, 0.1],
    [-0.1, 0.3, 0.1],
])

layer_hidden.calculate_output(input_list=[0.1, 0.9])

array([0.54239794, 0.58904043])

# Test Calculate Local Gradient Hidden Layer

In [5]:
layer_hidden_output = DenseLayer(units=2, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Inject the activations directly instead of running a forward pass.
layer_hidden_output.output_list = [0.542, 0.589]
layer_hidden_output.calculate_local_gradient_hidden_layer(
    local_gradient_output_list=[0.0663],
    output_layer_weight_matrix=[[0.2, 0.3]],
)

array([0.00329161, 0.00481495])

# Test Calculate Local Gradient Output Layer

In [6]:
layer_error_output = DenseLayer(units=1, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Inject the activation directly, then compare it against the target 0.9.
layer_error_output.output_list = [0.619]
layer_error_output.calculate_local_gradient_output_layer(target_list=[0.9])

array([0.06627076])

# Test Update Weight Momentum 1

In [7]:
layer_test_update_weight = DenseLayer(units=1, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Fixed starting weights and previous delta (last column = bias).
layer_test_update_weight.weight_matrix = np.array([[0.2, 0.3, 0.2]])
layer_test_update_weight.delta_weight_matrix_before = np.array([[0.2, 0.3, 0.2]])

# State before the update: weights first, then the previous delta.
print(
    layer_test_update_weight.weight_matrix,
    layer_test_update_weight.delta_weight_matrix_before,
    sep='\n',
)

layer_test_update_weight.local_gradient = [0.0663]
layer_test_update_weight.update_delta_weight(lr=0.25, input_list=[0.542, 0.589], momentum=0.0001)
layer_test_update_weight.update_weight()

# State after the update: new delta first, then the new weights.
print(
    layer_test_update_weight.delta_weight_matrix_before,
    layer_test_update_weight.weight_matrix,
    sep='\n',
)

[[0.2 0.3 0.2]]
[[0.2 0.3 0.2]]
[[0.00900365 0.00979267 0.016595  ]]
[[0.20900365 0.30979267 0.216595  ]]


# Test Update Weight Momentum 2

In [8]:
layer_test_update_weight2 = DenseLayer(units=2, activation='sigmoid', input_dim=2, init='uniform', use_bias=True)
# Fixed starting weights and previous delta (last column = bias).
layer_test_update_weight2.weight_matrix = np.array([
    [-0.2, 0.1, 0.1],
    [-0.1, 0.3, 0.1],
])
layer_test_update_weight2.delta_weight_matrix_before = np.array([
    [-0.2, 0.1, 0.1],
    [-0.1, 0.3, 0.1],
])

# State before the update: weights first, then the previous delta.
print(
    layer_test_update_weight2.weight_matrix,
    layer_test_update_weight2.delta_weight_matrix_before,
    sep='\n',
)

layer_test_update_weight2.local_gradient = [0.0033, 0.0049]
layer_test_update_weight2.update_delta_weight(lr=0.25, input_list=[0.1, 0.9], momentum=0.0001)
layer_test_update_weight2.update_weight()

# State after the update: new delta first, then the new weights.
print(
    layer_test_update_weight2.delta_weight_matrix_before,
    layer_test_update_weight2.weight_matrix,
    sep='\n',
)

[[-0.2  0.1  0.1]
 [-0.1  0.3  0.1]]
[[-0.2  0.1  0.1]
 [-0.1  0.3  0.1]]
[[6.2500e-05 7.5250e-04 8.3500e-04]
 [1.1250e-04 1.1325e-03 1.2350e-03]]
[[-0.1999375  0.1007525  0.100835 ]
 [-0.0998875  0.3011325  0.101235 ]]


# Test Update Weight Without Momentum (TODO: not yet written)

In [9]:
import pandas as pd

# Model Class

In [10]:
class LunakModel:
    """Sequential stack of layers trained with backpropagation.

    Layers are applied in the order they were added. Each layer is expected to
    provide ``input_dim``, ``weight_matrix``, ``output_list``,
    ``delta_weight_matrix``, ``delta_weight_matrix_before`` and the methods
    ``calculate_output``, ``calculate_local_gradient_output_layer``,
    ``calculate_local_gradient_hidden_layer``, ``update_delta_weight`` and
    ``update_weight``.
    """

    def __init__(self, loss='root_mean_squared', optimizer='sgd'):
        """Validate the configuration; only the default loss/optimizer exist."""
        assert loss == 'root_mean_squared', 'loss function not supported'
        assert optimizer == 'sgd', 'optimizer not supported'
        self.layers = []

    def add(self, layer):
        """Append ``layer`` as the new last layer of the network."""
        self.layers.append(layer)

    def fit(self, X, y, epochs, lr, momentum=None, batch_size=None):
        """Train the network in place.

        Args:
            X: 2-D array-like, one row per training instance.
            y: array-like of targets, one entry (scalar or vector) per row of X.
            epochs: number of passes over the data.
            lr: learning rate.
            momentum: optional momentum factor, applied once per weight update
                to the previous update's delta.
            batch_size: number of instances per weight update. ``None`` updates
                after every instance. Values larger than ``len(X)`` give a
                single full batch.

        Within a batch the per-instance weight deltas are summed before the
        weights change, so every instance contributes to the update.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        assert X.shape[1] == self.layers[0].input_dim, 'Input dimension must be same with the column'

        if batch_size is None:
            batch_size = 1
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1')

        for _ in range(epochs):
            # SGD Batch / Mini Batch
            for start in range(0, len(X), batch_size):
                X_batch = X[start:start + batch_size]
                y_batch = y[start:start + batch_size]

                accumulated = [
                    np.zeros(np.shape(layer.delta_weight_matrix)) for layer in self.layers
                ]

                for instance, target in zip(X_batch, y_batch):
                    self._backpropagate(instance, target)

                    # Plain gradient step per instance (momentum is added once
                    # per batch below, not once per instance).
                    layer_input = instance
                    for layer, batch_delta in zip(self.layers, accumulated):
                        layer.update_delta_weight(lr, layer_input, None)
                        batch_delta += layer.delta_weight_matrix
                        layer_input = layer.output_list

                # Update Weight
                for layer, batch_delta in zip(self.layers, accumulated):
                    if momentum is not None:
                        previous = np.asarray(layer.delta_weight_matrix_before, dtype=float)
                        batch_delta = batch_delta + momentum * previous
                        # Copy so the layer's two delta matrices never alias.
                        layer.delta_weight_matrix_before = batch_delta.copy()
                    layer.delta_weight_matrix = batch_delta
                    layer.update_weight()

    def _backpropagate(self, instance, target):
        """Run one forward pass, then store the local gradient in every layer."""
        # Feed Forward
        activations = instance
        for layer in self.layers:
            activations = layer.calculate_output(activations)

        # Calculate Local Gradient for Output Layer
        output_layer = self.layers[-1]
        downstream_gradient = output_layer.calculate_local_gradient_output_layer(
            np.atleast_1d(target)
        )
        downstream_weights = output_layer.weight_matrix

        # Calculate Local Gradient for Hidden Layer (last hidden layer first)
        for layer in reversed(self.layers[:-1]):
            downstream_gradient = layer.calculate_local_gradient_hidden_layer(
                downstream_gradient, downstream_weights
            )
            downstream_weights = layer.weight_matrix
                    
    

In [11]:
# The only supported configuration, spelled out explicitly.
model = LunakModel(loss='root_mean_squared', optimizer='sgd')

In [12]:
# Hidden layer: 2 inputs -> 2 sigmoid units (with bias).
model.add(DenseLayer(units=2, activation='sigmoid', input_dim=2, init='uniform', use_bias=True))
# Output layer: 2 hidden activations -> 1 sigmoid unit (with bias).
model.add(DenseLayer(units=1, activation='sigmoid', input_dim=2, init='uniform', use_bias=True))

In [13]:
# Overwrite the random weights with fixed values; the last column is the bias weight.
model.layers[0].weight_matrix = np.array([
    [-0.2, 0.1, 0.1],
    [-0.1, 0.3, 0.1],
])
model.layers[1].weight_matrix = np.array([[0.2, 0.3, 0.2]])

print(model.layers[0].weight_matrix, model.layers[1].weight_matrix, sep='\n')


[[-0.2  0.1  0.1]
 [-0.1  0.3  0.1]]
[[0.2 0.3 0.2]]


In [14]:
# A single training instance with two features and its scalar target.
X_train = np.array([[0.1, 0.9]])
y_train = np.array([0.9])

In [15]:
# One epoch over the single instance, with a small momentum term.
model.fit(X_train, y_train, epochs=1, lr=0.25, momentum=0.0001)

In [16]:
# Hidden layer after training: updated weights, then its local gradients.
print(model.layers[0].weight_matrix, model.layers[0].local_gradient, sep='\n')

[[-0.19991775  0.10074028  0.10082253]
 [-0.09987967  0.30108299  0.10120332]]
[0.00329012 0.00481328]


In [17]:
# Output layer after training: updated weights, then its local gradient.
print(model.layers[1].weight_matrix, model.layers[1].local_gradient, sep='\n')

[[0.20898739 0.30976024 0.21656973]]
[0.06627891]
