In [5]:
import numpy as np

trainData = np.genfromtxt('Fashion-MNIST/trainData.csv', delimiter=',')
trainLabels = np.genfromtxt('Fashion-MNIST/trainLabels.csv', delimiter=',')
testData = np.genfromtxt('Fashion-MNIST/testData.csv', delimiter=',')
testLabels = np.genfromtxt('Fashion-MNIST/testLabels.csv', delimiter=',')

# Dataloader

In [6]:
class Dataloader:
    
    def __init__(self, data, labels, n_classes, batch_size=None, shuffle=False):

        assert len(data)==len(labels)
        self.__n_classes = n_classes
        self.__batch_size = batch_size
        self.__shuffle = shuffle
        self.__data = data
        self.__onehot_labels = self.__onehot(labels, self.__n_classes)


    def __onehot(self, labels, n_classes):

        onehot_vectors = np.eye(n_classes)[np.array(labels.astype(int)).reshape(-1)]
        return onehot_vectors.reshape(list(labels.shape)+[n_classes])     


    def __iter__(self):

        if self.__shuffle:
            self.__shuffle_dataset()

        if self.__batch_size==None:
            yield (np.matrix(self.__data), np.matrix(self.__onehot_labels))
            return

        for idx in range(0, len(self.__data), self.__batch_size):
            yield (np.matrix(self.__data[idx:idx+self.__batch_size]), 
                np.matrix(self.__onehot_labels[idx:idx+self.__batch_size]))

# Activation Functions

In [7]:
class Identical:
    
    def __init__(self): pass
    
    def __val(self, matrix):
        identical_value = np.matrix(matrix, dtype=float)
        return identical_value


    def derivative(self, matrix):
        temp = np.matrix(matrix, dtype=float)
        identical_derivative = np.matrix(np.full(np.shape(temp), 1.))
        return identical_derivative
    

    def __call__(self, matrix):
        return self.__val(matrix)
    

class Relu:
    
    def __init__(self): pass
    
    def __relu(self, matrix):
        relu_value = np.maximum(0,matrix)
        return relu_value


    def derivative(self, matrix):
        relu_derivative = np.matrix(matrix, dtype=float)
        relu_derivative[relu_derivative<=0] = 0
        relu_derivative[relu_derivative>0] = 1
        return relu_derivative
    

    def __call__(self, matrix):
        return self.__relu(matrix)

    
class LeakyRelu:
    
    def __init__(self, negative_slope=0.01):
        self.negative_slope = 0.01
    

    def __val(self, matrix):                                                     
        y1 = ((matrix > 0) * matrix)                                                 
        y2 = ((matrix <= 0) * matrix * 0.01)                                         
        leacky_relu_value = y1 + y2  
        return leacky_relu_value


    def derivative(self, matrix):
        leacky_relu_derivative = np.ones_like(matrix)
        leacky_relu_derivative[matrix < 0] = alpha
        return leacky_relu_derivative
    

    def __call__(self, matrix):
        return self.__val(matrix)

    
class Sigmoid:
    
    def __init__(self): pass

    def __val(self, matrix):
        sigmoid_value = 1/(1+np.exp(-matrix))
        return sigmoid_value


    def derivative(self, matrix):
        sigmoid_value = self.__val(matrix)
        sigmoid_derivative = sigmoid_value * (1 - sigmoid_value)
        return sigmoid_derivative
    

    def __call__(self, matrix):
        return self.__val(matrix)


class Softmax:
    
    def __init__(self): pass

    def __val(self, matrix):
        expo = np.exp(matrix-np.max(matrix))
        expo_sum = np.sum(expo)
        softmax_value = expo/expo_sum
        return softmax_value


    def derivative(self, matrix):
        # TODO: Implement
        return softmax_derivative
    

    def __call__(self, matrix):
        return self.__val(matrix)

# Loss Function

In [8]:
class CrossEntropy: #(with softmax)
    
    def __init__(self): pass

    def __val(self, true_val, expected_val):
        assert np.shape(true_val)==np.shape(expected_val)

        m = expected_val.argmax(axis=1).shape[0]
        p = Softmax()(true_val)
        log_likelihood = -np.log(p[range(m),expected_val.argmax(axis=1)])
        cross_entropy_value = np.sum(log_likelihood) / m

        return cross_entropy_value
        
        
    def derivative(self, true_val, expected_val):
        assert np.shape(true_val)==np.shape(expected_val)

        m = expected_val.argmax(axis=1).shape[0]
        grad = Softmax()(true_val)
        grad[range(m),expected_val.argmax(axis=1)] -= 1
        cross_entropy_derivative = grad/m

        return cross_entropy_derivative
    

    def __call__(self, true_val, expected_val):
        return self.__val(true_val, expected_val)

# Layer

In [9]:
import math

class Layer:

    DEFAULT_LOW, DEFAULT_HIGH, DEFAULT_MEAN, DEFAULT_VAR = 0, 0.05, 0., 1.
  
    def __init__(self, input_size, output_size, 
                 activation=Identical(), initial_weight='uniform', **initializing_parameters):
        
        assert type(initial_weight)==str, 'Undefined activation function!'
        self.__weight_initializer_dict = {'uniform':self.__uniform_weight, 'normal':self.__normal_weight}
        assert initial_weight in self.__weight_initializer_dict, 'Undefined weight initialization function!'
        
        
        self.__n_neurons = output_size
        weight_initializer = self.__weight_initializer_dict[initial_weight]
        self.__weight = weight_initializer(input_size, self.__n_neurons, **initializing_parameters)
        self.__bias = weight_initializer(1, self.__n_neurons, **initializing_parameters)
        self.__activation = activation
        
        self.__last_input = None
        self.__last_activation_input = None
        self.__last_activation_output = None
        self.__last_activation_derivative = None
        
        
    def forward(self, layer_input):
        assert np.ndim(layer_input)==2
        assert np.size(self.__weight,0) == np.size(layer_input,1)

        self.__last_input = layer_input
        self.__last_activation_input = np.matmul(layer_input, self.__weight)+self.__bias
        self.__last_activation_output = self.__activation(self.__last_activation_input)
        self.__last_activation_derivative = self.__activation.derivative(self.__last_activation_input)

        return self.__last_activation_output
    

    def update_weights(self, backprop_tensor, lr):
        assert np.ndim(backprop_tensor)==2
        assert np.size(backprop_tensor,0) == np.size(self.__last_activation_derivative,0)
        assert np.size(backprop_tensor,1) == self.__n_neurons

        self.__weight = self.__weight - lr * np.matmul(np.transpose(self.__last_input), np.multiply(backprop_tensor, self.__last_activation_derivative))
        self._bias = self.bias - lr *  np.matmul(np.transpose(np.ones(self.__last_input.shape)), np.multiply(backprop_tensor, self.__last_activation_derivative))
        backprop_tensor = np.matmul(np.multiply(backprop_tensor, self.__last_activation_derivative), np.transpose(self.__weight))
        
        return backprop_tensor


    def __uniform_weight(self, dim1, dim2, **initializing_parameters):
        low, high = self.DEFAULT_LOW, self.DEFAULT_HIGH
        if 'low' in initializing_parameters.keys(): low = initializing_parameters['low']
        if 'high' in initializing_parameters.keys(): high = initializing_parameters['high']
        weights = np.random.uniform(low, high, (dim1, dim2))
        return weights


    def __normal_weight(self, dim1, dim2, **initializing_parameters):
        mean, var = self.DEFAULT_MEAN, self.DEFAULT_VAR
        if 'mean' in initializing_parameters.keys(): mean = initializing_parameters['mean']
        if 'var' in initializing_parameters.keys(): var = initializing_parameters['var']
        weights = np.random.normal(mean, math.sqrt(var), (dim1, dim2))
        return weights
    

    @property
    def n_neurons(self): return self.__n_neurons
    
    @property
    def weight(self): return self.__weight
    
    @property
    def bias(self): return self.__bias
    
    @property
    def activation(self): return self.__activation

# Feed Forward Neural Network

In [24]:
class FeedForwardNN:
    
    def __init__(self, input_shape):
        
        self.__input_shape = input_shape
        self.__output_shape = None
        
        self.__layers_list = []
        
        self.__lr = None
        self.__loss = None

        
    def add_layer(self, n_neurons, activation=Relu(), initial_weight='uniform', **initializing_parameters):
         
        assert type(n_neurons)==int, "Invalid number of neurons for the layer!"
        assert n_neurons>0, "Invalid number of neurons for the layer!"
        
        n_prev_neurons = self.__input_shape if len(self.__layers_list)==0 else self.__layers_list[-1].n_neurons
        new_layer = Layer(n_prev_neurons, n_neurons, activation, initial_weight, **initializing_parameters)
        self.__layers_list.append(new_layer)
        self.__output_shape = self.__layers_list[-1].n_neurons 
      
    
    def set_training_param(self, loss=CrossEntropy(), lr=1e-3):
        assert self.__layers_list, "Uncomplete model!"
        self.__loss = loss
        self.__lr = lr
    
    
    def forward(self, network_input):
        assert type(self.__output_shape) != None, "Model is not compiled!"
        
        layer_input = network_input
        for layer in self.__layers_list:
            layer_input = layer.forward(layer_input)
        network_output = layer_input

        return network_output
    
    
    def fit(self, epochs, trainloader, testloader=None, print_results=True):
        
        assert type(self.__output_shape) != None, "Model is not compiled!"
        assert type(self.__lr) != None and type(self.__loss) != None, "Training paramenters are not set!"

        log = {"train_accuracy":[], "train_loss":[], "test_accuracy":[], "test_loss":[]}
        
        for epoch in range(1, epochs+1):
            
            if print_results: 
                print('Epoch {}:'.format(epoch)) 
                
            average_accuracy, average_loss = self.__train(trainloader)
            log['train_accuracy'].append(average_accuracy)
            log['train_loss'].append(average_loss)
            if print_results:
                print('\tTrain: Average Accuracy: {}\tAverage Loss: {}'.format(average_accuracy, average_loss))
            
            if type(testloader) != type(None):
                average_accuracy, average_loss = self.__test(testloader)
                log['test_accuracy'].append(average_accuracy)
                log['test_loss'].append(average_loss)
                if print_results:
                    print('\tTest: Average Accuracy: {}\tAverage Loss: {}'.format(average_accuracy, average_loss))
                    
        return log
    

    def __train(self, trainloader):
        bach_accuracies, batch_losses = [], []
        for x_train, y_train in trainloader:
            batch_accuracy, batch_loss = self.__train_on_batch(x_train, y_train)
            bach_accuracies.append(batch_accuracy)
            batch_losses.append(batch_loss)
        return np.mean(bach_accuracies), np.mean(batch_losses)
    
    
    def __test(self, testloader):
        bach_accuracies, batch_losses = [], []
        for x_test, y_test in testloader:
            batch_accuracy, batch_loss = self.__test_on_batch(x_test, y_test)
            bach_accuracies.append(batch_accuracy)
            batch_losses.append(batch_loss)
        return np.mean(bach_accuracies), np.mean(batch_losses)

    
    def __train_on_batch(self, x_batch, y_batch):

        output = self.forward(x_batch)

        batch_average_loss = self.__update_weights(output, y_batch)

        batch_accuracy = self.__compute_accuracy(output, y_batch)
        
        return (batch_accuracy, batch_average_loss)
        
        
    def __test_on_batch(self, x_batch, y_batch):

        output = self.forward(x_batch)

        batch_average_loss = self.__update_weights(output, y_batch)

        batch_accuracy = self.__compute_accuracy(output, y_batch)

        return (batch_accuracy, batch_average_loss)
            
        
    def __get_labels(self, outputs):
        labels = outputs.argmax(axis=1)
        # print(labels.shape, outputs.shape)
        list = []
        for i in range(outputs.shape[0]):
            list.append([float(outputs[i, labels[i]])])
        
        # print(np.array(list).shape)
        return np.matrix(np.array(list))
    
    
    def __compute_accuracy(self, output, expected_output):
        accuracy = (self.__get_labels(output) == self.__get_labels(expected_output)).all(axis=1).mean()

        return accuracy
    
    
    def __update_weights(self, output, y_train):
        backprop_tensor = self.__loss.derivative(output,y_train)
        for layer in reversed(self.__layers_list):
           backprop_tensor = layer.update_weights(backprop_tensor , self.__lr)

        return np.mean(backprop_tensor)

# Training Sample Code

In [25]:
# Sample code for building and training a model

INPUT_SHAPE = 784
LEARNING_RATE = 1e-8
EPOCHS = 30
n_classes = 10
batch_size = 64

TRAINLOADER = Dataloader(trainData, trainLabels, n_classes, batch_size)
TESTLOADER = Dataloader(testData, testLabels, n_classes, batch_size)

network = FeedForwardNN(INPUT_SHAPE)
network.add_layer(20, input_shape=INPUT_SHAPE, activation=Relu(), weight_initializer='uniform')
network.add_layer(10, activation=Relu(), weight_initializer='uniform')
network.set_training_param(loss=CrossEntropy(), lr=LEARNING_RATE)

log = network.fit(EPOCHS, TRAINLOADER, TESTLOADER)

Epoch 1:
	Train: Average Accuracy: 0.0	Average Loss: -0.002483929119025641
	Test: Average Accuracy: 0.0	Average Loss: -0.0031616001478298934
Epoch 2:
	Train: Average Accuracy: 0.0	Average Loss: -0.0037617716725079097
	Test: Average Accuracy: 0.0	Average Loss: -0.004497229051852248
Epoch 3:
	Train: Average Accuracy: 0.0	Average Loss: -0.0051234137363452194
	Test: Average Accuracy: 0.0	Average Loss: -0.005933293599427279
Epoch 4:


KeyboardInterrupt: 