# An MLP Classifier for Fashion-MNIST

You need to implement several parts of the MLP:

- layers
- activations
- forward propagation
- backward propagation
- optimization

With a proper training strategy, you should reach an accuracy above **88.5%** on the development set and **88.0%** on the test set.

In [None]:
import numpy as np


class relu:
    '''
    Rectified linear unit activation, applied element-wise: max(x, 0).
    '''

    def __init__(self):
        pass

    def forward(self, inputs):
        '''
        forward propagation through ReLU

        Parameters
        ----------
        inputs: numpy.ndarray
            inputs for forward propagation

        Return
        ------
        outputs: numpy.ndarray
            outputs through ReLU (same shape as inputs, negatives set to 0)
        '''
        return np.maximum(inputs, 0)

    def backward(self, inputs, outputs, fgrad):
        '''
        backward propagation through ReLU

        Parameters
        ----------
        inputs: numpy.ndarray
            input for ReLU function
        outputs: numpy.ndarray
            output of ReLU function (unused; kept so every activation
            shares the same backward signature)
        fgrad: numpy.ndarray
            d(Loss)/d(outputs)

        Return
        ------
        grad: numpy.ndarray
            d(Loss)/d(inputs)
        '''
        # The derivative is 1 where the input was strictly positive and 0
        # elsewhere (the sub-gradient at exactly 0 is taken to be 0).
        return fgrad * (np.asarray(inputs) > 0)
        

class softmax:
    '''
    Softmax activation, normalising over the last axis.
    '''

    def __init__(self):
        pass

    def forward(self, inputs):
        '''
        forward propagation through softmax

        Parameters
        ----------
        inputs: numpy.ndarray
            inputs for forward propagation; softmax is taken over the
            last axis

        Return
        ------
        outputs: numpy.ndarray
            outputs through softmax (same shape as inputs, each slice
            along the last axis sums to 1)
        '''
        # Subtracting the per-row maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing.
        shifted = inputs - np.max(inputs, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def backward(self, inputs, outputs, fgrad):
        '''
        backward propagation through softmax

        Parameters
        ----------
        inputs: numpy.ndarray
            input for softmax function (unused; kept so every activation
            shares the same backward signature)
        outputs: numpy.ndarray
            output of softmax function
        fgrad: numpy.ndarray
            d(Loss)/d(outputs)

        Return
        ------
        grad: numpy.ndarray
            d(Loss)/d(inputs)
        '''
        # Jacobian of softmax: d y_j / d z_i = y_j * (delta_ij - y_i), hence
        # dL/dz_i = y_i * g_i - y_i * sum_j(g_j * y_j).
        weighted = fgrad * outputs
        return weighted - outputs * np.sum(weighted, axis=-1, keepdims=True)
        

In [None]:
class FullyConnected:
    '''
    Fully connected (dense) layer followed by an activation.

    forward() caches its input, its pre-activation output and its
    activated output on the instance; backward() and update() rely on
    those cached values, so they must be called after forward().
    '''

    def __init__(self, dim_in, dim_out, batch_size, activation):
        # initialization according to He et al.(2015)
        # Cast to float32 *after* scaling: np.sqrt returns a float64 scalar,
        # which would otherwise promote W to float64 on NumPy >= 2.
        self.W = (np.random.randn(dim_in, dim_out)
                  * np.sqrt(2.0 / dim_in)).astype(np.float32)
        self.b = np.zeros([dim_out]).astype(np.float32)
        # Stored for callers; not used by the computations in this class.
        self.batch_size = batch_size
        self.activation = activation

    def forward(self, inputs):
        '''
        Forward propagation through a fully connected layer

        Parameters
        ----------
        inputs: numpy.ndarray
            inputs for forward propagation, one sample per row

        Return
        ------
        outputs: numpy.ndarray
            outputs through a fully connected layer (after activation)
        '''
        # Cache everything backward() needs.
        self.inputs = inputs
        self.outputs = np.dot(inputs, self.W) + self.b
        self.outputs_act = self.activation.forward(self.outputs)
        return self.outputs_act

    def backward(self, grad):
        '''
        Backward propagation through a fully connected layer

        Parameters
        ----------
        grad: numpy.ndarray
            d(Loss)/d(activated outputs of this layer)

        Return
        ------
        grad: numpy.ndarray
            d(Loss)/d(self.inputs)
        '''
        # Gradient w.r.t. the pre-activation outputs.
        activ_grad = self.activation.backward(self.outputs,
                                              self.outputs_act, grad)
        # The bias is added to every row, so its gradient sums over rows.
        self.grad_b = np.sum(activ_grad, axis=0)
        self.grad_W = np.dot(self.inputs.T, activ_grad)
        grad_inputs = np.dot(activ_grad, self.W.T)
        return grad_inputs

    def update(self, lr):
        '''
        update W and b according to learning rate and gradient

        Parameters
        ----------
        lr: float
            learning rate.
        '''
        # In-place updates keep W and b in float32 even when the gradients
        # were accumulated in float64.
        self.W -= lr * self.grad_W
        self.b -= lr * self.grad_b

In [None]:
from utils import mnist_reader
import logging

# Root logger: timestamped INFO-level messages.
logging.basicConfig(
    format='%(asctime)-15s %(levelname)s: %(message)s',
    level=logging.INFO,
)
# Logger used by the training script in this file.
LOG = logging.getLogger('mlp-fashion-mnist')
# Uncomment to seed NumPy's global RNG:
# np.random.seed(233)


def evaluate(inputs, y):
    '''
    Run inputs through every layer of the module-level FCs list and score
    the predictions against y.

    Parameters
    ----------
    inputs: numpy.ndarray
        inputs fed to the first layer of FCs
    y: numpy.ndarray
        reference labels, compared element-wise with the argmax (axis 1)
        of the last layer's output

    Return
    ------
    precision: float
        fraction of predictions equal to y
    '''
    # Feed each layer's output into the next one.
    for fc in FCs:
        inputs = scores = fc.forward(inputs)
    predicted = np.argmax(scores, axis=1)
    hits = predicted == y
    return float(np.mean(hits))

class params:
    '''
    Training hyper-parameters.

    Attributes
    ----------
    batch_size: int
        number of samples per mini-batch
    epochs: int
        number of passes over the training data
    lr: float
        learning rate
    '''

    def __init__(self):
        self.batch_size, self.epochs, self.lr = 200, 25, 0.1



In [None]:
if __name__ == "__main__":
    # NOTE: this script deliberately stays at module level (no main()
    # function) because evaluate() reads the global name `FCs` bound below.
    opts = params()

    X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
    X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
    # Scale inputs by 1/255 (presumably 8-bit pixel intensities -> [0, 1]).
    X_train = X_train.astype(np.float32) / 255
    X_test = X_test.astype(np.float32) / 255
    r = np.random.permutation(len(y_train))
    X_train = X_train[r]
    y_train = y_train[r]
    # Split at a single index so the development and training sets are
    # disjoint (previously dev was [:12000] and train [10000:], which shared
    # 2000 samples and inflated dev accuracy).
    n_dev = 10000
    X_dev, y_dev = X_train[:n_dev], y_train[:n_dev]
    X_train, y_train = X_train[n_dev:], y_train[n_dev:]

    LOG.info("finish data preprocessing.")

    FCs = [FullyConnected(784, 256, opts.batch_size, relu()),
           FullyConnected(256, 128, opts.batch_size, relu()),
           FullyConnected(128, 64, opts.batch_size, relu()),
           FullyConnected(64, 10, opts.batch_size, softmax())]

    LOG.info("finish initialization.")

    # One-hot lookup table for the 10 classes, built once instead of once
    # per mini-batch.
    one_hot = np.eye(10)
    # Lower bound on predicted probabilities so log() and the division in
    # the gradient never see an exact zero.
    eps = 1e-12

    n_samples = len(y_train)
    order = np.arange(n_samples)
    best_accuracy, test_accuracy = 0, 0
    for epoch in range(opts.epochs):
        np.random.shuffle(order)
        cost = 0.
        for batch_start in range(0, n_samples, opts.batch_size):
            # Slicing clamps at the end of the array, so the last batch may
            # simply be shorter.
            batch_id = order[batch_start:batch_start + opts.batch_size]
            xs, ys = X_train[batch_id], y_train[batch_id]
            outputs = xs
            for layer in FCs:
                outputs = layer.forward(outputs)
            targets = one_hot[ys]
            probs = np.clip(outputs, eps, 1.0)
            # Logged cost: cross-entropy averaged over every (sample, class)
            # entry, i.e. the per-sample mean cross-entropy divided by 10.
            cost += -np.mean(np.log(probs) * targets)
            # d(Loss)/d(outputs) for Loss = per-sample mean cross-entropy.
            # Dividing by the actual batch length keeps a short final batch
            # correctly averaged.
            grad = -targets / probs / len(ys)
            for layer in reversed(FCs):
                grad = layer.backward(grad)
            for layer in FCs:
                layer.update(opts.lr)
        accuracy = evaluate(X_dev, y_dev)
        LOG.info("iteration {0}, cost = {1}, dev_accuracy = {2}".format(
            epoch, cost, accuracy))
        # Only touch the test set when the dev accuracy improves, so the
        # reported test accuracy belongs to the best dev model.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            test_accuracy = evaluate(X_test, y_test)
            LOG.info("New best achived. test_accuracy = {0}"
                     .format(test_accuracy))
    LOG.info("Training finished. dev_accuracy = {0}, test_accuracy = {1}"
             .format(best_accuracy, test_accuracy))



