<a href="https://colab.research.google.com/github/hechubo/DDM-Coursework/blob/master/5055_1_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import subprocess
import os
import struct
import json


class Node:
    '''
    Node template for nodes in computational graph.

    Attributes:
        name (string): name for the node;
        parameters (list of ndarray): list used to save node's parameters;
        parameters_deltas (list of ndarray): list of gradients.
    '''
    def __init__(self, name, parameters=None):
        self.name = name
        self.parameters = parameters
        self.parameters_deltas = [None for _ in range(len(self.parameters))]


class Linear(Node):
    '''
    Linear layer
    '''
    def __init__(self, input_shape, output_shape, weight=None, bias=None):
        '''
        Args:
            input_shape (int): input shape;
            output_shape (int): output shape;
            x (2darray): input array of shape (batch_size, num_pixels).
        '''
        if weight is None:
            # Xavier initialization
            weight = np.random.randn(input_shape, output_shape) * np.sqrt(2.0 / (input_shape + output_shape))
        if bias is None:
            bias = np.zeros(output_shape)
        super(Linear, self).__init__('linear', [weight, bias])

    def forward(self, x):
        '''
        Args:
            x (2darray): input array of shape (batch_size, n_variables).

        Returns:
            ndarray: linear layer result.
        '''
        self.x = x
        return np.matmul(x, self.parameters[0]) + self.parameters[1]

    def backward(self, delta):
        '''
        Args:
            delta (ndarray): gradient of L with repect to node's output, dL/dy.

        Returns:
            ndarray: gradient of L with respect to node's input, dL/dx
        '''
        self.parameters_deltas[0] = self.x.T.dot(delta)
        self.parameters_deltas[1] = np.sum(delta, 0)
        return delta.dot(self.parameters[0].T)


class Sigmoid(Node):
    '''
    Sigmoid activation function
    '''
    def __init__(self):
        super(Sigmoid, self).__init__('sigmoid', [])

    def forward(self, x, *args):
        '''
        Args:
            x (2darray): input array of shape (batch_size, n_variables).

        Returns:
            ndarray: sigmoid activation result.
        '''
        self.x = x
        self.y = 1.0 / (1.0 + np.exp(-x))
        return self.y

    def backward(self, delta):
        '''
        Args:
            delta (ndarray): gradient of L with repect to node's output, dL/dy.

        Returns:
            ndarray: gradient of L with respect to node's input, dL/dx
        '''
        return delta * ((1 - self.y) * self.y)


class Softmax(Node):
    '''
    Softmax activation function
    '''
    def __init__(self):
        super(Softmax, self).__init__('softmax', [])

    def forward(self, x):
        '''
        Args:
            x (2darray): input array of shape (batch_size, n_variables).

        Returns:
            ndarray: softmax activation result.
        '''
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.y = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return self.y

    def backward(self, delta):
        '''
        Args:
            delta (ndarray): gradient of L with repect to node's output, dL/dy.

        Returns:
            ndarray: gradient of L with respect to node's input, dL/dx
        '''
        return self.y * (delta - np.sum(delta * self.y, axis=1, keepdims=True))


class CrossEntropy(Node):
    '''
    CrossEntropy loss function
    '''
    def __init__(self):
        super(CrossEntropy, self).__init__('cross_entropy', [])

    def forward(self, x, l):
        '''
        Args:
            x (2darray): input array of shape (batch_size, n_variables), softmax result.
            l (2darray): label array of shape (batch_size, n_variables), one-hot encoded.

        Returns:
            ndarray: cross entropy loss.
        '''
        self.x = x
        self.l = l
        self.y = -np.sum(l * np.log(x + 1e-8)) / x.shape[0]
        return self.y

    def backward(self, delta=1):
        '''
        Args:
            delta (int): defaults to 1 since the output of cross entropy loss is a scalar.

        Returns:
            ndarray: gradient of L with respect to node's input, dL/dx
        '''
        return -self.l / (self.x + 1e-8) / self.x.shape[0]


class Mean(Node):
    '''
    Mean function
    '''
    def __init__(self):
        super(Mean, self).__init__('mean', [])

    def forward(self, x):
        '''
        Args:
            x (2darray): input array of shape (batch_size, n_variables).

        Returns:
            ndarray: mean function result.
        '''
        self.x = x
        return x.mean()

    def backward(self, delta):
        '''
        Args:
            delta (ndarray): gradient of L with repect to node's output, dL/dy.

        Returns:
            ndarray: gradient of L with respect to node's input, dL/dx
        '''
        return delta * np.ones(self.x.shape) / np.prod(self.x.shape)


def load_MNIST():
    '''
    Download and unpack MNIST data.

    Returns:
        tuple of ndarray: tuple of length 4. They are training set data, training set label,
            test set data and test set label.
    '''
    base = "http://yann.lecun.com/exdb/mnist/"
    objects = ['t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
               'train-images-idx3-ubyte', 'train-labels-idx1-ubyte']
    end = ".gz"
    path = "data/raw/"
    cmd = ["mkdir", "-p", path]
    subprocess.check_call(cmd)
    print('Downloading MNIST dataset. Please do not stop the program\
    during the download. If you do, remove `data` folder and try again.')
    for obj in objects:
        if not os.path.isfile(path + obj):
            cmd = ["wget", base + obj + end, "-P", path]
            subprocess.check_call(cmd)
            cmd = ["gzip", "-d", path + obj + end]
            subprocess.check_call(cmd)

    def unpack(filename):
        '''
        Unpack file.
        '''
        with open(filename, 'rb') as f:
            _, _, dims = struct.unpack('>HBB', f.read(4))
            shape = tuple(struct.unpack('>I', f.read(4))
                          [0] for d in range(dims))
            data = np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)
            return data

    # load objects
    data = []
    for name in objects:
        name = path + name
        data.append(unpack(name))
    labels = np.zeros([data[1].shape[0], 10])
    for i, iterm in enumerate(data[1]):
        labels[i][iterm] = 1
    data[1] = labels
    labels = np.zeros([data[3].shape[0], 10])
    for i, iterm in enumerate(data[3]):
        labels[i][iterm] = 1
    data[3] = labels
    return data


def random_draw(data, label, batch_size):
    '''
    Randomly draw.

    Args:
        data (ndarray): dataset of shape (batch_size, n_variables)
        label (ndarray): one-hot label for dataset,
            for example, 3 is [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
        batch_size (int): size of batch.

    Returns:
        2darray: image data batch;
        2darray: label of images draw.
    '''
    perm = np.random.permutation(data.shape[0])
    data_b = data[perm[:batch_size]]
    label_b = label[perm[:batch_size]]
    return data_b.reshape([data_b.shape[0], -1]) / 255.0, label_b

def match_ratio(result, label):
    '''the ratio of result matching target.'''
    label_p = np.argmax(result, axis=1)
    label_t = np.argmax(label, axis=1)
    ratio = np.sum(label_p == label_t) / label_t.shape[0]
    return ratio


def net_forward(net, x, label):
    '''forward function for this sequencial network.'''
    for node in net:
        if node.name == 'cross_entropy':
            result = x
            x = node.forward(x, label)
        else:
            x = node.forward(x)
    return result, x


def net_backward(net):
    '''backward function for this sequencial network.'''
    y_delta = 1.0
    for node in net[::-1]:
        y_delta = node.backward(y_delta)
    return y_delta




In [6]:
if __name__ == '__main__':
    batch_size = 200
    learning_rate = 0.3
    dim_img = 784
    num_digit = 10
    num_epoch = 40
    test_data, test_label, train_data, train_label = load_MNIST()
    num_iteration = len(train_data) // batch_size

    net = [Linear(dim_img, 200), Sigmoid(), Linear(200, 100), Sigmoid(), Linear(100, num_digit), Softmax(), CrossEntropy(), Mean()]

    nparams = 0
    for term in net:
        for para in term.parameters:
            nparams += np.prod(para.shape)
    print('total number of trainable parameters:', nparams)

    x, label = random_draw(test_data, test_label, 1000)
    result, loss = net_forward(net, x, label)
    print('Before Training.\nTest loss = %.4f, correct rate = %.3f' % (loss, match_ratio(result, label)))

    for epoch in range(num_epoch):
        for j in range(num_iteration):
            x, label = random_draw(train_data, train_label, batch_size)
            result, loss = net_forward(net, x, label)

            net_backward(net)

            for node in net:
                if node.name == 'linear':
                    for i in range(len(node.parameters)):
                        node.parameters[i] -= learning_rate * node.parameters_deltas[i]

        result_test, loss_test = net_forward(net, test_data.reshape(test_data.shape[0], -1) / 255.0, test_label)
        print("epoch = %d/%d, loss = %.4f, corret rate = %.3f, test correct rate = %.3f" %
              (epoch, num_epoch, loss, match_ratio(result, label), match_ratio(result_test, test_label)))

    result_test, loss_test = net_forward(net, test_data.reshape(test_data.shape[0], -1) / 255.0, test_label)
    print('Test loss = %.4f, correct rate = %.3f' % (loss_test, match_ratio(result_test, test_label)))

    layer_string = []
    layer_paramters = []
    with open('/content/sample_data/npMnistParameters.npy', 'wb') as f:
        for term in net:
            layer_string.append(term.name)
            if term.name == 'linear':
                layer_paramters.append((int(term.parameters[0].shape[0]), int(term.parameters[0].shape[1])))
                np.save(f, term.parameters[0])
                np.save(f, term.parameters[1])
            else:
                layer_paramters.append(None)

    with open('/content/sample_data/npMnistStructure.json', 'w') as f:
        config = {'struct': layer_string, 'num_parametes': layer_paramters}
        json.dump(config, f)

Downloading MNIST dataset. Please do not stop the program    during the download. If you do, remove `data` folder and try again.
total number of trainable parameters: 178110
Before Training.
Test loss = 2.5237, correct rate = 0.042
epoch = 0/40, loss = 0.5267, corret rate = 0.875, test correct rate = 0.856
epoch = 1/40, loss = 0.3677, corret rate = 0.870, test correct rate = 0.898
epoch = 2/40, loss = 0.3192, corret rate = 0.900, test correct rate = 0.909
epoch = 3/40, loss = 0.1949, corret rate = 0.965, test correct rate = 0.915
epoch = 4/40, loss = 0.1846, corret rate = 0.930, test correct rate = 0.919
epoch = 5/40, loss = 0.2291, corret rate = 0.945, test correct rate = 0.928
epoch = 6/40, loss = 0.2168, corret rate = 0.935, test correct rate = 0.931
epoch = 7/40, loss = 0.2544, corret rate = 0.910, test correct rate = 0.934
epoch = 8/40, loss = 0.1421, corret rate = 0.960, test correct rate = 0.937
epoch = 9/40, loss = 0.1619, corret rate = 0.940, test correct rate = 0.942
epoch = 

In [8]:
# from mnistClassification import Linear, Sigmoid, Softmax, CrossEntropy, Mean, load_MNIST, match_ratio, net_forward
import numpy as np
import json


train_data, train_label, test_data, test_label = load_MNIST()

with open('/content/sample_data/npMnistStructure.json', 'r') as f:
    config = json.load(f)

net = []
with open('/content/sample_data/npMnistParameters.npy', 'rb') as f:
    for idx, term in enumerate(config['struct']):
        if term == 'Linear':
            net.append(Linear(config['num_params'][idx][0], config['num_params'][idx][1]))
            parameters = [np.load(f), np.load(f)]
            net[-1].parameters = parameters
        if term == 'linear':
            net.append(Linear(config['num_parametes'][idx][0], config['num_parametes'][idx][1]))
            parameters = [np.load(f), np.load(f)]
            net[-1].parameters = parameters
        elif term == 'sigmoid':
            net.append(Sigmoid())
        elif term == 'softmax':
            net.append(Softmax())
        elif term == 'cross_entropy':
            net.append(CrossEntropy())
        elif term == 'mean':
            net.append(Mean())
        else:
            raise Exception("The loaded node name not recognized!")

result_test, loss_test = net_forward(net, test_data.reshape(test_data.shape[0], -1), test_label)
print('MNIST test correct rate = %.3f' % (match_ratio(result_test, test_label)))


Downloading MNIST dataset. Please do not stop the program    during the download. If you do, remove `data` folder and try again.


  self.y = 1.0 / (1.0 + np.exp(-x))


MNIST test correct rate = 0.964
