In [1]:
# Get data

from keras.datasets import mnist
from keras.utils import np_utils

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each image into one row vector (28x28 -> 784 dim. for MNIST).
# The sample count is read from the array itself and -1 lets NumPy infer the
# row length, so the dataset size is not hard-coded here.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

# Normalization: divide every pixel value by 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

K = 10  # number of classes
# convert class vectors to binary class matrices (one row of length K per label)
Y_train = np_utils.to_categorical(y_train, K)
Y_test = np_utils.to_categorical(y_test, K)

Using TensorFlow backend.


60000 train samples
10000 test samples


In [2]:
def softmax(X):
    """Row-wise softmax.

    Input matrix X of size Nb x d - output matrix of the same size, in which
    every row is non-negative and sums to 1.
    """
    # Subtracting each row's maximum does not change the result (the common
    # factor cancels in the ratio) but keeps np.exp from overflowing to inf,
    # which would otherwise turn the row into nan (inf / inf).
    shifted = X - np.max(X, axis=1, keepdims=True)
    E = np.exp(shifted)
    return E / np.sum(E, axis=1, keepdims=True)

In [3]:
def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    Input matrix X of size Nb x d - output matrix of the same size.
    """
    return 1. / (1. + np.exp(-X))

In [4]:
def deriv_sigmoid(Y):
    """Derivative of the sigmoid, expressed through the sigmoid's output.

    Y is the matrix of sigmoid activations (size Nb x d), not the
    pre-activation input. Returns Y * (1 - Y) element-wise, same size as Y.
    """
    one_minus_Y = 1. - Y
    return np.multiply(Y, one_minus_Y)

In [5]:
import math

In [6]:
class FullConnectedLayer:
    """Fully connected layer computing batch_out = activation(batch_in * W + b).

    The layer caches the arrays of its latest forward pass (batch_in, S,
    batch_out) so that backward_layer can derive the gradients of the loss
    with respect to W and b from them.
    """

    def __init__(self, input_dim, nbr_units, activation_funct):
        """Create a layer with randomly initialised parameters.

        input_dim: number of columns of the input batch.
        nbr_units: number of units, i.e. number of columns of the output.
        activation_funct: "sigmoid" or "softmax". The name is only checked
            when forward_layer runs, not here.
        """
        self.input_dim = input_dim
        self.nbr_units = nbr_units
        self.activation_funct = activation_funct

        # Gaussian initial values (mean 0, std 0.1). W is drawn before b; keep
        # that order so a seeded run keeps producing the same parameters.
        self.W = np.random.normal(loc=0.0, scale=0.1, size=(input_dim, nbr_units))
        self.b = np.random.normal(loc=0.0, scale=0.1, size=(1, nbr_units))

        # Placeholders, overwritten by forward_layer.
        # S = X*W + b
        self.S = np.array((0, 0))
        self.batch_in = np.array((0, 0))
        self.batch_out = np.array((0, 0))

        # Placeholders, overwritten by backward_layer.
        # dLdS: derivative of Loss with respect to S
        self.dLdS = np.array((0, 0))
        self.gradW = np.array((0, 0))
        self.gradb = np.array((0, 0))

    def forward_layer(self, batch_in):
        """Run the layer on a batch and cache batch_in, S and batch_out.

        batch_in: matrix of size Nb x input_dim.
        Returns batch_out, a matrix of size Nb x nbr_units.
        Raises ValueError when activation_funct is neither "sigmoid" nor
        "softmax".
        """
        # Resolve the activation first so that a misconfigured layer fails
        # with a clear message before any cached state is overwritten.
        if self.activation_funct == "sigmoid":
            activation = sigmoid
        elif self.activation_funct == "softmax":
            activation = softmax
        else:
            raise ValueError(
                "Error: unknown activation function {!r}".format(self.activation_funct)
            )

        self.batch_in = batch_in
        self.S = np.matmul(batch_in, self.W) + self.b
        self.batch_out = activation(self.S)
        return self.batch_out

    # dLdS_next: dLdS: derivative of Loss with respect to S of the adjacent layer
    def backward_layer(self, dLdS_next=None, W_next=None, out_expected=None):
        """Compute and store dLdS, gradW and gradb for the last forward batch.

        Output layer: pass out_expected (same size as batch_out); dLdS is then
        batch_out - out_expected.
        Hidden layer: pass dLdS_next and W_next, the dLdS and W of the layer
        that follows this one.

        NOTE: the hidden-layer branch always applies the sigmoid derivative,
        whatever activation_funct is, so it is only correct for sigmoid
        hidden layers.

        Returns the tuple (gradW, gradb), both averaged over the batch.
        Raises ValueError when neither out_expected nor the pair
        (dLdS_next, W_next) is supplied.
        """
        batch_size = self.batch_in.shape[0]
        if out_expected is not None:
            self.dLdS = self.batch_out - out_expected
        else:
            if dLdS_next is None or W_next is None:
                raise ValueError(
                    "backward_layer needs either out_expected (output layer) "
                    "or both dLdS_next and W_next (hidden layer)"
                )
            # Chain rule: bring the next layer's dLdS back through its weights,
            # then through this layer's sigmoid.
            propagated = np.matmul(dLdS_next, W_next.T)
            self.dLdS = np.multiply(propagated, deriv_sigmoid(self.batch_out))

        self.gradW = (1./batch_size)*np.matmul(self.batch_in.T, self.dLdS)
        # Shape (nbr_units,); it broadcasts against b of shape (1, nbr_units).
        self.gradb = (1./batch_size)*np.sum(self.dLdS, axis=0)

        return self.gradW, self.gradb

In [7]:
class NeuralNet:
    """Feed-forward network: an ordered list of layers trained by mini-batch
    gradient descent.

    Each layer must provide forward_layer / backward_layer and expose the
    attributes W, b, gradW, gradb and dLdS.
    """

    def __init__(self):
        self.layers = list()

    def addLayer(self, layer):
        """Append layer at the end of the network (it becomes the output layer)."""
        self.layers.append(layer)

    def forward_net(self, batch_in):
        """Feed batch_in through every layer in order; return the last output."""
        #Input layer
        out = self.layers[0].forward_layer(batch_in)
        #The other layers
        for layer in self.layers[1:]:
            out = layer.forward_layer(out)
        return out

    def backward_net(self, batch_expected):
        """Back-propagate from the output layer down to the first layer.

        Must follow a forward_net call on the matching inputs, because the
        layers reuse what they cached during the forward pass. Gradients are
        stored on the layers; nothing is returned.
        """
        #Output layer
        self.layers[-1].backward_layer(out_expected=batch_expected)
        #The other layers, last hidden layer first
        for i in reversed(range(len(self.layers) - 1)):
            following = self.layers[i + 1]
            self.layers[i].backward_layer(dLdS_next=following.dLdS, W_next=following.W)

    def update_parameters(self, training_rate):
        """Take one gradient-descent step on every layer's W and b."""
        for layer in self.layers:
            layer.W = layer.W - training_rate*layer.gradW
            layer.b = layer.b - training_rate*layer.gradb

    def train(self, X_train, Y_train, epochs, batch_size, training_rate):
        """Train with mini-batch gradient descent.

        X_train: input matrix, one sample per row.
        Y_train: expected outputs, one row per sample.
        epochs: number of passes over the training set.
        batch_size: number of samples per parameter update.
        training_rate: step size used by update_parameters.

        Samples are visited in the order given (no shuffling). When the
        number of samples is not a multiple of batch_size, the remaining
        samples form a final smaller batch instead of being skipped.
        """
        N = X_train.shape[0]
        for epoch in range(epochs):
            for start in range(0, N, batch_size):
                stop = start + batch_size  # slicing clips this to N
                X_tmp = X_train[start:stop, :]
                Y_tmp = Y_train[start:stop]
                #Forward pass
                self.forward_net(X_tmp)
                #Backward pass
                self.backward_net(Y_tmp)
                #Update parameters
                self.update_parameters(training_rate)

    def accuracy(self, images, labels):
        """Return the percentage (0-100) of rows whose arg-max prediction
        equals the arg-max of the corresponding row of labels."""
        pred = self.forward_net(images)
        return np.mean(pred.argmax(axis=1) == labels.argmax(axis=1)) * 100.0

In [8]:
import numpy as np
import sys
# Print NumPy arrays in full rather than abbreviating large ones with "...".
np.set_printoptions(threshold=sys.maxsize)

In [36]:
#Neural net definition

N = X_train.shape[0]  # number of training samples
d = X_train.shape[1]  # number of input features per sample

numEp = 100 # Number of epochs for gradient descent
eta = 1e-1 # Learning rate
batch_size = 100

nb_units_l1 = 50
nb_units_l2 = 40
nb_units_l3 = 10

# The layers draw their initial weights from NumPy's global random generator.
# Seeding it right before they are built makes a Restart & Run All start from
# the same initial weights every time.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Two sigmoid hidden layers followed by a softmax output layer.
layer1 = FullConnectedLayer(d, nb_units_l1, "sigmoid")
layer2 = FullConnectedLayer(nb_units_l1, nb_units_l2, "sigmoid")
layer3 = FullConnectedLayer(nb_units_l2, nb_units_l3, "softmax")

neuralNet = NeuralNet()
for layer in (layer1, layer2, layer3):
    neuralNet.addLayer(layer)

In [37]:
# Mini-batch gradient descent over the training set for numEp epochs; this
# updates the W and b of every layer held by neuralNet.
neuralNet.train(X_train, Y_train, numEp, batch_size, eta)

In [38]:
# Percentage of test samples whose arg-max prediction matches the arg-max of
# the one-hot label row.
neuralNet.accuracy(X_test, Y_test)

97.1