In [5]:
# Get data

from keras.datasets import mnist
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape each 28x28 image -> 784 dim. vector
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalization
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

from keras.utils import np_utils
K=10
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, K)
Y_test = np_utils.to_categorical(y_test, K)

Using TensorFlow backend.


60000 train samples
10000 test samples


In [36]:
def softmax(X):
    # Input matrix X of size Nbxd - Output matrix of same size
    E = np.exp(X)
    return (E.T / np.sum(E,axis=1)).T

In [37]:
def sigmoid(X):
    # Input matrix X of size Nbxd - Output matrix of same size
    E = np.exp(-X) + 1
    return (1./ E)

In [38]:
def deriv_sigmoid(Y):
    # Input matrix X of size Nbxd - Output matrix of same size
    result = np.multiply(Y, 1. - Y)
    return (result)


In [127]:
import math

In [133]:
class FullConnectedLayer:
    
    def __init__(self, input_dim, nbr_units, activation_funct):
        self.input_dim = input_dim
        self.nbr_units = nbr_units
        self.activation_funct = activation_funct
        
        #self.W = np.zeros((input_dim, nbr_units))
        self.W = np.random.normal(loc=0.0, scale=0.1, size =(input_dim, nbr_units))
        #self.W = (1./math.sqrt(20))*np.random.normal(loc=0.0, scale=0.1, size =(input_dim, nbr_units))
        #self.b = np.zeros((1, nbr_units))
        self.b = np.random.normal(loc=0.0, scale=0.1, size =(1, nbr_units))
        #self.b = (1./math.sqrt(20))*np.random.normal(loc=0.0, scale=0.1, size =(1, nbr_units))
        
        self.activation = np.array((0,0))
        self.batch_in = np.array((0,0))
        self.batch_out = np.array((0,0))
        
        self.delta_L_activ = np.array((0,0))
        self.gradW = np.array((0,0))
        self.gradb = np.array((0,0))

    def forward_layer(self, batch_in):
        #batch_size = X_in.shape[0]
        #K = X_in.shape[1]
        #print(b.shape)
        #print(np.matmul(X_in,W).shape)
        self.batch_in = batch_in
        self.activation = np.matmul(batch_in,self.W) + self.b
        if self.activation_funct == "sigmoid":
            self.batch_out = sigmoid(self.activation)
        elif self.activation_funct == "softmax":
            self.batch_out = softmax(self.activation)
        else:
            print("Error: unknown activation function ", act_function)        
        return self.batch_out
    
    def backward_layer(self, delta_L_activ_next=None, W_next=None, out_expected = None):
        batch_size = self.batch_in.shape[0]
        if out_expected is None:
            self.delta_L_activ = np.matmul(delta_L_activ_next, W_next.T)
            delta_batch_out_activ = deriv_sigmoid(self.batch_out)
            self.delta_L_activ = np.multiply(self.delta_L_activ, delta_batch_out_activ)            
            #self.gradW = (1./batch_size)*np.matmul(self.batch_in.T, self.delta_L_activ)
            #self.gradb = (1./batch_size)*np.sum(self.delta_L_activ, axis=0)            
            
        else:
            self.delta_L_activ = self.batch_out - out_expected
            
            
        self.gradW = (1./batch_size)*np.matmul(self.batch_in.T, self.delta_L_activ)
        self.gradb = (1./batch_size)*np.sum(self.delta_L_activ, axis=0)
        
        return self.gradW , self.gradb             
    


In [197]:
class NeuralNet:
    
    def __init__(self):
        self.layers = list()
        
    def addLayer(self, layer):
        self.layers.append(layer)
     
    def forward_net(self, batch_in):
        batch_in_dim = batch_in.shape[1]
        batch_in_size = batch_in.shape[0]
        #Input layer 
        out = self.layers[0].forward_layer(batch_in)
        #The other layers
        for i in range(1, len(self.layers)):
            out = self.layers[i].forward_layer(out)   
        return out
        
    def backward_net(self, batch_expected): 
        #Output layer
        self.layers[len(self.layers) - 1].backward_layer(out_expected = batch_expected)
        #The other layers
        for i in reversed(range(0, len(self.layers)-1)):
            self.layers[i].backward_layer(delta_L_activ_next=self.layers[i+1].delta_L_activ, W_next=self.layers[i+1].W)

    def update_parameters(self, training_rate):
        for layer in self.layers:
            layer.W = layer.W - training_rate*layer.gradW
            layer.b = layer.b - training_rate*layer.gradb
            
    def accuracy(self, images, labels):
        pred = self.forward_net(images)
        return np.where( pred.argmax(axis=1) != labels.argmax(axis=1) , 0.,1.).mean()*100.0

In [172]:
def forward_net(batch_in): 
    
    batch_in_dim = batch_in.shape[1]
    batch_in_size = batch_in.shape[0]
    
    out = neuralNet[0].forward_layer(batch_in)
    out = neuralNet[1].forward_layer(out)
    out = neuralNet[2].forward_layer(out)
    
    return out
    

In [173]:
def backward_net(batch_expected): 
    neuralNet[2].backward_layer(out_expected = batch_expected)
    neuralNet[1].backward_layer(delta_L_activ_next=neuralNet[2].delta_L_activ, W_next=neuralNet[2].W)
    neuralNet[0].backward_layer(delta_L_activ_next=neuralNet[1].delta_L_activ, W_next=neuralNet[1].W)
    

In [174]:
def update_gradients(neuralNetwork, training_rate):
    for layer in neuralNetwork:
        layer.W = layer.W - training_rate*layer.gradW
        layer.b = layer.b - training_rate*layer.gradb

In [175]:
def accuracy(images, labels):
    pred = forward_net(images)
    return np.where( pred.argmax(axis=1) != labels.argmax(axis=1) , 0.,1.).mean()*100.0

In [176]:
import numpy as np
import sys
np.set_printoptions(threshold=sys.maxsize)

In [177]:
N = X_train.shape[0]
d = X_train.shape[1]
numEp = 100 # Number of epochs for gradient descent
eta = 1e-1 # Learning rate
batch_size = 100
nb_batches = int(float(N) / batch_size)

nb_units_l1 = 50
nb_units_l2 = 60
nb_units_l3 = 10

layer1 = FullConnectedLayer(d, nb_units_l1, "sigmoid")
layer2 = FullConnectedLayer(nb_units_l1, nb_units_l2, "sigmoid")
layer3 = FullConnectedLayer(nb_units_l2, nb_units_l3, "softmax")

neuralNet = list()
neuralNet.append(layer1)
neuralNet.append(layer2)
neuralNet.append(layer3)


for epoch in range(numEp):
    for ex in range(nb_batches):
        X_tmp = X_train[ex*batch_size:(ex+1)*batch_size , :]
        Y_tmp = Y_train[ex*batch_size:(ex+1)*batch_size]
        
        forward_net(X_tmp)
        
        backward_net(Y_tmp)
        
        update_gradients(neuralNet, eta)
        
        
        
        

In [198]:
N = X_train.shape[0]
d = X_train.shape[1]
numEp = 100 # Number of epochs for gradient descent
eta = 1e-1 # Learning rate
batch_size = 100
nb_batches = int(float(N) / batch_size)

nb_units_l1 = 50
nb_units_l2 = 60
nb_units_l3 = 10

layer1 = FullConnectedLayer(d, nb_units_l1, "sigmoid")
layer2 = FullConnectedLayer(nb_units_l1, nb_units_l2, "sigmoid")
layer3 = FullConnectedLayer(nb_units_l2, nb_units_l3, "softmax")

neuralNet = NeuralNet()
neuralNet.addLayer(layer1)
neuralNet.addLayer(layer2)
neuralNet.addLayer(layer3)

In [199]:
for epoch in range(numEp):
    for ex in range(nb_batches):
        X_tmp = X_train[ex*batch_size:(ex+1)*batch_size , :]
        Y_tmp = Y_train[ex*batch_size:(ex+1)*batch_size]
        #Forward pass
        neuralNet.forward_net(X_tmp)
        #Backward pass
        neuralNet.backward_net(Y_tmp)
        #Update parameters
        neuralNet.update_parameters(eta)
        

In [200]:
neuralNet.accuracy(X_test, Y_test)

97.35000000000001

In [179]:
neuralNet[0].b

array([[-0.04512837, -0.21050568,  0.06292223, -0.55561657,  0.20642628,
         0.3536345 , -0.27991444, -0.20308844,  0.01269371, -0.10404784,
         0.4963069 , -0.11721533,  0.0185761 ,  0.47494311,  0.28312663,
         0.52208209,  0.23223845, -0.3998097 , -0.17170345, -1.41545479,
        -0.09417589, -0.08442112, -0.13833941,  0.63200501, -0.97288838,
        -0.20850295, -0.17996085, -0.11065262,  0.00321036, -0.27894639,
         0.21331972, -0.33915743, -0.15006311, -0.62099538, -0.39369823,
        -0.14754308, -0.32629783, -0.10046586,  0.50427947,  0.27075762,
         0.45962316,  0.03185908, -0.63992575,  0.18681236, -0.6280786 ,
         0.60742078, -0.07039334, -0.37061146, -0.43002669, -0.70731461]])

In [180]:
neuralNet[1].b

array([[ 0.09298888,  0.01411284,  0.08095532,  0.08136018, -0.09896745,
        -0.11365698,  0.03598974, -0.07527362,  0.12494728, -0.04786962,
        -0.0323821 ,  0.06528738,  0.12212508, -0.09369395, -0.03687836,
        -0.00713546, -0.06323776, -0.00686006, -0.1296068 , -0.24634377,
        -0.03212482,  0.05979797,  0.17004456, -0.03542389, -0.09472866,
        -0.09408564, -0.21083063, -0.06647323, -0.08695949,  0.10896774,
        -0.02160329, -0.27633215,  0.01402253, -0.00890622, -0.08434346,
        -0.04361425, -0.02066997, -0.06758374, -0.01068923,  0.07556103,
        -0.14600683,  0.03087698, -0.13034093, -0.00405622, -0.03850116,
        -0.0338791 ,  0.00844736,  0.04517204,  0.07023021, -0.03099676,
        -0.10125348,  0.05938369, -0.06265839,  0.00469291,  0.18918962,
         0.13476593,  0.22558807, -0.02782373, -0.13229425, -0.04499144]])

In [181]:
neuralNet[0].W

array([[ 3.60304318e-02,  2.84630807e-02, -2.28494839e-01,
         9.67724585e-02,  4.72332401e-02, -5.84972095e-02,
        -9.53353763e-02,  7.38522865e-02, -4.67388103e-02,
         5.94940435e-02,  1.74453982e-02, -3.50387275e-03,
        -2.57581682e-01,  2.11490913e-02, -1.12741168e-01,
        -1.26110418e-01,  1.42378977e-01, -7.25119499e-02,
         7.57964190e-02, -1.12067035e-01,  8.54302012e-02,
        -1.14813323e-01, -1.52120991e-01, -1.54746718e-01,
         1.05289915e-01, -5.44205276e-02, -6.57328090e-02,
         3.30671747e-02, -1.86221588e-02, -9.46798485e-02,
        -4.58172453e-02,  1.11137148e-01,  5.88816476e-02,
         8.24209818e-02, -4.75646831e-03,  1.15435587e-01,
         8.67384609e-02, -1.19808549e-01, -7.67541304e-02,
        -2.24377432e-02,  6.19226699e-02, -5.44839157e-02,
        -1.12689308e-01, -1.57937324e-01,  3.32638074e-02,
         1.22846612e-01,  1.00493037e-01,  1.09095009e-01,
         7.68860584e-03, -8.86978252e-02],
       [-6.03

In [182]:
neuralNet[1].W

array([[-4.80497796e-02,  2.29180670e-01,  5.68000024e-02,
        -9.48351338e-02,  4.12837924e-01,  2.98912828e-01,
         8.85340477e-02, -3.86165234e-01, -7.44928000e-01,
         2.24021997e-01,  2.24371450e-01,  1.15907128e-01,
         3.55212988e-01,  1.79759510e-01, -3.25702750e-01,
         2.09387462e-01, -1.71434873e-01, -4.81268509e-01,
         1.72861139e-01,  6.92845190e-01, -5.19364196e-01,
        -3.68401762e-01,  3.58483179e-01,  4.13931872e-02,
        -8.09093376e-01, -5.54401103e-02, -2.61238488e-01,
        -2.76450533e-01,  1.16882628e-02, -8.46503552e-01,
         2.42833954e-01, -4.90483131e-01,  5.04705847e-01,
         2.13841705e-01,  5.34248907e-01, -1.62905083e-01,
        -6.79626252e-01,  1.28629091e-01,  7.15647867e-01,
        -2.09064722e-01, -1.99987623e-01, -6.21164346e-01,
         4.03022070e-01, -5.04316438e-01, -1.28888182e-01,
         1.37415484e-01,  6.20436856e-01,  2.31513918e-02,
        -7.63130515e-01, -4.29751406e-01, -3.43368970e-0

TypeError: 'NeuralNet' object is not subscriptable