In [1]:

from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time

from mlxtend.data import loadlocal_mnist
def load_mnist_split(images_path, labels_path, num_classes=10):
    """Load one MNIST idx split and prepare it for the network.

    Args:
        images_path: path to the idx3-ubyte image file.
        labels_path: path to the idx1-ubyte label file.
        num_classes: width of the one-hot label vectors. Pinned explicitly so
            every split gets the same label width instead of a width inferred
            from the largest label present in that split.

    Returns:
        (images, labels): images divided by 255 and cast to float32, labels
        one-hot encoded with ``num_classes`` columns.
    """
    images, labels = loadlocal_mnist(images_path=images_path,
                                     labels_path=labels_path)
    return (images / 255).astype('float32'), to_categorical(labels, num_classes=num_classes)


# Training split.
x_train, y_train = load_mnist_split('Data/train-images.idx3-ubyte',
                                    'Data/train-labels.idx1-ubyte')
# Held-out split, used below as the validation/test set.
x_val, y_val = load_mnist_split('Data/t10k-images.idx3-ubyte',
                                'Data/t10k-labels.idx1-ubyte')


In [2]:
# Sanity check on the loaded training images: (number of samples, values per sample).
print(x_train.shape)

(60000, 784)


In [3]:
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time
"""
x, y = fetch_openml('data/mnist_train_small',  return_X_y=True)
x = (x/255).astype('float32')
y = to_categorical(y)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, random_state=42)
"""

class DeepNeuralNetwork():
    '''
        Fully connected network with two hidden layers, trained one sample
        at a time with stochastic gradient descent.

        Every layer, including the output layer, uses a sigmoid activation
        and there are no bias terms.

        sizes:  layer widths as [input, hidden_1, hidden_2, output]; only
                the first four entries are read.
        epochs: number of passes over the training set made by train().
        l_rate: learning rate applied to every weight update.
    '''
    def __init__(self, sizes, epochs=1, l_rate=0.001):
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate

        # we save all parameters in the neural network in this dictionary
        # (weights 'W1'..'W3', plus the 'A*'/'Z*' values cached by forward_pass)
        self.params = self.initialization()

    def sigmoid(self, x, derivative=False):
        '''
            Logistic function 1 / (1 + exp(-x)), or its derivative when
            derivative=True.

            Both are evaluated from exp(-|x|), which always lies in (0, 1],
            so large-magnitude inputs can neither overflow nor produce the
            inf / inf = NaN that the direct formula gives for very negative x.
        '''
        x = np.asarray(x)
        decay = np.exp(-np.abs(x))
        if derivative:
            # The derivative is an even function: s'(x) = e^-|x| / (1 + e^-|x|)^2
            return decay / (1 + decay) ** 2
        # x >= 0: 1 / (1 + e^-x);   x < 0: e^x / (1 + e^x)  (same value, stable form)
        return np.where(x >= 0, 1.0, decay) / (1 + decay)

    def softmax(self, x, derivative=False):
        '''
            Softmax of x normalised along axis 0, or the element-wise
            p * (1 - p) term when derivative=True.
        '''
        # Numerically stable with large exponentials
        exps = np.exp(x - x.max())
        probs = exps / np.sum(exps, axis=0)
        if derivative:
            return probs * (1 - probs)
        return probs

    def initialization(self):
        '''
            Draw the three weight matrices from a standard normal
            distribution, each scaled by sqrt(1 / rows of that matrix).
            Returns them in a dict keyed 'W1', 'W2', 'W3'.
        '''
        # number of nodes in each layer
        input_layer = self.sizes[0]
        hidden_1 = self.sizes[1]
        hidden_2 = self.sizes[2]
        output_layer = self.sizes[3]

        params = {
            'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
            'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
            'W3': np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
        }

        return params

    def forward_pass(self, x_train):
        '''
            Propagate one sample through the network and return the output
            layer activations. The pre-activations 'Z1'..'Z3' and the
            activations 'A0'..'A3' are cached in self.params so that
            backward_pass can reuse them.
        '''
        params = self.params

        # input layer activations becomes sample
        params['A0'] = x_train

        # input layer to hidden layer 1
        params['Z1'] = np.dot(params["W1"], params['A0'])
        params['A1'] = self.sigmoid(params['Z1'])

        # hidden layer 1 to hidden layer 2
        params['Z2'] = np.dot(params["W2"], params['A1'])
        params['A2'] = self.sigmoid(params['Z2'])

        # hidden layer 2 to output layer
        params['Z3'] = np.dot(params["W3"], params['A2'])
        params['A3'] = self.sigmoid(params['Z3'])

        return params['A3']

    def backward_pass(self, y_train, output):
        '''
            This is the backpropagation algorithm, for calculating the updates
            of the neural network's parameters.

            Must be called right after forward_pass for the same sample: it
            reads the 'Z*' and 'A*' values that forward_pass cached.

            Returns a dict keyed 'W1', 'W2', 'W3' holding one update per
            weight matrix, each with the same shape as that matrix.
        '''
        params = self.params
        change_w = {}

        # Calculate W3 update
        error = 2 * (output - y_train) / output.shape[0] * self.sigmoid(params['Z3'], derivative=True)
        change_w['W3'] = np.outer(error, params['A2'])

        # Calculate W2 update
        error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
        change_w['W2'] = np.outer(error, params['A1'])

        # Calculate W1 update
        error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
        change_w['W1'] = np.outer(error, params['A0'])

        return change_w

    def update_network_parameters(self, changes_to_w):
        '''
            Update network parameters according to update rule from
            Stochastic Gradient Descent.

            θ = θ - η * ∇J(x, y), 
                theta θ:            a network parameter (e.g. a weight w)
                eta η:              the learning rate
                gradient ∇J(x, y):  the gradient of the objective function,
                                    i.e. the change for a specific theta θ
        '''

        for key, value in changes_to_w.items():
            # In-place update on purpose: references to the weight arrays
            # taken from self.params elsewhere keep tracking the trained values.
            self.params[key] -= self.l_rate * value

    def compute_accuracy(self, x_val, y_val):
        '''
            This function does a forward pass of x, then checks if the indices
            of the maximum value in the output equals the indices in the label
            y. Then it sums over each prediction and calculates the accuracy.

            Returns the fraction of correctly classified samples (0..1).
        '''
        predictions = []

        for x, y in zip(x_val, y_val):
            output = self.forward_pass(x)
            pred = np.argmax(output)
            predictions.append(pred == np.argmax(y))

        return np.mean(predictions)

    def train(self, x_train, y_train, x_val, y_val):
        '''
            Run self.epochs passes over (x_train, y_train), updating the
            weights after every single sample, and print the accuracy on
            (x_val, y_val) together with the elapsed time after each epoch.
        '''
        start_time = time.time()
        for iteration in range(self.epochs):
            for x, y in zip(x_train, y_train):
                output = self.forward_pass(x)
                changes_to_w = self.backward_pass(y, output)
                self.update_network_parameters(changes_to_w)

            accuracy = self.compute_accuracy(x_val, y_val)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
                iteration+1, time.time() - start_time, accuracy * 100
            ))
            
# Seed NumPy's global RNG before the weights are drawn so the random
# initialisation, and with it the reported accuracy, is the same on every
# fresh run of the notebook.
np.random.seed(42)
dnn = DeepNeuralNetwork(sizes=[784, 128, 64, 10], epochs=1)
# Handles to the three weight matrices. Training updates these arrays in
# place, so after train() the names refer to the trained weights.
Theta1 = dnn.params['W1']
Theta2 = dnn.params['W2']
Theta3 = dnn.params['W3']
dnn.train(x_train, y_train, x_val, y_val)

Epoch: 1, Time Spent: 63.35s, Accuracy: 22.95%


In [4]:
import scipy.io #Used to load the OCTAVE *.mat files
import numpy as np
import sys
sys.path.append ('../src')
from NeuralNetwork import NNClassifier
from ML_utils import UTIL_formatY,backward_prop,backpropagation
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix,classification_report


# NN layout: input layer, hidden layers, output layer.
nn_config = dict(n_a1=784, n_a2=128, n_a3=64, n_a4=10)
sgd_dict = dict(steps=1, learning_rate=0.001)
opt_dict = dict(maxiter=100, algorithm='TNC')
method = 'Optimize'
nn = NNClassifier(
    optimization=method,
    bias=False,
    nn_config=nn_config,
    debug=False,
    kargs=opt_dict,
)
costs = nn.optimize(x_train, y_train, l2_lambda=0.0)

# Training-set accuracy: compare the arg-max of each prediction row with the
# arg-max of the matching one-hot label row.
prediction, _ = nn.forward_prop(x_train)
result = np.reshape(np.argmax(prediction, axis=1), (-1, 1))
y = np.reshape(np.argmax(y_train, axis=1), (-1, 1))
accuracy = 100 * np.mean(y == result)
print('Trainig set accuracy :', accuracy)

# Same measurement on the held-out split.
test_predicted, _ = nn.forward_prop(x_val)
result = np.reshape(np.argmax(test_predicted, axis=1), (-1, 1))
y = np.reshape(np.argmax(y_val, axis=1), (-1, 1))
accuracy = 100 * np.mean(y == result)
print('Test set accuracy :', accuracy)

# Per-class precision / recall / F1 for the held-out split (y and result
# still hold the held-out labels and predictions at this point).
print(f"Classification report for classifier {nn}:\n"
      f"{classification_report(y,result)}\n")

Trainig set accuracy : 94.13333333333334
Test set accuracy : 93.99
Classification report for classifier <NeuralNetwork.NNClassifier object at 0x0000022384341FA0>:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.97      0.98      0.98      1135
           2       0.95      0.93      0.94      1032
           3       0.92      0.94      0.93      1010
           4       0.92      0.95      0.93       982
           5       0.92      0.91      0.91       892
           6       0.95      0.96      0.95       958
           7       0.95      0.94      0.94      1028
           8       0.92      0.91      0.92       974
           9       0.94      0.90      0.92      1009

    accuracy                           0.94     10000
   macro avg       0.94      0.94      0.94     10000
weighted avg       0.94      0.94      0.94     10000




In [5]:
import scipy.io #Used to load the OCTAVE *.mat files
import numpy as np
import sys
sys.path.append ('../src')
from NeuralNetwork import NNClassifier
from ML_utils import UTIL_formatY,backward_prop,backpropagation
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix,classification_report

# NN layout: input layer, hidden layers, output layer.
nn_config = dict(n_a1=784, n_a2=128, n_a3=64, n_a4=10)
sgd_dict = dict(steps=1, learning_rate=0.001)
method = 'SGD'
nn = NNClassifier(
    optimization=method,
    bias=False,
    nn_config=nn_config,
    debug=False,
    kargs=sgd_dict,
)
# NOTE(review): presumably compares analytic and numerical gradients on m
# sample(s); confirm against NeuralNetwork.NNClassifier.checkGradient.
nn.checkGradient(m=1)

Grad differences 0.000000000129629
