In [1]:
import numpy as np
from h5py import File
import scipy.io #Used to load the OCTAVE *.mat files
import numpy as np
import sys
sys.path.append ('../src')
from NeuralNetwork import NNClassifier
from ML_utils import softmax,sigmoid,UTIL_formatY,backward_prop,backpropagation
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix,classification_report




In [2]:
MNIST_data = File("data/MNISTdata.hdf5", 'r')
x_train = np.float32(MNIST_data['x_train'][:])
y_train = np.int32(np.array(MNIST_data['y_train'][:, 0])).reshape(-1, 1)
x_test  = np.float32(MNIST_data['x_test'][:])
y_test  = np.int32(np.array(MNIST_data['y_test'][:, 0])).reshape(-1, 1)
MNIST_data.close()

# stack together for next step
X = np.vstack((x_train, x_test))
print (X.shape)
y = np.vstack((y_train, y_test))
print (y.shape)

# one-hot encoding
digits = 10
examples = y.shape[0]
y = y.reshape(1, examples)
Y_new = np.eye(digits)[y.astype('int32')]
Y_new = Y_new.T.reshape(digits, examples)


# number of training set
m = 60000
m_test = X.shape[0] - m
X_train, X_test = X[:m].T, X[m:].T
Y_train, Y_test = Y_new[:, :m], Y_new[:, m:]


# shuffle training set
shuffle_index = np.random.permutation(m)
X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]

(70000, 784)
(70000, 1)


In [3]:
opt = {'n_h':64,'n_x':784,'epochs':10, 'batch_size':64 , 'beta':0.9,'lr':0.5}

# initialization
params = {"W1": np.random.randn(opt['n_h'], opt['n_x']) * np.sqrt(1. / opt['n_x']),
          "b1": np.zeros((opt['n_h'], 1)) * np.sqrt(1. / opt['n_x']),
          "W2": np.random.randn(digits, opt['n_h']) * np.sqrt(1. / opt['n_h']),
          "b2": np.zeros((digits, 1)) * np.sqrt(1. / opt['n_h'])}
print (params['W1'].shape)
print (params['b1'].shape)
print (params['W2'].shape)
print (params['b2'].shape)

(64, 784)
(64, 1)
(10, 64)
(10, 1)


In [5]:
T1 = np.copy(np.hstack(( params['b1'],params['W1'])))
print (T1.shape)
T2 = np.copy(np.hstack(( params['b2'],params['W2'],)))
print (T2.shape)


(64, 785)
(10, 65)


In [6]:

nn_config={'n_a1':784,'n_a2':64,'n_a3':10 }   # Configuración de red NN , input layer , hidder layers , output layer
sgd_dict =   {'steps':10,'learning_rate':0.5,'mini_batch_size':2**8}
opt_dict =   {'maxiter':100,'algorithm' : 'TNC'}
activ = {'activation_a2':sigmoid,'activation_a3':softmax}
method = 'miniBatchGD'
nn = NNClassifier(optimization=method,bias=True,nn_config=nn_config,activ=activ,debug=False,kargs=sgd_dict)
print (nn.thetas['Theta1'].shape)
print (nn.thetas['Theta2'].shape)
nn.thetas['Theta1'] = T1
nn.thetas['Theta2'] = T2

(64, 785)
(10, 65)


In [7]:
def sigmoid(z):
    """
    sigmoid activation function.

    inputs: z
    outputs: sigmoid(z)
    """
    s = 1. / (1. + np.exp(-z))
    return s
def compute_loss(Y, Y_hat):
    """
    compute loss function
    """
    L_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = Y.shape[1]
    L = -(1./m) * L_sum

    return L
def feed_forward(X, params):
    """
    feed forward network: 2 - layer neural net

    inputs:
        params: dictionay a dictionary contains all the weights and biases

    return:
        cache: dictionay a dictionary contains all the fully connected units and activations
    """
    cache = {}

    # Z1 = W1.dot(x) + b1
    cache["Z1"] = np.matmul(params["W1"], X) + params["b1"]
    
    # A1 = sigmoid(Z1)
    cache["A1"] = sigmoid(cache["Z1"])

    # Z2 = W2.dot(A1) + b2
    cache["Z2"] = np.matmul(params["W2"], cache["A1"]) + params["b2"]

    # A2 = softmax(Z2)
    cache["A2"] = np.exp(cache["Z2"]) / np.sum(np.exp(cache["Z2"]), axis=0)
#    cache["A2"] = sigmoid(cache["Z2"])

    return cache
def back_propagate(X, Y, params, cache, m_batch):
    """
    back propagation

    inputs:
        params: dictionay a dictionary contains all the weights and biases
        cache: dictionay a dictionary contains all the fully connected units and activations

    return:
        grads: dictionay a dictionary contains the gradients of corresponding weights and biases
    """
    # error at last layer
    dZ2 = cache["A2"] - Y

    # gradients at last layer (Py2 need 1. to transform to float)
    dW2 = (1. / m_batch) * np.matmul(dZ2, cache["A1"].T)
    db2 = (1. / m_batch) * np.sum(dZ2, axis=1, keepdims=True)

    # back propgate through first layer
    dA1 = np.matmul(params["W2"].T, dZ2)
    dZ1 = dA1 * sigmoid(cache["Z1"]) * (1 - sigmoid(cache["Z1"]))

    # gradients at first layer (Py2 need 1. to transform to float)
    dW1 = (1. / m_batch) * np.matmul(dZ1, X.T)
    db1 = (1. / m_batch) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    return grads

In [8]:


batches=10
for i in range(opt['epochs']):

    # shuffle training set
    permutation = np.random.permutation(X_train.shape[1])
    X_train_shuffled = X_train[:, permutation]
    Y_train_shuffled = Y_train[:, permutation]

    for j in range(batches):
        #np.testing.assert_allclose (params['W1'],nn.thetas['Theta1'][:,1:])
        #np.testing.assert_allclose (params['W2'],nn.thetas['Theta2'][:,1:])
        #np.testing.assert_allclose (params['b1'],nn.thetas['Theta1'][:,0:1])
        #np.testing.assert_allclose (params['b2'],nn.thetas['Theta2'][:,0:1])

        # get mini-batch
        begin = j * opt['batch_size']
        end = min(begin + opt['batch_size'], X_train.shape[1] - 1)
        X = X_train_shuffled[:, begin:end]
        Y = Y_train_shuffled[:, begin:end]
        m_batch = end - begin

        nnX_train = np.copy(X.reshape (X.shape[1],X.shape[0]))
        nnY_train = np.copy(Y.reshape (Y.shape[1],Y.shape[0]))

        # forward and backward
        cache = feed_forward(X, params)
        #prediction,nncache = nn.forward_prop (X.T)
        #np.testing.assert_allclose (prediction.T,cache['A2'])
        grads = back_propagate(X, Y, params, cache, m_batch)
        #delta,nngrads = nn.backward_prop (X.T,Y.T)
        #np.testing.assert_allclose (grads['dW1'],nngrads['grad1'][:,1:])
        #np.testing.assert_allclose (grads['dW2'],nngrads['grad2'][:,1:])
        #np.testing.assert_allclose (grads['db1'],nngrads['grad1'][:,0:1])
        #np.testing.assert_allclose (grads['db2'],nngrads['grad2'][:,0:1])

        # with momentum (optional)
        dW1 = grads ['dW1']
        db1 = grads ['db1']
        dW2 = grads ['dW2']
        db2 = grads ['db2']
        
        dW1 = (opt['beta'] * dW1 + (1. - opt['beta']) * grads["dW1"])
        db1 = (opt['beta'] * db1 + (1. - opt['beta']) * grads["db1"])
        dW2 = (opt['beta'] * dW2 + (1. - opt['beta']) * grads["dW2"])
        db2 = (opt['beta'] * db2 + (1. - opt['beta']) * grads["db2"])
    
        # gradient descent
        params["W1"] = params["W1"] - opt['lr'] * dW1
        params["b1"] = params["b1"] - opt['lr'] * db1
        params["W2"] = params["W2"] - opt['lr'] * dW2
        params["b2"] = params["b2"] - opt['lr'] * db2
        #nn._updateThetas (nngrads,opt['lr'])
        #np.testing.assert_allclose (params['W1'],nn.thetas['Theta1'][:,1:])
        
    # forward pass on training set
    cache = feed_forward(X_train, params)
    #prediction,_ = nn.forward_prop (X_train.T)
    train_loss = compute_loss(Y_train, cache["A2"])
    #cost = nn.costFunction (X_train.T,Y_train.T)
    # forward pass on test set
    cache = feed_forward(X_test, params)
    test_loss = compute_loss(Y_test, cache["A2"])
    print("Epoch {}: training loss = {}, test loss = {}".format(
        i + 1, train_loss, test_loss))

Epoch 1: training loss = 1.970743261533071, test loss = 1.9629573743205424
Epoch 2: training loss = 1.5994230516340866, test loss = 1.5875651002471802
Epoch 3: training loss = 1.2855953514193994, test loss = 1.2678200435568923
Epoch 4: training loss = 1.0735109749297258, test loss = 1.0567940953373172
Epoch 5: training loss = 0.9041955996535975, test loss = 0.8853720079015203
Epoch 6: training loss = 0.8177456112023864, test loss = 0.7986463499783694
Epoch 7: training loss = 0.7207538807386803, test loss = 0.7001803659369845
Epoch 8: training loss = 0.6841058784790693, test loss = 0.6695612783391528
Epoch 9: training loss = 0.6166655116708453, test loss = 0.6008955548506377
Epoch 10: training loss = 0.5808964832872017, test loss = 0.5640612607930451


In [3]:
from sklearn.metrics import multilabel_confusion_matrix,classification_report
result = feed_forward (X_test,params)
prediction = np.argmax(result['A2'],axis=0)
print (prediction[0:5])

print(f"Classification report for classifier :\n"
      f"{classification_report(y_test,prediction)}\n")

NameError: name 'feed_forward' is not defined

In [3]:
nn_config={'n_a1':784,'n_a2':64,'n_a3':10 }   # Configuración de red NN , input layer , hidder layers , output layer
sgd_dict =   {'steps':10,'learning_rate':0.5,'mini_batch_size':2**8}
opt_dict =   {'maxiter':100,'algorithm' : 'TNC'}
activ = {'activation_a2':sigmoid,'activation_a3':softmax}
method = 'miniBatchGD'

nn = NNClassifier(optimization=method,bias=True,nn_config=nn_config,activ=activ,debug=False,kargs=sgd_dict)
costs = nn.optimize (X_train.T,Y_train.T,l2_lambda=0.0)
prediction,_ = nn.forward_prop (X_train.T)
result = np.argmax(prediction,axis=1).reshape(-1,1)
y = np.argmax(Y_train.T,axis=1).reshape(-1,1)

accuracy = np.mean(y==result) * 100
print ('Trainig set accuracy :' , accuracy  )

test_predicted,_ = nn.forward_prop (X_test.T)
result = np.argmax(test_predicted,axis=1).reshape(-1,1)
y = np.argmax(Y_test.T,axis=1).reshape(-1,1)
accuracy = np.mean(y==result) * 100
print ('Test set accuracy :' , accuracy  )

print(f"Classification report for classifier {nn}:\n"
      f"{classification_report(y,result)}\n")

LR = 0.5:  30%|█████████████████████▉                                                   | 3/10 [00:09<00:21,  3.14s/it]


KeyboardInterrupt: 

In [8]:
nn_config={'n_a1':784,'n_a2':128 ,'n_a3':64,'n_a4':25 , 'n_a5':10}   # Configuración de red NN , input layer , hidder layers , output layer
sgd_dict =   {'steps':50,'learning_rate':0.5,'mini_batch_size':2**8}
opt_dict =   {'maxiter':500,'algorithm' : 'TNC'}
activ = {'activation_a2':sigmoid,'activation_a3':sigmoid,'activation_a4':sigmoid,'activation_a5':softmax}
method = 'miniBatchGD'
if method == 'Optimize':
    midict = opt_dict
else:
    midict = sgd_dict
    
nn = NNClassifier(optimization=method,bias=True,nn_config=nn_config,activ=activ,debug=False,kargs=midict)
costs = nn.optimize (X_train.T,Y_train.T,l2_lambda=0.0)
prediction,_ = nn.forward_prop (X_train.T)
result = np.argmax(prediction,axis=1).reshape(-1,1)
y = np.argmax(Y_train.T,axis=1).reshape(-1,1)

accuracy = np.mean(y==result) * 100
print ('Trainig set accuracy :' , accuracy  )

test_predicted,_ = nn.forward_prop (X_test.T)
result = np.argmax(test_predicted,axis=1).reshape(-1,1)
y = np.argmax(Y_test.T,axis=1).reshape(-1,1)
accuracy = np.mean(y==result) * 100
print ('Test set accuracy :' , accuracy  )H

print(f"Classification report for classifier {nn}:\n"
      f"{classification_report(y,result)}\n")

LR = 0.5: 100%|████████████████████████████████████████████████████████████████████████| 50/50 [03:11<00:00,  3.83s/it]


Trainig set accuracy : 99.58666666666667
Test set accuracy : 97.56
Classification report for classifier <NeuralNetwork.NNClassifier object at 0x00000257A0091EE0>:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.98      0.99      0.99      1135
           2       0.98      0.97      0.97      1032
           3       0.98      0.98      0.98      1010
           4       0.97      0.98      0.97       982
           5       0.98      0.97      0.98       892
           6       0.97      0.98      0.98       958
           7       0.97      0.97      0.97      1028
           8       0.97      0.97      0.97       974
           9       0.97      0.96      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000


