# Validating NN Model

Using validation curves to validate the model's performance.

In [1]:
import activation_functions
import loss_functions
import process_MNIST
from init_NN import NeuralNetwork

# Relative locations of the MNIST idx files on the local machine.
# The files are expected to be already unzipped; they are available at
# http://yann.lecun.com/exdb/mnist/
training_images_path = 'training_data/train-images.idx3-ubyte'
training_labels_path = 'training_data/train-labels.idx1-ubyte'
test_images_path = 'test_data/t10k-images.idx3-ubyte'
test_labels_path = 'test_data/t10k-labels.idx1-ubyte'

# Read each (images, labels) pair through the project's MNIST loader.
training_images, training_labels = process_MNIST.load_data(
    training_images_path,
    training_labels_path,
)
test_images, test_labels = process_MNIST.load_data(
    test_images_path,
    test_labels_path,
)

Testing Model with Learning Rate 0.001 using Stochastic Gradient Descent

In [2]:
# Network architecture: one entry per layer. The input size (first entry) and the
# output size (last entry) can't change; only the hidden sizes in between are tunable.
# Per the author's notes, adjacent layers are joined by a linear activation function,
# each followed by a ReLU except the one between the last hidden layer and the output
# layer -- so this layout has 3 linear activation functions and 2 ReLU functions.
lsize = [784, 128, 64, 10]
lr = 0.001

# Build the network with ReLU activations and a softmax cross-entropy loss.
nn_lr001_sgd = NeuralNetwork(
    lsize,
    activation_functions.ReLU,
    activation_functions.dReLU,
    loss_functions.cross_entropy_w_softmax,
    loss_functions.dcross_entropy,
    lr,
)

# Train with stochastic gradient descent:
#   - stochastic gradient descent means batch size = 1
#   - (full) gradient descent means batch size = # training samples,
#     so the number of steps taken equals the number of epochs
nn_lr001_sgd.train(training_images, training_labels, epochs=10, batch_size=1)

# Evaluate on the held-out test set.
# NOTE(review): the captured output labels the second metric "Classification accuracy",
# while it is stored here under a *_classification_error name -- confirm which it is.
(
    nn_lr001_sgd_cross_ent_error,
    nn_lr001_sgd_classification_error,
) = nn_lr001_sgd.test(test_images, test_labels)

  0%|          | 0/10 [00:00<?, ?it/s]

epoch number 0


100%|██████████| 60000/60000 [01:26<00:00, 695.64it/s]
 10%|█         | 1/10 [01:26<12:57, 86.41s/it]

epoch number 1


100%|██████████| 60000/60000 [01:49<00:00, 549.92it/s]
 20%|██        | 2/10 [03:15<13:18, 99.87s/it]

epoch number 2


100%|██████████| 60000/60000 [01:32<00:00, 646.25it/s]
 30%|███       | 3/10 [04:48<11:17, 96.74s/it]

epoch number 3


100%|██████████| 60000/60000 [01:37<00:00, 616.32it/s]
 40%|████      | 4/10 [06:26<09:42, 97.04s/it]

epoch number 4


100%|██████████| 60000/60000 [01:35<00:00, 628.15it/s]
 50%|█████     | 5/10 [08:01<08:02, 96.56s/it]

epoch number 5


100%|██████████| 60000/60000 [01:38<00:00, 610.64it/s]
 60%|██████    | 6/10 [09:40<06:28, 97.21s/it]

epoch number 6


100%|██████████| 60000/60000 [01:52<00:00, 531.25it/s]
 70%|███████   | 7/10 [11:33<05:07, 102.41s/it]

epoch number 7


100%|██████████| 60000/60000 [01:48<00:00, 551.09it/s]
 80%|████████  | 8/10 [13:22<03:29, 104.51s/it]

epoch number 8


100%|██████████| 60000/60000 [01:08<00:00, 881.58it/s]
 90%|█████████ | 9/10 [14:30<01:33, 93.17s/it] 

epoch number 9


100%|██████████| 60000/60000 [01:06<00:00, 897.20it/s] 
100%|██████████| 10/10 [15:37<00:00, 93.78s/it]


Average cross entropy loss: 0.42922857933322484
Classification accuracy: 89.49000000000001%


Testing Model with Learning Rate 0.001 using Gradient Descent

In [3]:
# Full-batch gradient descent at learning rate 0.001: batch_size equals the number of
# training samples, so each epoch consists of a single batch.
lr = 0.001
nn_lr001_gd = NeuralNetwork(
    lsize,
    activation_functions.ReLU,
    activation_functions.dReLU,
    loss_functions.cross_entropy_w_softmax,
    loss_functions.dcross_entropy,
    lr,
)
nn_lr001_gd.train(training_images, training_labels, epochs=10, batch_size=len(training_images))

# Evaluate the network trained in this cell. This must be nn_lr001_gd (not
# nn_lr001_sgd), otherwise the GD metrics would just repeat the SGD model's results.
nn_lr001_gd_cross_ent_error, nn_lr001_gd_classification_error = nn_lr001_gd.test(test_images, test_labels)

  0%|          | 0/10 [00:00<?, ?it/s]

epoch number 0


100%|██████████| 1/1 [00:35<00:00, 35.74s/it]
 10%|█         | 1/10 [00:35<05:22, 35.88s/it]

epoch number 1


100%|██████████| 1/1 [00:34<00:00, 34.76s/it]
 20%|██        | 2/10 [01:10<04:42, 35.30s/it]

epoch number 2


100%|██████████| 1/1 [00:33<00:00, 33.85s/it]
 30%|███       | 3/10 [01:44<04:02, 34.69s/it]

epoch number 3


100%|██████████| 1/1 [00:35<00:00, 35.88s/it]
 40%|████      | 4/10 [02:20<03:31, 35.21s/it]

epoch number 4


100%|██████████| 1/1 [00:33<00:00, 33.72s/it]
 50%|█████     | 5/10 [02:54<02:53, 34.72s/it]

epoch number 5


100%|██████████| 1/1 [00:37<00:00, 37.66s/it]
 60%|██████    | 6/10 [03:32<02:23, 35.76s/it]

epoch number 6


100%|██████████| 1/1 [00:35<00:00, 35.33s/it]
 70%|███████   | 7/10 [04:07<01:46, 35.67s/it]

epoch number 7


100%|██████████| 1/1 [00:35<00:00, 35.02s/it]
 80%|████████  | 8/10 [04:43<01:11, 35.50s/it]

epoch number 8




In [None]:
# Stochastic gradient descent (batch_size=1) at the larger learning rate 0.01.
lr = 0.01
nn_lr01_sgd = NeuralNetwork(
    lsize,
    activation_functions.ReLU,
    activation_functions.dReLU,
    loss_functions.cross_entropy_w_softmax,
    loss_functions.dcross_entropy,
    lr,
)
nn_lr01_sgd.train(training_images, training_labels, epochs=10, batch_size=1)

# Evaluate the network trained in this cell and store both metrics under lr01 names,
# so the lr=0.001 SGD results from the earlier cell are not overwritten.
nn_lr01_sgd_cross_ent_error, nn_lr01_sgd_classification_error = nn_lr01_sgd.test(test_images, test_labels)


In [None]:

# Full-batch gradient descent (batch_size = # training samples) at learning rate 0.01.
# lr is set explicitly so this cell does not depend on whichever cell last assigned it.
lr = 0.01
nn_lr01_gd = NeuralNetwork(
    lsize,
    activation_functions.ReLU,
    activation_functions.dReLU,
    loss_functions.cross_entropy_w_softmax,
    loss_functions.dcross_entropy,
    lr,
)
nn_lr01_gd.train(training_images, training_labels, epochs=10, batch_size=len(training_images))

# Evaluate the network trained in this cell and store both metrics under lr01 names,
# so the lr=0.001 GD results from the earlier cell are not overwritten.
nn_lr01_gd_cross_ent_error, nn_lr01_gd_classification_error = nn_lr01_gd.test(test_images, test_labels)