# Feed Forward Neural Network with Back Propagation & Gradient Descent

Write a program to train a feed forward neural network with 2 hidden layers, 1 input layer, 1 output layer utilizing the back propagation algorithm and gradient descent optimization, while employing the cross entropy loss function and soft max activation function on output layer for effective multiclass classification

In [53]:
# importing libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import SGD
from keras.datasets import mnist

# loading dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype('float32')/255

test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype('float32')/255

# One-hot encoding
train_labels = np.eye(10)[train_labels]
test_labels = np.eye(10)[test_labels]

print("Training Size: ", len(train_labels))
print("Test Size: ", len(test_labels))
print("Shape: ",train_images[0].shape)

# define the CNN model

model = Sequential()

model.add(Flatten(input_shape=(28, 28, 1))) # 28x28 image, 1 channel (grayscale)
model.add(Dense(128, activation='relu')) # 128 neurons
model.add(Dense(128, activation='relu')) # 128 neurons
model.add(Dense(10, activation='softmax')) # 10 classes

# compile model
sgd = SGD(lr=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# fit the model
model.fit(train_images, train_labels, epochs=20)

# evaluate the model
scores = model.evaluate(test_images, test_labels)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Training Size:  60000
Test Size:  10000
Shape:  (28, 28, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

accuracy: 98.12%


In [57]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.utils import to_categorical

# load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# normalize the pixel values to be between 0 and 1
train_images = train_images / 255.0
test_images = test_images / 255.0

# flatten the images into a 784-dimensional vector
train_images = train_images.reshape((60000, 784))
test_images = test_images.reshape((10000, 784))

# convert the labels to one-hot encoded vectors
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# define the number of neurons in each layer
num_input_neurons = 784
num_hidden_neurons1 = 256
num_hidden_neurons2 = 128
num_output_neurons = 10

# define the learning rate and number of epochs
learning_rate = 0.1
num_epochs = 2

# initialize the weights and biases of the network
W1 = np.random.randn(num_input_neurons, num_hidden_neurons1)
b1 = np.zeros((1, num_hidden_neurons1))

W2 = np.random.randn(num_hidden_neurons1, num_hidden_neurons2)
b2 = np.zeros((1, num_hidden_neurons2))

W3 = np.random.randn(num_hidden_neurons2, num_output_neurons)
b3 = np.zeros((1, num_output_neurons))

# define the forward pass function
def forward_pass(x):
    z1 = np.dot(x, W1) + b1
    a1 = np.maximum(0, z1) # ReLU activation function
    z2 = np.dot(a1, W2) + b2
    a2 = np.maximum(0, z2) # ReLU activation function
    z3 = np.dot(a2, W3) + b3
    y_pred = np.exp(z3) / np.sum(np.exp(z3), axis=1, keepdims=True) # softmax activation function
    return y_pred

# loop over the epochs
for epoch in range(num_epochs):
    # loop over the training examples
    for i in range(len(train_images)):
        # forward pass
        x = train_images[i].reshape((1, 784))
        y_true = train_labels[i].reshape((1, 10))
        y_pred = forward_pass(x)
        
        # compute the loss and print it out
        loss = -np.sum(y_true * np.log(y_pred))
        print("Epoch %d, Example %d, Loss: %.2f" % (epoch+1, i+1, loss))
        
        # backward pass
        delta4 = y_pred - y_true
        delta3 = np.dot(delta4, W3.T) * (np.maximum(0, z2) > 0) # ReLU derivative
        delta2 = np.dot(delta3, W2.T) * (np.maximum(0, z1) > 0) # ReLU derivative
        
        # update the weights and biases
        dW3 = np.dot(a2.T, delta4)
        db3 = np.sum(delta4, axis=0, keepdims=True)
        dW2 = np.dot(a1.T, delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        dW1 = np.dot(x.T, delta2)
        db1 = np.sum(delta2, axis=0, keepdims=True)
        
        W3 -= learning_rate * dW3
        b3 -= learning_rate * db3
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1

    # evaluate the model on the test set and print the accuracy
    y_pred = forward_pass(test_images)
    y_pred_labels = np.argmax(y_pred, axis=1)
    test_labels_labels = np.argmax(test_labels, axis=1)
    accuracy = np.mean(y_pred_labels == test_labels_labels)
    print("Epoch %d, Test Accuracy: %.2f" % (epoch+1, accuracy))

  y_pred = np.exp(z3) / np.sum(np.exp(z3), axis=1, keepdims=True) # softmax activation function
  y_pred = np.exp(z3) / np.sum(np.exp(z3), axis=1, keepdims=True) # softmax activation function
  loss = -np.sum(y_true * np.log(y_pred))
  loss = -np.sum(y_true * np.log(y_pred))


Epoch 1, Example 1, Loss: nan
Epoch 1, Example 2, Loss: nan
Epoch 1, Example 3, Loss: nan
Epoch 1, Example 4, Loss: nan
Epoch 1, Example 5, Loss: nan
Epoch 1, Example 6, Loss: nan
Epoch 1, Example 7, Loss: nan
Epoch 1, Example 8, Loss: nan
Epoch 1, Example 9, Loss: nan
Epoch 1, Example 10, Loss: nan
Epoch 1, Example 11, Loss: nan
Epoch 1, Example 12, Loss: nan
Epoch 1, Example 13, Loss: nan
Epoch 1, Example 14, Loss: nan
Epoch 1, Example 15, Loss: nan
Epoch 1, Example 16, Loss: nan
Epoch 1, Example 17, Loss: nan
Epoch 1, Example 18, Loss: nan
Epoch 1, Example 19, Loss: nan
Epoch 1, Example 20, Loss: nan
Epoch 1, Example 21, Loss: nan
Epoch 1, Example 22, Loss: nan
Epoch 1, Example 23, Loss: nan
Epoch 1, Example 24, Loss: nan
Epoch 1, Example 25, Loss: nan
Epoch 1, Example 26, Loss: nan
Epoch 1, Example 27, Loss: nan
Epoch 1, Example 28, Loss: nan
Epoch 1, Example 29, Loss: nan
Epoch 1, Example 30, Loss: nan
Epoch 1, Example 31, Loss: nan
Epoch 1, Example 32, Loss: nan
Epoch 1, Example 

In [17]:
# importing libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import SGD

In [54]:
# importing dataset - mnist digits
from keras.datasets import mnist

# loading dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype('float32')/255

test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype('float32')/255

# One-hot encoding
train_labels = np.eye(10)[train_labels]
test_labels = np.eye(10)[test_labels]

In [38]:
print("Training Size: ", len(train_labels))
print("Test Size: ", len(test_labels))

Training Size:  60000
Test Size:  10000


In [39]:
train_images[0].shape

(28, 28, 1)

In [45]:
# define the CNN model

model = Sequential()

model.add(Flatten(input_shape=(28, 28, 1))) # 28x28 image, 1 channel (grayscale)
model.add(Dense(128, activation='relu')) # 128 neurons
model.add(Dense(128, activation='relu')) # 128 neurons
model.add(Dense(10, activation='softmax')) # 10 classes

In [46]:
# compile model
sgd = SGD(lr=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

  super().__init__(name, **kwargs)


In [48]:
# fit the model
model.fit(train_images, train_labels, epochs=35)

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


<keras.callbacks.History at 0x25784d706d0>

In [49]:
# evaluate the model
scores = model.evaluate(test_images, test_labels)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))



accuracy: 96.40%
