# Keras

In [4]:
import os

# Select the TensorFlow backend for Keras. The variable has to be set
# *before* keras is imported, because the backend choice is read at import
# time. (A bare string such as "KERAS_BACKEND=tensorflow" is only echoed by
# the notebook and configures nothing.)
os.environ["KERAS_BACKEND"] = "tensorflow"

from keras.datasets import mnist
from keras.utils import np_utils
from keras.initializers import RandomNormal
import seaborn as sns

In [5]:
# Use matplotlib's interactive notebook backend so the loss plot can be redrawn while training
%matplotlib notebook
import matplotlib.pyplot as ptl
import time
import numpy as np

# dynamic_plot: redraws the train/validation loss curves on an existing axes,
# meant to be called after each epoch so the plot updates as errors change.
def dynamic_plot(x, vy, ty, ax, colors=('b',)):
    """Plot validation and training loss on ``ax`` and redraw its figure.

    Parameters
    ----------
    x : sequence
        X-axis values (e.g. epoch numbers).
    vy : sequence
        Validation-loss values, drawn in blue.
    ty : sequence
        Training-loss values, drawn in red.
    ax : matplotlib.axes.Axes
        Axes to draw on; the figure that owns it is redrawn.
    colors : sequence, optional
        Unused; kept only so existing calls that pass it keep working.
    """
    ax.plot(x, vy, 'b', label="validation_loss")
    ax.plot(x, ty, 'r', label="Train_loss")
    # Everything goes through `ax` instead of the module-level names `plt`
    # and `fig`: pyplot is imported under a different alias in this notebook
    # and no global `fig` is defined, so the old body raised NameError.
    ax.legend()
    # grid() without arguments toggles visibility, which would switch the
    # grid off on every second call; request it explicitly.
    ax.grid(True)
    ax.figure.canvas.draw()

In [6]:
# Load the MNIST dataset through keras.datasets.
# load_data() hands back two (images, labels) pairs: one for training, one for testing.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [7]:
# Report how many images each split holds and the 2-D size of one image.
print(
    "Number of data in train dataset:",
    X_train.shape[0],
    "and each image is shape of (%d,%d) pixel" % X_train.shape[1:3],
)
print(
    "Number of data in test dataset:",
    X_test.shape[0],
    "and each image is shape of (%d,%d) pixel" % X_test.shape[1:3],
)

Number of data in train dataset: 60000 and each image is shape of (28,28) pixel
Number of data in test dataset: 10000 and each image is shape of (28,28) pixel


In [5]:
# Each input image is a 2-D (28, 28) array.
# Flatten every image into a single 1-D vector (28 * 28 = 784 values).
#
# reshape(n, -1) lets numpy infer the flattened length, so this cell can be
# re-run safely: on already-flattened data it is a no-op, whereas indexing
# shape[2] would raise IndexError on the second run.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [6]:
# Shapes after flattening each 2-D image into a 1-D vector.
print(
    "Number of data in train dataset:",
    X_train.shape[0],
    "and each image is shape of (%d) pixel" % X_train.shape[1],
)
print(
    "Number of data in test dataset:",
    X_test.shape[0],
    "and each image is shape of (%d) pixel" % X_test.shape[1],
)

Number of data in train dataset: 60000 and each image is shape of (784) pixel
Number of data in test dataset: 10000 and each image is shape of (784) pixel


In [7]:
# An example data point: print the first row of X_train.
print(X_train[0])

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255
 247 127   0   0   0   0   0   0   0   0   0   0   0   0  30  36  94 154
 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0   0   0
   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82
  82  56  39   0   0   0   0   0   0   0   0   0   0   0   0  18 219 253
 253 253 253 253 198 182 247 241   0   0   0   0   

In [8]:
# Each cell of the matrix above holds a value in the range 0-255.
# Scale the pixels to [0, 1] before feeding them to the model.
#
# The division is applied only while the arrays still hold integer pixels,
# so re-running this cell does not divide already-scaled data by 255 again.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255

In [9]:
# After normalizing the data:
# print the same example data point (first row of X_train) again.
print(X_train[0])

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [10]:
# Y_train holds one class number per image; show the one stored at index 0.
print("class number for first image that is store at index 0: ",Y_train[0])

class number for first image that is store at index 0:  5


In [11]:
# Convert each class number into a 10-dimensional one-hot vector,
# e.g. the label 5 becomes [0, 0, 0, 0, 0, 1, 0, 0, 0, 0].
# The categorical_crossentropy loss used for the MLPs needs this format.
#
# Only 1-D label arrays are converted, so re-running this cell does not
# one-hot encode labels that are already one-hot encoded.
if Y_train.ndim == 1:
    Y_train = np_utils.to_categorical(Y_train, 10)
if Y_test.ndim == 1:
    Y_test = np_utils.to_categorical(Y_test, 10)

In [12]:
# Show the first training label after the to_categorical conversion.
print("After converting the output(classlabel) into a 10 dimensional vector: ", Y_train[0])

After converting the output(classlabel) into a 10 dimensional vector:  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


# Softmax classifier

In [13]:
# Import the Sequential model: data moves sequentially from the input layer to the first hidden layer, then to the second hidden layer, and so on.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation


In [14]:
# Model / training parameters shared by the cells below.
input_dim = X_train.shape[1]   # number of features per flattened image
output_dim = 10                # one output node per digit class
nb_epoch = 20                  # number of training epochs
batch_size = 128               # samples per gradient update

In [15]:
# Display the number of input features taken from X_train.shape[1].
input_dim

784

In [16]:
# Build the softmax classifier.
#
# A Sequential model must be told the input shape it should expect. Only the
# first layer needs this information (via input_shape or input_dim); the
# layers after it infer their shapes automatically.
#
# output_dim is the number of nodes in the layer: 10 here, because this is a
# 10-class classification problem.
model = Sequential([
    Dense(output_dim, input_dim=input_dim, activation='softmax'),
])

In [17]:
# Display the second dimension of Y_test (length of each label vector).
Y_test.shape[1]

10

In [18]:
# Before training, the learning process is configured with compile().
# It receives three arguments:
#   * optimizer - e.g. the string identifier of an existing optimizer, https://keras.io/optimizers/
#   * loss      - the objective the model will try to minimize, https://keras.io/losses/
#   * metrics   - a list of metrics; for classification use metrics=['accuracy'], https://keras.io/metrics/
#
# Note: with the categorical_crossentropy loss the targets must be in
# categorical format (for 10 classes, each target is a 10-dimensional vector
# that is all zeros except for a 1 at the index of the sample's class).
# That is why the labels were converted into vectors.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

# Keras models are trained on NumPy arrays of input data and labels with fit():
#
# fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0,
# validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None,
# validation_steps=None)
#
# fit() trains the model for a fixed number of epochs (iterations on a dataset)
# and returns a History object. Its History.history attribute records the
# training loss and metric values at successive epochs, as well as the
# validation loss and validation metric values (if applicable).
#
# batch_size=batch_size is used here (instead of steps_per_epoch=500) so this
# run uses the batch size declared with the other model parameters and trains
# the same way as the other fit() calls in this notebook. With Keras's default
# batch size of 32, 500 steps would cover only 16,000 of the 60,000 training
# images per epoch.
history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# MLP + SigmoidActivation + SGD optimizer

In [19]:
# MLP architecture: 784 -> 512 -> 128 -> 10.
# The first hidden layer has 512 nodes and the second 128, each node using a
# sigmoid activation. The final layer has output_dim (10) softmax nodes.
# A Sequential model is used because data is passed from one layer to the next in order.
sigmoid_model = Sequential([
    Dense(512, activation='sigmoid', input_shape=(input_dim,)),
    Dense(128, activation='sigmoid'),
    Dense(output_dim, activation='softmax'),
])
sigmoid_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 512)               401920    
                                                                 
 dense_2 (Dense)             (None, 128)               65664     
                                                                 
 dense_3 (Dense)             (None, 10)                1290      
                                                                 
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Before training the network, configure the learning process with compile().
sigmoid_model.compile(optimizer='sgd', loss='categorical_crossentropy',metrics=['accuracy'])

# Train with the batch size declared in the parameters cell. The previous
# steps_per_epoch=500 ignored batch_size, so this run was not trained under
# the same conditions as the fit() calls that pass batch_size=batch_size.
history_2 = sigmoid_model.fit(X_train,Y_train, batch_size=batch_size, verbose=1, epochs=nb_epoch,validation_data=(X_test,Y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# MLP + SigmoidActivation + Adam

In [23]:
# Fresh 512 -> 128 -> 10 sigmoid network (replaces the previous sigmoid_model).
# The last layer has output_dim (10) softmax nodes.
sigmoid_model = Sequential([
    Dense(512, activation='sigmoid', input_shape=(input_dim,)),
    Dense(128, activation='sigmoid'),
    Dense(output_dim, activation='softmax'),
])
sigmoid_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 512)               401920    
                                                                 
 dense_7 (Dense)             (None, 128)               65664     
                                                                 
 dense_8 (Dense)             (None, 10)                1290      
                                                                 
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Configure the learning process (Adam optimizer) before training.
sigmoid_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the network; the returned History object is the cell's output.
sigmoid_model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(X_test, Y_test),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x160814652e0>

# MLP + ReLU + Adam optimizer

In [29]:
# 512 -> 128 -> 10 network with ReLU hidden layers; hidden-layer weights are
# drawn from a zero-mean normal distribution with an explicit stddev.
# NOTE(review): 0.062 ~= sqrt(2/512) and 0.125 == sqrt(2/128), i.e. the values
# track each layer's own unit count - presumably a He-style scaling; confirm
# whether fan-in (784 and 512) was intended instead.
ReLu_model = Sequential([
    Dense(
        512,
        activation='relu',
        input_shape=(input_dim,),
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.062, seed=None),
    ),
    Dense(
        128,
        activation='relu',
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.125, seed=None),
    ),
    # Output layer: output_dim (10) softmax nodes.
    Dense(output_dim, activation='softmax'),
])
ReLu_model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 512)               401920    
                                                                 
 dense_16 (Dense)            (None, 128)               65664     
                                                                 
 dense_17 (Dense)            (None, 10)                1290      
                                                                 
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Configure the learning process (Adam optimizer) before training.
ReLu_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the network; the returned History object is the cell's output.
ReLu_model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(X_test, Y_test),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1609fe9f220>

# MLP + ReLU + SGD optimizer

In [31]:
# Fresh 512 -> 128 -> 10 ReLU network (replaces the previous ReLu_model),
# with hidden-layer weights drawn from zero-mean normal distributions.
# NOTE(review): 0.062 ~= sqrt(2/512) and 0.125 == sqrt(2/128) - presumably a
# He-style scaling keyed to each layer's unit count; confirm the intent.
ReLu_model = Sequential([
    Dense(
        512,
        activation='relu',
        input_shape=(input_dim,),
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.062, seed=None),
    ),
    Dense(
        128,
        activation='relu',
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.125, seed=None),
    ),
    # Output layer: output_dim (10) softmax nodes.
    Dense(output_dim, activation='softmax'),
])
ReLu_model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 512)               401920    
                                                                 
 dense_19 (Dense)            (None, 128)               65664     
                                                                 
 dense_20 (Dense)            (None, 10)                1290      
                                                                 
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Configure the learning process (SGD optimizer) before training.
ReLu_model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the network; the returned History object is the cell's output.
ReLu_model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,
    validation_data=(X_test, Y_test),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x160aeb571f0>