<a href="https://colab.research.google.com/github/hyguozz/CNN-for-Classification-using-the-Fashion-MNIST-dataset/blob/main/LeNet5_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implementation of LeNet-5 Using Keras


In [None]:
import os
import time
import tensorflow as tf
from keras.models import Sequential
from keras import models, layers
import keras
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
from keras.utils import np_utils
from keras.regularizers import l1, l2
import keras.backend as K

## Download Data Set & Normalize

In [None]:
# Fetch Fashion-MNIST, already split into train and test sets
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Cast the pixels to float32 and scale them into [0, 1]
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Reshape the images into a 4D array (samples, 28, 28, 1) for the Conv2D input
x_train = x_train.reshape(len(x_train), 28, 28, 1)
x_test = x_test.reshape(len(x_test), 28, 28, 1)

# Transform the integer labels to one-hot encoding over 10 classes
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
# Report the shapes of the prepared train/test image arrays
shape_report = (' Size of x_train: ', x_train.shape, '\n', 'Size of x_test: ', x_test.shape)
print(*shape_report)

 Size of x_train:  (60000, 28, 28, 1) 
 Size of x_test:  (10000, 28, 28, 1)


In [None]:
# Training hyper-parameters shared by every experiment:
# mini-batch size and number of epochs passed to model.fit
BATCHSIZE, EPOCH = 128, 15

## Q1: Define the LeNet-5 Baseline Model

In [None]:
def model_baseline():
    """Build and compile the baseline LeNet-5 style network.

    Architecture (input 28x28x1):
        Conv(6 filters, 5x5, stride 1, valid, ReLU) -> AvgPool(2x2, stride 2)
        -> Conv(16 filters, 5x5, stride 1, valid, ReLU) -> AvgPool(2x2, stride 2)
        -> Flatten -> Dense(120, ReLU) -> Dense(84, ReLU) -> Dense(10, softmax)

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        'SGD' optimizer, 'accuracy' metric).
    """
    lenet = Sequential([
        # Conv1 (f=5, s=1), 6 filters
        layers.Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      input_shape=(28, 28, 1), padding='valid'),
        # Pool1 (2*2, s=2)
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        # Conv2 (f=5, s=1), 16 filters
        layers.Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      padding='valid'),
        # Pool2 (2*2, s=2)
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        # Fully connected head
        layers.Flatten(),
        layers.Dense(120, activation='relu'),
        layers.Dense(84, activation='relu'),
        # Output layer: one softmax unit per class
        layers.Dense(10, activation='softmax'),
    ])

    lenet.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer='SGD',
                  metrics=['accuracy'])
    return lenet

## Q2:  Add L2 weight decay regularization
## Q3:  Add L1 weight decay regularization


In [None]:
# Q2 / Q3: LeNet-5 with an L1 or L2 weight penalty attached through kernel_regularizer
def model_reg( reg_type = 1, lamda = 1e-3 ):
    """Build and compile the LeNet-5 variant with kernel regularization.

    The layer stack matches the baseline model; in addition, every Conv2D and
    Dense layer receives the same regularizer object via ``kernel_regularizer``.

    Args:
        reg_type: 1 selects L1 regularization; any other value selects L2.
        lamda: regularization strength, such as 1e-4 or 1e-3.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        'SGD' optimizer, 'accuracy' metric).
    """
    penalty = l1(lamda) if reg_type == 1 else l2(lamda)

    net = Sequential([
        layers.Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      input_shape=(28, 28, 1), padding='valid',
                      kernel_regularizer=penalty),
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        layers.Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      padding='valid', kernel_regularizer=penalty),
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        layers.Flatten(),
        layers.Dense(120, activation='relu', kernel_regularizer=penalty),
        layers.Dense(84, activation='relu', kernel_regularizer=penalty),
        layers.Dense(10, activation='softmax', kernel_regularizer=penalty),
    ])
    net.compile(loss=keras.losses.categorical_crossentropy,
                optimizer='SGD',
                metrics=['accuracy'])
    return net

## Q4: Global Average Pooling 

In [None]:
# Q4: LeNet-5 variant whose fully connected head is replaced by Global Average Pooling
def model_GAP():
    """Build and compile the Global Average Pooling variant of the network.

    The convolution/pooling stack is the same as in the baseline model, but
    instead of Flatten + Dense(120) + Dense(84) the feature maps go through
    GlobalAveragePooling2D and straight into the 10-way softmax layer.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        'SGD' optimizer, 'accuracy' metric).
    """
    gap_net = Sequential([
        layers.Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      input_shape=(28, 28, 1), padding='valid'),
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        layers.Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                      padding='valid'),
        layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(10, activation='softmax'),
    ])
    gap_net.compile(loss=keras.losses.categorical_crossentropy,
                    optimizer='SGD',
                    metrics=['accuracy'])
    return gap_net

## Question 4:
## Calculate the number of parameters in the model

In [None]:
# Calculate the number of trainable parameters of a model for Question 4.
def model_size(model):
    """Count the trainable scalar parameters (individual floats) of a model.

    Only the shape of each weight is read; the weight values themselves are
    never fetched, so no tensor data is copied just to count elements.

    Args:
        model: object exposing ``trainable_weights``, an iterable of weights
            that each have a ``shape`` (iterable of integer dimensions).

    Returns:
        int: sum over all trainable weights of the product of their
        dimensions; 0 when there are no trainable weights.
    """
    # dtype=np.int64 keeps the product integral even for an empty shape ()
    return sum(int(np.prod(tuple(w.shape), dtype=np.int64))
               for w in model.trainable_weights)


## Question 6:
## Analyze the weights of the regularized models

In [None]:
# Question 6:
# Sparsity analysis of the weights of the regularized models (Hoyer's index)
def Hoyer_index(w_fc):
    """Compute Hoyer's sparsity index of a weight tensor.

    With N the number of elements of ``w_fc``, the value returned is
    (sqrt(N) - sum|w| / sqrt(sum w^2)) * (1 / (sqrt(N) - 1)).

    Args:
        w_fc: weights to analyse; in this notebook the kernel of a fully
            connected layer as returned by ``get_weights()[0]``.

    Returns:
        The index as the scalar result of the TensorFlow ops; callers in this
        notebook convert it with ``float()``.
    """
    # Number of elements of the FC layer and its square root
    n_elems = int(tf.size(w_fc))
    root_n = np.sqrt(n_elems)
    # L1 norm (sum of magnitudes) and L2 norm (root of the sum of squares)
    l1_norm = tf.reduce_sum(tf.abs(w_fc))
    l2_norm = tf.sqrt(tf.reduce_sum(tf.square(w_fc)))
    # Hoyer's index
    return (root_n - l1_norm / l2_norm) * (1 / (root_n - 1))


## Question 5:  Train/test models three times


In [None]:
# Build, train and evaluate the selected model three times, then report the statistics
def run3(model_type = 1, reg = 1, lamda = 1e-3):
    """Train and test one model variant three times and print a report.

    Each of the three runs builds a fresh model, fits it on the global
    ``x_train``/``y_train`` (``EPOCH`` epochs, ``BATCHSIZE`` batch size,
    15% validation split) and evaluates it on ``x_test``/``y_test``.
    The summary, the sparsity measurements and the returned parameter count
    all refer to the model of the LAST run only.

    Args:
        model_type: 1 -> model_baseline(); 2 -> model_reg(reg, lamda);
            any other value -> model_GAP().
        reg: regularization type forwarded to model_reg (1: L1, otherwise L2);
            only used when model_type == 2.
        lamda: regularization strength forwarded to model_reg; only used when
            model_type == 2.

    Returns:
        Number of trainable parameters of the last trained model, as computed
        by model_size().
    """
    Train_loss = []
    Test_loss = []
    Time_cost = []
    Train_acc = []
    Test_acc = []
    for _ in range(3):
        if model_type == 1:
            model = model_baseline()
        elif model_type == 2:
            model = model_reg(reg, lamda)
        else:
            model = model_GAP()

        # Wall-clock time of the fit call only (evaluation is not timed)
        start_time = time.time()
        # NOTE(review): training is pinned to the first GPU; behaviour on a
        # host without a GPU depends on TensorFlow's device placement settings.
        with tf.device('/device:GPU:0'):
            hist = model.fit(x = x_train,y = y_train, epochs= EPOCH, batch_size=BATCHSIZE, validation_split = 0.15,
                            verbose=0)
        end_time = time.time()
        # Keep the metrics of the final epoch
        Train_loss.append(hist.history['loss'][-1])
        Train_acc.append(hist.history['accuracy'][-1])
        Time_cost.append(end_time - start_time)
        # test the model
        test_score = model.evaluate(x_test, y_test)
        Test_loss.append(test_score[0])
        Test_acc.append(test_score[1])

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line after the table).
    model.summary()
    if (model_type != 3):
        print('--- Sparsity measurements ---')
        # In the baseline/regularized models, layers[5] and layers[6] are the
        # Dense(120) and Dense(84) layers; get_weights()[0] is the kernel.
        # FC layer 1
        Hoyer_layer1 = Hoyer_index(model.layers[5].get_weights()[0])
        # FC layer 2
        Hoyer_layer2 = Hoyer_index(model.layers[6].get_weights()[0])
        print('Hoyer_layer1:',float(Hoyer_layer1), '  Hoyer_layer2:', float(Hoyer_layer2),'\n')

    print('--- Time cost ---')
    print('Time_cost:      %.2f, %.2f, %.2f' % (float(Time_cost[0]) , float(Time_cost[1]), float(Time_cost[2])))
    print('Mean_Time_cost:', np.mean(Time_cost), '(+/-',np.std(Time_cost),')\n')
    print('-- Train error,  Test error --')
    print('Train_error: ', Train_loss[0] , Train_loss[1],Train_loss[2])
    print('Mean_Train_error:', np.mean(Train_loss), '(+/-',np.std(Train_loss),')\n')

    print('Test_error :   ', Test_loss[0] , Test_loss[1], Test_loss[2])
    print('Mean_Test_error:', np.mean(Test_loss), '(+/-',np.std(Test_loss),')\n')
    print('-- Train accuracy,  Test accuracy --')
    print('Train_acc:  ' , Train_acc[0] , Train_acc[1], Train_acc[2])
    print('Mean_Train_acc:', np.mean(Train_acc), '(+/-',np.std(Train_acc),')\n')
    print('Test_acc:   ' , Test_acc[0] ,Test_acc[1], Test_acc[2])
    print('Mean_Test_acc:', np.mean(Test_acc), '(+/-',np.std(Test_acc),')')

    # Question 4: calculate the number of parameters for this model
    Model_Size= model_size(model)

    return Model_Size
    

## Question 5: Train/test models three times
## Output the results for Q4, Q5, Q6

In [None]:
# Question 5: train/test every model variant three times through run3()
print('\n================ Baseline model ================\n')
model_id = 1 # baseline model
Num_para_baseline = run3(model_type = model_id)

# Q2 / Q3: weight decay experiments.
# Each entry: (banner, regularization type [1: L1, 2: L2], regularization strength)
model_id = 2 # model with L1 or L2
reg_experiments = [
    ('\n =============== L2, weight_decay = 1e-3 ===============\n', 2, 1e-3),
    ('\n =============== L2, weight_decay = 1e-4 ===============\n', 2, 1e-4),
    ('\n =============== L1, weight_decay = 1e-3 ===============\n', 1, 1e-3),
    ('\n =============== L1, weight_decay = 1e-4 ===============\n', 1, 1e-4),
]
for banner, regl_type, lamd in reg_experiments:
    print(banner)
    run3(model_id, regl_type, lamd)

# Q4: model with Global Average Pooling
print('\n =============== Q4: model with Global Average Pooling ===============\n')
model_id = 3 # model with Global Average Pooling
Num_para_GAP = run3(model_id)

# Q4: compare the parameter counts of the baseline and the GAP model
print('\n=============== Question 4 ===============\n')
diff_num_para = Num_para_baseline - Num_para_GAP
print('Number of parameters in Baseline model:  ', Num_para_baseline)
print('Number of parameters in GAP model:  ', Num_para_GAP)
print('The difference of number of parameters between Baseline model and the model using GAP: \n',
      diff_num_para)




Model: "sequential_59"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_118 (Conv2D)          (None, 24, 24, 6)         156       
_________________________________________________________________
average_pooling2d_118 (Avera (None, 12, 12, 6)         0         
_________________________________________________________________
conv2d_119 (Conv2D)          (None, 8, 8, 16)          2416      
_________________________________________________________________
average_pooling2d_119 (Avera (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_56 (Flatten)         (None, 256)               0         
_________________________________________________________________
dense_171 (Dense)            (None, 120)               30840     
_________________________________________________________________
dense_172 (Dense)            (None, 84)            