---

  # Deep Learning and VGG16 Transfer Learning based Flower Recognition
This is the code for the project entitled "**Deep Learning and VGG16 Transfer Learning based Flower Recognition**" submitted as the course project of **ECE 9039@UWO**<br>  
Author: Zhijun Li (zli2945@uwo.ca) and Liang Zhang (lzha46@uwo.ca)<br>
Date: 01 March, 2023<br>
  

**Notebook 0: Only valid for Grid Search HPO process for number of layers in hand-designed CNN model**  
Purpose:  
&nbsp; 1): Find the optimzied number of layers in conv2d model

---

## Import libraries

In [2]:
# ignore the warnings for smooth progress
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
#import tensorflow as tf
#import random as rn
 
# data preparation
import cv2           
import os  

# data preprocess
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

# model selection and evaluation metrics
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,\
confusion_matrix, ConfusionMatrixDisplay, classification_report

# cnn model libraraies
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D  
    
# HPO related
from hyperopt import hp, fmin, rand, STATUS_OK
from hyperopt.early_stop import no_progress_loss
from keras.wrappers.scikit_learn import KerasClassifier

In [3]:
#from google.colab import drive
#drive.mount('/content/drive')

In [4]:
np.random.seed(42)

## Data preparation

In [2]:
def dataset_prep(flower_name_list, train_or_test): 
    #IMG_SIZE=224 #150
    folder_name = 'example_modify_dataset'
    
    for flower_name in flower_name_list:
        Opt_DataCnt = 0
        full_path = os.path.join(folder_name, train_or_test, flower_name)

        # os.listdir: get all files under DIR
        for pic in os.listdir(full_path):
            pic_path = os.path.join(full_path,pic) #full_path + '/' + pic
            # read img from path with default setting 'IMREAD_COLOR'
            this_pic = cv2.imread(pic_path, cv2.IMREAD_COLOR)
            this_pic = cv2.resize(this_pic, (IMG_SIZE, IMG_SIZE))
            if(train_or_test == 'train'):
                X_train.append(np.array(this_pic))
                y_train.append(str(flower_name))
                if(Opt_DataCnt < 200):
                    X_train_HPO.append(np.array(this_pic))
                    y_train_HPO.append(str(flower_name))
            elif(train_or_test == 'test'):
                X_test.append(np.array(this_pic))
                y_test.append(str(flower_name))
            else:
                print("Error! input 'train_or_test'-{0} is not supported!".format(train_or_test))
                break
            Opt_DataCnt += 1
        print("Finish processing {0}\nX_train's size: {1:5d}\tX_test's size: {2}\
        y_train's size: {3:5d}\ty_test's size: {4}\n".format(flower_name,len(X_train),len(X_test),len(y_train),len(y_test)))

In [3]:
X_train = []
X_test = []
y_train = []
y_test = []
X_train_HPO = []
y_train_HPO = []
IMG_SIZE=224 #150
    
flower_name_list = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
dataset_prep(flower_name_list, 'train')
print("\n")
dataset_prep(flower_name_list, 'test')


Finish processing daisy
X_train's size:   754	X_test's size: 0        y_train's size:   754	y_test's size: 0

Finish processing dandelion
X_train's size:  1796	X_test's size: 0        y_train's size:  1796	y_test's size: 0

Finish processing rose
X_train's size:  2570	X_test's size: 0        y_train's size:  2570	y_test's size: 0

Finish processing sunflower
X_train's size:  3293	X_test's size: 0        y_train's size:  3293	y_test's size: 0

Finish processing tulip
X_train's size:  4267	X_test's size: 0        y_train's size:  4267	y_test's size: 0



Finish processing daisy
X_train's size:  4267	X_test's size: 10        y_train's size:  4267	y_test's size: 10

Finish processing dandelion
X_train's size:  4267	X_test's size: 20        y_train's size:  4267	y_test's size: 20

Finish processing rose
X_train's size:  4267	X_test's size: 30        y_train's size:  4267	y_test's size: 30

Finish processing sunflower
X_train's size:  4267	X_test's size: 40        y_train's size:  4267	y_tes

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.25, random_state=42) 

## Data preprocessing

In [7]:
def XY_prep(X, y): 
    # label encoding
    le=LabelEncoder()
    y=le.fit_transform(y)
    # transform Y to binary one-hot encoding
    y=to_categorical(y,5)
    X=np.array(X)
    # Convergence, scaling the pixel values between [0, 1], avoids neuron fall into 'death zone'
    X=X/255
    return X, y

In [5]:
if 0:
    print('train:')
    print(np.shape(X_train))
    print(np.shape(y_train))
    print('test:')
    print(np.shape(X_test))
    print(np.shape(y_test))
    print('train_HPO:')
    print(np.shape(X_train_HPO))
    print(np.shape(y_train_HPO))

In [9]:
X_train_coded, y_train_coded = XY_prep(X_train, y_train)
X_test_coded, y_test_coded = XY_prep(X_test, y_test)
X_train_HPO_coded, y_train_HPO_coded = XY_prep(X_train_HPO, y_train_HPO)

In [6]:
if 0:
    print('train_coded:')
    print(np.shape(X_train_coded))
    print(np.shape(y_train_coded))
    print('test_coded:')
    print(np.shape(X_test_coded))
    print(np.shape(y_test_coded))
    print('train_HPO_coded:')
    print(np.shape(X_train_HPO_coded))
    print(np.shape(y_train_HPO_coded))

## HPO method construction and execution

In [15]:
def Grid_Search_CNN_Layers(num_layers = 1):
    # create a sequential model object
    myGSModel = Sequential()
    if(num_layers >= 1):
      # add an input layer with hidden_neurons units
      myGSModel.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'Same', activation ='relu', input_shape = (IMG_SIZE,IMG_SIZE,3)))
      myGSModel.add(MaxPooling2D(pool_size=(2,2)))
    
    if(num_layers >= 2):
      myGSModel.add(Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same', activation ='relu'))
      myGSModel.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

    if(num_layers >= 3):
      myGSModel.add(Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same', activation ='relu'))
      myGSModel.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

    if(num_layers >= 4):
      myGSModel.add(Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same', activation ='relu'))
      myGSModel.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

    # convert multidimension into one dimension
    myGSModel.add(Flatten())
    myGSModel.add(Dense(512))
    myGSModel.add(Activation('relu'))
    # below 5 is the number of flowers' classes
    myGSModel.add(Dense(5, activation = "softmax"))
    
    # compile the model specifying an optimizer and a loss function
    myGSModel.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return myGSModel

### Grid Search
param_grid = {
    'num_layers': [1, 2, 3, 4]
}
# to use a keras model with sklearn we need to call a wrapper function where the build function is Grid_Search_NN_model
hp_model = KerasClassifier(build_fn=Grid_Search_CNN_Layers, verbose=0)
# instantiate gridsearch object using 3 fold crossvaliadtion
grid=GridSearchCV(estimator = hp_model, param_grid = param_grid, refit=True, cv=3)
# fit the gridsearch object on the data
grid_result = grid.fit(X_train_HPO, y_train_HPO, epochs=10, verbose=0)
# determine the best parameter and the best score (it is accuracy in metrics so it is best accuracy score)
print('Best parameters: ', grid_result.best_params_)
print('Best accuracy score: ', grid_result.best_score_)

2023-03-16 11:34:46.061518: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Best parameters:  {'num_layers': 4}
Best accuracy score:  0.553044060866038
