# Classic CNN Notebook

## Import Libraries and Seed

In [None]:
import os
import random
from datetime import datetime

import splitfolders

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image

import tensorflow as tf
from keras.models import load_model
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Short aliases for the Keras API and its layers module, used by the cells below
tfk = tf.keras
tfkl = tfk.layers

# Report the TensorFlow version this notebook is running on
print(tf.__version__)

In [None]:
# Use one fixed seed for every random number generator involved, for reproducibility
seed = 42

# NOTE(review): PYTHONHASHSEED is presumably only honoured at interpreter
# start-up, so this assignment may affect child processes only — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python, NumPy and TensorFlow (current and compat.v1 API) all get the same seed
for seed_fn in (random.seed,
                np.random.seed,
                tf.random.set_seed,
                tf.compat.v1.set_random_seed):
    seed_fn(seed)

## Dataset Configuration

In [None]:
# Split the main dataset into train (80%) and val (20%) folders, once.
dataset_dir = '../datasetNoTest'

# The split is written to `dataset_dir`, i.e. the same folder that is tested
# here and read from below. Writing it to a different location would make a
# fresh run split the data and then look for the images in a folder that was
# never created.
# NOTE(review): the source folder 'dataset' is still resolved relative to the
# current working directory — confirm that this is where the raw images live.
if not os.path.exists(dataset_dir):
    print('splitting')
    splitfolders.ratio('dataset', output=dataset_dir, seed=seed, ratio=(0.8, 0.2))

# Setting dataset directories
training_dir = os.path.join(dataset_dir, 'train')
validation_dir = os.path.join(dataset_dir, 'val')

## Model Parameters and Class Weights

In [None]:
# Class names used for classification; the position in the list is the class index
labels = [
    'Apple',       # 0
    'Blueberry',   # 1
    'Cherry',      # 2
    'Corn',        # 3
    'Grape',       # 4
    'Orange',      # 5
    'Peach',       # 6
    'Pepper',      # 7
    'Potato',      # 8
    'Raspberry',   # 9
    'Soybean',     # 10
    'Squash',      # 11
    'Strawberry',  # 12
    'Tomato',      # 13
]

In [None]:
# Input Parameters
img_w = 256
img_h = 256
# Derived from the two values above (height, width, RGB channels) so that the
# image size is defined in exactly one place.
input_shape = (img_h, img_w, 3)
classes = 14

# Training Parameters
epochs = 90
batch_size = 64
# NOTE(review): build_model applies an L2 factor of 0.01, not this value —
# confirm which of the two is the intended regularisation rate.
reg_rate = 0.001

# Earlystopping Parameters
early_stopping = False
patience_epochs = 9

In [None]:
# Compute one weight per class as
#     total_images / (classes * images_in_that_class)
# so that classes with fewer training images get a larger weight.
elements_per_class = {i: 0 for i in range(classes)}

_, classes_directories, _ = next(os.walk(training_dir))

for img_class in classes_directories:
    # Skip folders that are not one of the known classes; labels.index()
    # would otherwise raise ValueError on them.
    if img_class not in labels:
        continue
    class_dir = os.path.join(training_dir, img_class)
    _, _, files = next(os.walk(class_dir))
    elements_per_class[labels.index(img_class)] = len(files)

total_images = sum(elements_per_class.values())

# A class without any file keeps the neutral weight 0.0 instead of triggering
# a ZeroDivisionError.
category_weight = {
    i: (total_images / (classes * count) if count else 0.0)
    for i, count in elements_per_class.items()
}

## Data Augmentation

In [None]:
# Random transformations applied to the training images, followed by a 1/255 rescale
train_augmentation = dict(
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.2,1.2],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    rescale=1/255.,
)
aug_train_data_gen = ImageDataGenerator(**train_augmentation)

# Validation images are only rescaled by 1/255, without any augmentation
valid_data_gen = ImageDataGenerator(rescale=1/255.)

In [None]:

# Arguments shared by the training and the validation directory iterators.
# The target size is taken from the configured image height/width instead of
# being repeated as a literal, and the one-hot ('categorical') label mode is
# spelled out because the model is trained with a categorical cross-entropy.
common_flow_args = dict(
    target_size=(img_h, img_w),
    color_mode='rgb',
    classes=labels,
    class_mode='categorical',
    batch_size=batch_size,
    seed=seed,
)

# Training batches: augmented and shuffled
aug_train_gen = aug_train_data_gen.flow_from_directory(directory=training_dir,
                                                       shuffle=True,
                                                       **common_flow_args)

# Validation batches: not augmented, fixed order
valid_gen = valid_data_gen.flow_from_directory(directory=validation_dir,
                                               shuffle=False,
                                               **common_flow_args)

## Neural Network Model

In [None]:
def _conv_block(x, filters, kernel_size, l2_rate, pool=True):
    """Apply Conv2D -> BatchNormalization -> LeakyReLU and, optionally, 2x2 max pooling.

    Args:
        x: Output tensor of the previous layer.
        filters: Number of convolution filters.
        kernel_size: Convolution window as a (rows, cols) tuple.
        l2_rate: Factor of the L2 penalty applied to the convolution kernel.
        pool: When True, the block ends with a MaxPooling2D layer of pool size (2, 2).

    Returns:
        The output tensor of the last layer of the block.
    """
    x = tfkl.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=(1, 1),
        padding='same',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        kernel_regularizer=l2(l2_rate),
    )(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.LeakyReLU()(x)
    if pool:
        x = tfkl.MaxPooling2D(pool_size=(2, 2))(x)
    return x


def build_model(input_shape, n_classes=14, l2_rate=0.01):
    """Build and compile the classic CNN classifier.

    The network is made of seven convolutional blocks (25, 50, 100, 200, 300,
    400 and 500 filters; 3x3 kernels in the first three blocks, 5x5 in the
    others), each followed by 2x2 max pooling except the last one. Then come
    global average pooling, a 512-unit dense layer with batch normalisation,
    LeakyReLU and dropout (0.3), and a softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the Input layer.
        n_classes: Number of units of the softmax output layer.
        l2_rate: Factor of the L2 penalty on the convolution and dense kernels
            (the output layer is not regularised).

    Returns:
        The model, compiled with categorical cross-entropy, the Adam optimizer
        and the metrics 'accuracy', 'precision' and 'recall'.
    """
    # (filters, kernel_size) of each convolutional block, in order
    conv_blocks = (
        (25, (3, 3)),
        (50, (3, 3)),
        (100, (3, 3)),
        (200, (5, 5)),
        (300, (5, 5)),
        (400, (5, 5)),
        (500, (5, 5)),
    )

    # Layer Input -------------------------------------------------------
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Convolutional blocks ----------------------------------------------
    x = input_layer
    last_block = len(conv_blocks) - 1
    for block_idx, (filters, kernel_size) in enumerate(conv_blocks):
        # The last block feeds the global pooling directly, so it is not max-pooled
        x = _conv_block(x, filters, kernel_size, l2_rate, pool=block_idx < last_block)

    # Global Average Pooling --------------------------------------------
    x = tfkl.GlobalAveragePooling2D(name='GlobalPooling')(x)

    # Dense Layer -------------------------------------------------------
    x = tfkl.Dense(
        units=512,
        name='Classifier1',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        kernel_regularizer=l2(l2_rate),
    )(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.LeakyReLU()(x)
    x = tfkl.Dropout(0.3, seed=seed)(x)

    # Output Layer ------------------------------------------------------
    output_layer = tfkl.Dense(
        units=n_classes,
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='Output',
    )(x)

    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # The metrics are named explicitly: auto-generated names get a numeric
    # suffix ('precision_1', ...) when the model is built more than once in
    # the same session, which would break lookups such as history['precision'].
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=[
            'accuracy',
            tfk.metrics.Precision(name='precision'),
            tfk.metrics.Recall(name='recall'),
        ],
    )

    return model

In [None]:
# Instantiate the network and print its layer-by-layer summary
model = build_model(input_shape=input_shape)
model.summary()

## Callbacks

In [None]:
# Learning Rate Scheduler
def scheduler(epoch, lr):
    """Keep the learning rate for the first 10 epochs, then decay it exponentially.

    Args:
        epoch: Index of the epoch about to start (the first epoch is 0).
        lr: Learning rate currently in use.

    Returns:
        The learning rate for this epoch as a plain Python float: `lr`
        unchanged while `epoch < 10`, otherwise `lr * exp(-0.1)`.
    """
    if epoch < 10:
        return float(lr)
    # Computed with NumPy and converted to float so that the callback receives
    # a number rather than a TensorFlow tensor.
    return float(lr * np.exp(-0.1))

In [None]:
# Utility function to create folders and callbacks for training

def create_folders_and_callbacks(model_name):
    """Create a timestamped experiment folder and the callbacks used for training.

    The folder is 'data_augmentation_experiments/<model_name>_<timestamp>' and
    contains a 'ckpts' sub-folder for the checkpoints and a 'tb_logs'
    sub-folder for the TensorBoard events.

    Reads the notebook-level names `early_stopping`, `patience_epochs` and
    `scheduler`.

    Args:
        model_name: Prefix of the experiment folder name.

    Returns:
        A list with the ModelCheckpoint, TensorBoard, (optional) EarlyStopping
        and LearningRateScheduler callbacks, in this order.
    """
    exps_dir = 'data_augmentation_experiments'
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

    callbacks = []

    # Model checkpoint ---------------------------------------------------
    # makedirs also creates the missing parent folders; exist_ok avoids the
    # separate "does it exist?" test before creating them.
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    os.makedirs(ckpt_dir, exist_ok=True)

    # NOTE(review): some Keras releases may require a file extension on the
    # checkpoint path when the whole model is saved — confirm for the
    # installed version.
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(ckpt_dir, 'cp'),  # where the model is saved
        save_weights_only=False,                # save the whole model, not only the weights
        save_best_only=True,                    # keep only the best epoch
    )
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard ----------------------------------
    # Folder where the events read by TensorBoard are written
    tb_dir = os.path.join(exp_dir, 'tb_logs')
    os.makedirs(tb_dir, exist_ok=True)

    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                                 profile_batch=0,
                                                 histogram_freq=1)
    callbacks.append(tb_callback)

    # Early Stopping -----------------------------------------------------
    if early_stopping:
        es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       patience=patience_epochs,
                                                       restore_best_weights=True)
        callbacks.append(es_callback)

    # Learning Rate Scheduler --------------------------------------------
    LRS_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
    callbacks.append(LRS_callback)

    return callbacks

## Training

In [None]:
# Create the experiment folders and the callbacks writing into them
callbacks = create_folders_and_callbacks(model_name='Classic_CNN')

# Train on the augmented generator with per-class weights and validate on the
# validation generator after every epoch
training_run = model.fit(
    x=aug_train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    class_weight=category_weight,
    callbacks=callbacks,
)

# Only the recorded per-epoch values are needed by the plotting cells
history = training_run.history

In [None]:
# Persist the model in the state reached after the last training epoch
save_dir = 'Classic_CNN'
model.save(save_dir)

## Some Nice Graphs

In [None]:
# Accuracy, precision and recall (training and validation) in a single figure
ALPHA = 0.5

# (history key, legend entry, line colour) for every curve, in drawing order
metric_curves = [
    ('accuracy', 'Accuracy Train', '#E64A19'),
    ('val_accuracy', 'Accuracy Val', '#F57C00'),
    ('precision', 'Precision Train', '#388E3C'),
    ('val_precision', 'Precision Val', '#689F38'),
    ('recall', 'Recall Train', '#303F9F'),
    ('val_recall', 'Recall Val', '#1976D2'),
]

plt.figure(figsize=(20,10))

for history_key, legend_entry, line_color in metric_curves:
    plt.plot(history[history_key], label=legend_entry, alpha=ALPHA, color=line_color)

plt.ylim(.5, 1)
plt.title('Metrics')
plt.legend(loc='lower right')
plt.grid(alpha=.3)
plt.show()

In [None]:
# Training vs. validation accuracy per epoch
accuracy_curves = (
    ('accuracy', 'Accuracy Train', '#E64A19'),
    ('val_accuracy', 'Accuracy Val', '#F57C00'),
)

plt.figure(figsize=(20,10))

for history_key, legend_entry, line_color in accuracy_curves:
    plt.plot(history[history_key], label=legend_entry, alpha=ALPHA, color=line_color)

plt.ylim(.5, 1)
plt.title('Accuracy')
plt.legend(loc='lower right')
plt.grid(alpha=.3)
plt.show()

In [None]:
# Training vs. validation precision per epoch
precision_curves = (
    ('precision', 'Precision Train', '#388E3C'),
    ('val_precision', 'Precision Val', '#689F38'),
)

plt.figure(figsize=(20,10))

for history_key, legend_entry, line_color in precision_curves:
    plt.plot(history[history_key], label=legend_entry, alpha=ALPHA, color=line_color)

plt.ylim(.5, 1)
plt.title('Precision')
plt.legend(loc='lower right')
plt.grid(alpha=.3)
plt.show()

In [None]:
# Training vs. validation recall per epoch
recall_curves = (
    ('recall', 'Recall Train', '#303F9F'),
    ('val_recall', 'Recall Val', '#1976D2'),
)

plt.figure(figsize=(20,10))

for history_key, legend_entry, line_color in recall_curves:
    plt.plot(history[history_key], label=legend_entry, alpha=ALPHA, color=line_color)

plt.ylim(.5, 1)
plt.title('Recall')
plt.legend(loc='lower right')
plt.grid(alpha=.3)
plt.show()

In [None]:
# Training vs. validation loss per epoch
loss_curves = (
    ('loss', 'Loss Train', '#ff7f0e'),
    ('val_loss', 'Loss Val', '#4D61E2'),
)

plt.figure(figsize=(15,10))

for history_key, legend_entry, line_color in loss_curves:
    plt.plot(history[history_key], label=legend_entry, alpha=ALPHA, color=line_color)

plt.ylim(0, 4)
plt.title('Loss')
plt.legend(loc='upper right')
plt.grid(alpha=.3)
plt.show()