# Template

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.utils import plot_model 
import os
import numpy as np
import matplotlib.pyplot as plt
from contextlib import redirect_stdout
import pandas as pd
import pydot_ng as pydot

## Getting and saving the data

In [None]:
# CONFIGURE
# Root of the local checkout; the data set directories are resolved against it.
local_path = "/home/sciapps/Documents/Repos/tfm"
# Identifier used for the outputs folder, the Keras model name and the plot titles.
model_name = "vgg19_b1b2b3_pretrained"

In [None]:
# DATA SET DIRECTORIES
# Folder (relative to local_path) that holds the train / val / test splits.
source_dir = "data/images/image_preprocessing/processed_images_train_val_test/"
train_dir, val_dir, test_dir = (
    os.path.join(local_path, source_dir, split_name)
    for split_name in ("train", "val", "test")
)

In [None]:
# OUTPUTS
# Every artefact of this run is written under ./outputs/<model_name>.
save_dir = os.path.join(os.path.abspath(os.getcwd()), "outputs", model_name)
# Create the outputs folder. exist_ok=True already makes this a no-op when the
# folder is present, so no separate (race-prone) existence check is needed.
os.makedirs(save_dir, exist_ok=True)

In [None]:
# LABELS
# One class per sub-directory of train_dir. Stray files (e.g. hidden system
# files) are skipped so the list only contains the class folders, in sorted order.
class_names = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print(class_names)

## Image decodification

`ImageDataGenerator`:

- Read images from the disk.
- Decode images in arrays of float pixel values (here RGB).
- Rescale the floats in the arrays from values between 0 and 255 to 0 and 1.
- Perform real-time image augmentation.

`flow_from_directory`:

- Generate the batches of array image data (aka tensors) with the real-time data augmentation defined in the `ImageDataGenerator`.
- Resize the arrays.

In [None]:
# CONFIGURATION ImageDataGenerator 
# Size (in pixels) that flow_from_directory resizes every image to.
img_height = 224 
img_width = 224
color_mode= "rgb"
# Label encoding requested from the train and validation generators.
class_mode="categorical"                                  
# Passed as `shuffle` to the training generators.
shuffle=True                                                               
# Seed passed to every flow_from_directory call.
seed = 1234 

In [None]:
def plot_images(images_arr):
    """
    Plot up to six images side by side in a single row.

    Arg.: a sequence of image arrays accepted by `imshow`. Only the first
          six are drawn; when fewer are given the remaining panels stay blank.
    """
    fig, axes = plt.subplots(1, 6, figsize=(15, 15))
    axes = axes.flatten()
    # Switch every axis off up front so that panels without an image do not
    # show an empty frame with ticks.
    for ax in axes:
        ax.axis('off')
    # zip stops at the shorter sequence: extra images are ignored.
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()

### Without augmentation

In [None]:
# Generator that only rescales the pixel values (no augmentation).
train_datagen_no_aug = ImageDataGenerator(rescale=1./255)
# target_size is (height, width) in Keras. No `subset` is requested because
# this generator defines no validation_split, so the whole folder is used.
train_array_no_aug = train_datagen_no_aug.flow_from_directory(
    directory=train_dir,
    target_size=(img_height, img_width),
    color_mode=color_mode,
    shuffle=shuffle,
    class_mode=class_mode,
    seed=seed,
)

In [None]:
# Take one batch from the non-augmented generator; the labels are discarded.
sample_training_images, _ = next(train_array_no_aug)
# plot_images draws at most six panels, so only the first six images of this
# slice are actually shown.
plot_images(sample_training_images[:24])


### Applying augmentation

Aim: increase the number of examples by randomly applying transformations to the original images. It also prevents overfitting of the model. 

Augmentation methods applied:

- Rotation
- Vertical flip
- Horizontal flip
- Brightness range  (Values less than 1.0 darken the image, e.g. [0.5, 1.0], whereas values larger than 1.0 brighten the image, e.g. [1.0, 1.5], where 1.0 has no effect on brightness)
- Zooming
- Shear range

The fill mode for the empty pixels that appear when rotating the image is set to "reflect". If the letters between the vertical bars are the pixels of the image, the area outside is filled as follows: abcddcba|abcd|dcbaabcd

In [None]:
# Data augmentation in train dataset
train_datagen = ImageDataGenerator(
    rescale=1./255,
    brightness_range=[0.2, 1.5],
    zoom_range=[0.5, 1.0],
    rotation_range=45,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.2,
    fill_mode="reflect",  # pixels exposed by a transformation are mirrored
)

# target_size is (height, width) in Keras.
train_array = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(img_height, img_width),
    color_mode=color_mode,
    shuffle=shuffle,
    class_mode=class_mode,
    seed=seed,
)

In [None]:
# Validation dataset: rescaled only, never augmented
val_datagen = ImageDataGenerator(rescale=1./255)

# target_size is (height, width) in Keras.
validation_array = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    color_mode=color_mode,
    class_mode=class_mode,
    seed=seed,
)

In [None]:
# Request batch 0 six times and keep the first image of each returned batch.
augmented_images = [
    batch[0][0] for batch in (train_array[0] for _ in range(6))
]
plot_images(augmented_images)

## Creating and training the model

### Model network architecture

The simplest network architecture consists of 3 layers:

- Input layer, with a number of nodes equal to the number of features in the model.
- Hidden layer, with a variable number of nodes. 
- Output layer, with a number of nodes equal to the number of classes. 

#### The hidden layers

The hidden layers can have different characteristics depending of their use. 
The transformations applied by the convolutional layers have shown the best results for image classification, so they will be the main component in this model.

##### The convolutional layer 

The main characteristic of a convolutional layer is that it applies a filter to each of the elements of a matrix (the pixels of an image). This filter is called the **kernel**. The kernel is a matrix (generally of small size, 2x3, 3x2, 3x3...) with a set of fixed real numbers. The kernel is placed over each pixel of the original image and its neighbours; the overlapping values are multiplied element-wise by the kernel and the results summed up to output one pixel value of the transformed image. Applying the filter to all the pixels of an image is called a **convolution**.

At this level , the performance of the image feature extraction depends on the values in the kernel and the concatenation of convolutional layers. This is because different filters may be specialized in extracting different features (for example, vertical or horizontal edges) and the sequential input and output values for each layer improves the final output.

In keras, the convolutional layer applied to a 2D matrix is called `Conv2D`.


##### The pooling layer

Pooling, in the context of deep learning and image classification, is a technique by which the dimension of an image is reduced. This is done by applying a filter (a matrix of n x n dimension) to the pixels of the image. For example, a filter of 3 x 3 pixels, would take the 3 x 3 pixels on the top left of the input image, apply a calculation to them as a group (as a **_pool_** of numbers), resulting on a single pixel value for the output image. This filter is set to slide through the image by a fixed amount of pixels. This parameter is called the **stride**. If the stride was three in the example above, then the 3 x 3 filter would go through the image without overlapping. 

One type of pooling calculation -and the most used- is to select the maximum number of the n x n pool. The pooling layer that performs this operation is known as **max pooling layer** (in `keras` is `MaxPooling2D`).

The max pooling layers are often added after a convolutional layer. By reducing the dimension of the image and selecting the maximum values, it passes the most activated pixels to the next layer. This filter helps to reduce the overfitting of the model to the original images. Moreover, it reduces the computational load. 

##### The fully connected layer

A fully connected layer is an all-purpose layer where each node receives the inputs from all the nodes of the previous layer, multiplied by their weights, summed and transformed by the activation function.

In keras, the fully connected layer is called `Dense`.

##### Flatten

In a classification model, the output layer must have as many nodes as classes, so that it computes a value for each class. To do that, the last layer needs a 1D array as an input. In a CNN, the arrays representing images have more than one dimension (width, height and color), so this dimensionality must be reduced. The flatten layer transforms the multi-dimensional image array into a 1D array before passing it to the last layer (or layers). 


### Creating the model with `Keras`

In `Keras` the model is defined with the `Sequential` class as a linear stack of layers. The **input layer** is implicit in the first layer (a network with 3 layers will have 2 in a Keras `Sequential` model).

The **input shape** is declared in the first layer. The model inputs are the tensors or arrays. Images have 3 dimensions: **width**, **height** and **channels**. The width and the height are measured in pixels and the channels reference the color values (the channel value is 1 if the image is in black and white and 3 if it is in color in RGB (red, green, blue) or HSV (hue, saturation, value) formats; 2 and 4 are black and white or color with an alpha channel (transparency)). 

The **activation function** that has to be specified in each layer transforms the input data so that the output doesn't have a linear relation with the input. 

### Transfer learning with `Keras`

Aim: to use a pre-existing model that has performed well carrying out a similar task. 


#### VGG19

Ref: 2014 ImageNet competition


In [None]:
# Load the pre-trained VGG19 model with its default constructor arguments;
# its last layer is dropped when the layers are copied into `model`.
loaded_model = tf.keras.applications.VGG19()


In [None]:
# Print the layer table of the loaded VGG19 model.
loaded_model.summary()

In [None]:
# Add the layers of the VGG19 model to a new sequential model.
# The name is set through the constructor instead of assigning the private
# `_name` attribute afterwards.
model = Sequential(name=model_name)
for layer in loaded_model.layers[:-1]:  # remove last layer
    model.add(layer)
# Show the model name
model.name

In [None]:
# Number of leading layers of `model` whose weights are frozen; the layers
# from this index onwards stay trainable.
limit_layer = 11

In [None]:
# Freeze the first `limit_layer` layers and keep the remaining ones trainable
for position, layer in enumerate(model.layers):
    layer.trainable = position >= limit_layer
# Append the classification layer: one softmax unit per class
model.add(Dense(len(class_names), activation = "softmax"))

In [None]:
# Show the summary in the notebook
model.summary()

# Write the same summary to a text file by redirecting stdout into it
with open(os.path.join(save_dir,"model_summary.txt"), "w") as file, redirect_stdout(file):
    model.summary()

The number of non-trainable parameters is no longer 0, because the weights of the first `limit_layer` layers (the first convolutional blocks of VGG19) have been frozen. 

In [None]:
# Draw the model architecture and write it to model_plot.png in save_dir
rankdir = "TB" # TB: vertical; LR: horizontal
plot_model(
    model,
    to_file=os.path.join(save_dir,"model_plot.png"),
    show_shapes=True,
    show_layer_names=True,
    rankdir=rankdir,
)

### Training the model

The parameters from the model above represent the **weights**. 
A weight is a number that multiplies the value of the **input node** before passing it to the **output node** in the next layer. 
An output node receives the values from each of the input nodes multiplied by their weights; these values are summed and then transformed by the **activation function**.


Training the model means adjusting the values of the weights in the subsequent runs (the **epochs**). In every run, the weights are modified based on an optimization algorithm or **optimizer**.
This algorithm tries to minimize the **loss function**. In every run, the model predicts the classes with a certain probability. The loss function measures, through that probability, the error of the predictions. In this context, the **gradient** is the derivative of the error with respect to each weight. The gradient is multiplied by a **learning rate** and the result is subtracted from the weight to obtain its new value. The value of the learning rate is typically in the order of 10^-3.

**Stochastic gradient descent** (sgd) is one type of optimizer. There are also different types of loss functions, like the **categorical crossentropy** used in this notebook or the **sparse categorical crossentropy**.

#### Configuration

- Epoch: the number of iterations through the network in order to adjust the weights. In general, the higher number of epochs, the better the performance, up to a limit. 
- Batch size: the number of examples passed at a time. If the batch size is equal to the number of examples in the training dataset, then we have one batch per epoch. This is computationally costly and may be limited by the machine power. If the batch size is smaller, then there would be several runs per epoch. If it is too small, the training may be too slow.  

In [None]:
# CONFIGURE
# NOTE(review): batch_size is not passed to any call in this notebook (it only
# appears in comments of the training cell) — confirm whether it is still needed.
batch_size = 100
# Maximum number of training epochs; early stopping may end training sooner.
epochs = 100  
# Number of batches drawn from the training generator in each epoch.
steps_per_epoch = 4

In [None]:
# COMPILING THE MODEL
# The generators are created with class_mode="categorical", so the loss used
# here is "categorical_crossentropy" (not the sparse variant).
optimizer = "sgd" # Options: "sgd", "adam"
model.compile(optimizer=optimizer,
            loss = "categorical_crossentropy",
            metrics=['accuracy'])

In [None]:
# Early stopping (when the validation loss does not fall anymore, to avoid overfitting)
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=30)

# Checkpoint that keeps the best weights seen during training.
# The path components must be relative: an absolute component such as
# "/tmp/checkpoint" makes os.path.join discard save_dir altogether.
checkpoint_filepath = os.path.join(save_dir, "tmp", "checkpoint")
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)
# NOTE(review): older TF releases log this metric as "val_acc" (the history
# cell handles both spellings) — confirm the monitored name exists.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor="val_accuracy",
    save_best_only=True,
)

In [None]:
# TRAINING THE MODEL
# No batch size is passed here: the batches come from the generators.
history = model.fit_generator(
    train_array,
    # Use the configured value instead of repeating it as a literal, so the
    # configuration cell is the single place to change it.
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=1, # get a progress bar and ETA
    validation_data=validation_array,
    validation_steps=2,
    callbacks=[callback, model_checkpoint_callback]
)

# Save model history to csv
history_df = pd.DataFrame(history.history)
history_df.to_csv(os.path.join(save_dir, "model_history.csv"), sep=",", index=False)

# Save model weights
model.save_weights(os.path.join(save_dir, "weights.h5"))

# Save model 
# model.save(os.path.join(save_dir, "model.h5"))

### Evaluating the model training

#### Accuracy and loss during training

In [None]:
# Parameters measured during model training: `history.history` is read as a
# dictionary and its keys (the recorded metric names) are printed.
history_dict = history.history
print(history_dict.keys())

In [None]:
# The accuracy key is "acc" or "accuracy" depending on the TF version.
# A missing metric now raises a KeyError here instead of being silently
# swallowed and surfacing later as a NameError in the plotting cell.
acc_key = "acc" if "acc" in history_dict else "accuracy"
acc = history_dict[acc_key]
val_acc = history_dict["val_" + acc_key]
loss = history_dict["loss"]
val_loss = history_dict["val_loss"]
# Early stopping can end training before `epochs` is reached, so the x axis
# follows the number of epochs actually recorded.
epochs_range = range(len(loss))

In [None]:
plt.figure(figsize=(8, 8))
plt.suptitle(model_name)
# One panel per metric: (metric name, training values, validation values, legend position)
panels = (
    ("Accuracy", acc, val_acc, "lower right"),
    ("Loss", loss, val_loss, "upper right"),
)
for position, (metric, train_values, val_values, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_values, label=f"Training {metric}")
    plt.plot(epochs_range, val_values, label=f"Validation {metric}")
    plt.legend(loc=legend_loc)
    plt.title(f"Training and Validation {metric}")
    plt.xlabel("Epochs")
    plt.ylabel(metric)
# Save the figure before showing it
plt.savefig(os.path.join(save_dir,"acc_loss_plot.png"))
plt.show()

#### Overfitting

When the model predicts the training set significantly better than the validation set, it is a sign of overfitting. 

### Saving model, model summary and outputs

In [None]:
# EDIT FOR EACH MODEL
# Model description: a plain-text record of how the model was built. It mirrors
# the cells of this notebook (VGG19, first `limit_layer` layers frozen) so the
# saved description matches the saved weights.
model_description = f"""
{model_name}
# load pre-trained model with the weights
loaded_model = tf.keras.applications.VGG19()
# Add the layers of the VGG19 model to a new sequential model 
model = Sequential()
for layer in loaded_model.layers[:-1]: # remove last layer
    model.add(layer)
# Freeze the weights in the layers of first blocks
for layer in model.layers[:{limit_layer}]:
    layer.trainable = False
for layer in model.layers[{limit_layer}:]:
    layer.trainable = True
# Add last layer for categories
model.add(Dense(len(class_names), activation = "softmax"))
"""

# Save model description
with open(os.path.join(save_dir,"model_description.txt"), "w") as file:
    with redirect_stdout(file):
        print(model_description)

Saving the model means saving:

- the model's configuration (topology)
- the model's weights
- the model's optimizer's state (if any)


To load the model again, it has to be built with the same architecture and then the saved weights loaded into it:

```
# load pre-trained model with the weights
loaded_model = tf.keras.applications.VGG19()
# Add the layers of the VGG19 model to a new sequential model 
model = Sequential()
for layer in loaded_model.layers[:-1]: # remove last layer
    model.add(layer)
# Freeze the weights in the layers
for layer in model.layers:
    layer.trainable = False
# Add last layer for categories
model.add(Dense(len(class_names), activation = "softmax"))  

model.load_weights(os.path.join(save_dir, "weights.h5"))
```

To get back the accuracy and loss data:

- Open the CSV with the model history.
- Save it to a dictionary.

```
# Recover saved history from file
history_df = pd.read_csv(os.path.join(save_dir, "model_history.csv"))
history_dict = history_df.to_dict()
try: # the key names vary across tf versions
    acc = np.array(list(history_dict["acc"].values()))
    val_acc = np.array(list(history_dict["val_acc"].values()))
    loss = np.array(list(history_dict["loss"].values()))
    val_loss = np.array(list(history_dict["val_loss"].values()))
    epochs_range = np.array(range(epochs))
except:
    try:
        acc = np.array(list(history_dict["accuracy"].values()))
        val_acc = np.array(list(history_dict["val_accuracy"].values()))
        loss = np.array(list(history_dict["loss"].values()))
        val_loss = np.array(list(history_dict["val_loss"].values()))
        epochs_range = np.array(range(epochs))
    except:
        pass
```

## Evaluating the model

### Get the test dataset

In [None]:
# Test dataset
# The parent folder is given as `directory` and "test" as its only class, so
# every file under the test folder is read as a single group.
test_main_dir = os.path.join(local_path, source_dir)
# One batch sized to the whole test folder, so a single predict step covers it.
test_batch_size = len(os.listdir(test_dir))
test_datagen = ImageDataGenerator(rescale=1./255)
# target_size is (height, width) in Keras. shuffle=False keeps the file order
# stable so predictions can be matched to file names; class_mode=None yields
# images without labels.
test_array = test_datagen.flow_from_directory(
    directory=test_main_dir,
    classes=["test"],
    batch_size=test_batch_size,
    target_size=(img_height, img_width),
    color_mode=color_mode,
    shuffle=False,
    class_mode=None,
    seed=seed,
)

In [None]:
# TEST LABELS
def get_test_labels(test_files):
    """
    Derive the class label of every test file from its name.

    Arg.: a list of file names with the structure
          "Genus_species_occurrencenumber.jpg".
    Return: a list with the "Genus_species" part of each name, i.e.
            everything before the last underscore (an empty string
            when the name contains no underscore).
    """
    # rpartition splits on the last "_" only; its head is the class name
    return [file_name.rpartition("_")[0] for file_name in test_files]

In [None]:
def test_labels_to_index(test_labels, class_names):
    """
    Return a 1D array of integers with the corresponding
    number for a class.
    Args.: - A list with the class name of each item in 
          the test data set.
           - A sorted list with the possible class names. 
    Eg.: test_labels[1] = "Buxus_sempervirens" corresponds to index 4
         in the list of class names.
    """
    test_labels_index = []
    for i in range(len(test_labels)):
        ind = class_names.index(test_labels[i])
        test_labels_index.append(ind)
    return np.array(test_labels_index)

In [None]:
# Take the file names from the generator itself rather than from os.listdir:
# listdir returns entries in arbitrary order and may include files the
# generator skips, which would misalign the labels with the prediction rows.
test_files = [os.path.basename(path) for path in test_array.filenames]
test_labels = get_test_labels(test_files)
test_labels[:5]

In [None]:
# Convert the class name of every test file into its index in class_names,
# then show the first five indices.
test_labels_index = test_labels_to_index(test_labels, class_names)
test_labels_index[:5]

### Predict the probability of each class

In [None]:
# Get the probability of predicting each class for each image.
# A single step is requested because the test generator's batch size is set
# to the number of entries in the test folder.
predictions = model.predict_generator(test_array,steps=1,verbose=1)

Predictions is a 2D array with a shape: (number of examples in test, number of classes)

In [None]:
# Show the shape of the predictions array.
predictions.shape

In [None]:
# Get predicted class for each example
def predicted_class(predictions):
    """
    Return a 1D array with the predicted class for each example.
    Arg.: 2D array predictions of shape (number of examples, number of classes)

    The predicted class is the index of the highest probability in each
    row; on exact ties the lowest index wins.
    """
    predictions = np.asarray(predictions)
    if predictions.size == 0:
        # No examples: nothing to rank.
        return np.array([], dtype=int)
    # argmax picks the true maximum of each row. A lookup based on
    # np.isclose would instead return an earlier class whose probability is
    # merely within floating-point tolerance of the maximum.
    return np.argmax(predictions, axis=1)

# Predicted class index of every example in `predictions`.
pred_class = predicted_class(predictions)

### Plot the confusion matrix

In [None]:
# Display the class indices derived from the test file names.
test_labels_index

In [None]:
# Display the predicted class indices for comparison.
pred_class

In [None]:
# Build the confusion matrix.
# num_classes pins the matrix to one row/column per class, even when the
# highest-indexed classes appear neither in the test labels nor in the
# predictions; this keeps it aligned with the class-name ticks of the plot.
cm = tf.math.confusion_matrix(test_labels_index, pred_class,
                              num_classes=len(class_names))
# Convert from tensor to array
if tf.executing_eagerly():
    conf_mat = cm.numpy()
else:
    # The context manager closes the session and releases its resources.
    with tf.compat.v1.Session() as sess:
        conf_mat = sess.run(cm)
conf_mat

In [None]:
def plot_confusion_matrix(cm, class_names, model_name):
    """
    Plot a confusion matrix and save it as "conf_matrix.png" in `save_dir`.

    Args:
    cm (array, shape = [n, n]): a confusion matrix of integer classes
    class_names (array, shape = [n]): String names of the integer classes
    model_name (str): model identifier appended to the plot title

    Nothing is returned. `save_dir` is read from the notebook's global scope.
    """
    plt.figure(figsize=(20, 20))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.BuGn)
    plt.title("Confusion matrix - "+ model_name, fontsize = 22)
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=90)
    plt.yticks(tick_marks, class_names)
    # Assumes cm follows the tf.math.confusion_matrix layout: rows are the
    # true labels and columns the predicted labels.
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    # bbox_inches="tight" keeps the rotated class names inside the saved image.
    plt.savefig(os.path.join(save_dir,"conf_matrix.png"), bbox_inches="tight")


In [None]:
# Plot the confusion matrix and save it to the outputs folder.
plot_confusion_matrix(conf_mat, np.array(class_names), model_name)