# Transfer Learning - Part 3 (Food Vision Mini)
## Scaling Up

The previous two notebooks, in which I dug into Transfer Learning, used only 10 food vision classes, but there are 101 classes in the actual dataset. The purpose of this notebook is to scale up and look at a larger model.

Our goal is to beat the original Food 101 paper with only 10% of the training data. The paper accuracy was 50.76% when training on 101,000 images.

## Imports

In [None]:
import os
import pathlib
import random
import sys
from typing import Tuple

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import tensorflow_hub as hub

from src import utils

In [None]:
# List the devices that are visible to TensorFlow
tf.config.get_visible_devices()

## Helpers

In [None]:
def print_num_trainable_layers(model):
    """Print how many trainable variables ``model`` currently has.

    Despite the function name, the number reported is
    ``len(model.trainable_variables)`` (variables, not layers).
    """
    trainable_variable_count = len(model.trainable_variables)
    print('Total Trainable Variables: ', trainable_variable_count)

In [None]:
def generate_checkpoint_callback(checkpoint_path: str, best_only: bool = True) -> tf.keras.callbacks.ModelCheckpoint:
    """Build a weights-only ModelCheckpoint callback that saves once per epoch.

    Args:
        checkpoint_path: Where the checkpoint is written.
        best_only: When True, only the epoch with the best ``val_accuracy``
            is kept; when False, the checkpoint is written every epoch.

    Returns:
        The configured ``tf.keras.callbacks.ModelCheckpoint``.
    """
    checkpoint_options = {
        'filepath': checkpoint_path,
        'monitor': 'val_accuracy',
        'save_weights_only': True,
        'save_best_only': best_only,
        'save_freq': 'epoch',
        'verbose': 1,
    }
    return tf.keras.callbacks.ModelCheckpoint(**checkpoint_options)


def generate_csv_logger_callback(filename: str, append: bool = False) -> tf.keras.callbacks.CSVLogger:
    """Build a CSVLogger callback that writes its log under ``logs/csv/``.

    Args:
        filename: Path of the CSV file relative to ``logs/csv/``; it may
            contain sub-directories (e.g. ``'run/model_0.csv'``).
        append: Forwarded to ``CSVLogger``. Pass True to keep the rows of an
            existing log, e.g. when resuming an interrupted training run.
            The default (False) matches the previous behaviour.

    Returns:
        The configured ``tf.keras.callbacks.CSVLogger``.
    """
    log_path = f'logs/csv/{filename}'

    # Make sure the target directory exists up front, so training does not
    # depend on the logger creating missing parent directories itself.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    return tf.keras.callbacks.CSVLogger(log_path, append=append)

In [None]:
def compare_histories(original_history, new_history, initial_epoch):
    """Plot two consecutive training runs as one continuous set of curves.

    Two figures are drawn: one for training/validation loss and one for
    training/validation accuracy. A vertical line marks where the second
    run (fine tuning) starts.

    Args:
        original_history: Metrics of the first run, either a pandas DataFrame
            or an object exposing a ``.history`` mapping (such as the value
            returned by ``model.fit``). Must contain the columns/keys
            ``accuracy``, ``loss``, ``val_accuracy`` and ``val_loss``.
        new_history: Metrics of the second run, in either of the same forms.
            The two arguments do not have to be of the same form.
        initial_epoch: Number of epochs in the first run; the marker line is
            drawn at ``initial_epoch - 1``.
    """
    def _as_frame(history):
        # Convert each argument on its own so a DataFrame can be combined
        # with a History object.
        if isinstance(history, pd.DataFrame):
            return history
        return pd.DataFrame(history.history)

    original_history_df = _as_frame(original_history)
    new_history_df = _as_frame(new_history)

    def _stitch(metric):
        # ignore_index renumbers the rows 0..N-1 so the second run continues
        # after the first instead of being plotted over the same x positions
        # when both inputs start their index at 0.
        return pd.concat([original_history_df[metric], new_history_df[metric]], ignore_index=True)

    total_acc = _stitch('accuracy')
    total_loss = _stitch('loss')
    total_val_acc = _stitch('val_accuracy')
    total_val_loss = _stitch('val_loss')

    # Loss Plots
    plt.figure(figsize=(8,8))
    plt.subplot(2, 1, 1)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    # Vertical marker spanning the current y-range at the last epoch of the first run
    plt.plot([initial_epoch-1, initial_epoch-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')

    # Accuracy Plots
    plt.figure(figsize=(8,8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    plt.plot([initial_epoch-1, initial_epoch-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

## Exploring the Data

In [None]:
# Root folder of the 10% subset of the 101-class dataset, plus its train/ and test/ sub-folders
data_directory = pathlib.Path('./data/food-101/101_food_classes_10_percent/')
train_directory = data_directory / 'train'
test_directory = data_directory / 'test'

In [None]:
# Summarize the dataset folder (helper from src.utils; presumably reports the image counts per folder — confirm)
utils.image.summarize_image_directory(data_directory)

#### Findings

After looking at the directory, it looks like there are 75 training images and 250 test images for each category of food.

## Data Inputs

In [None]:
# Constants
# Side length (in pixels) of the square images the datasets are resized to and the model takes as input
img_size = 224

In [None]:
# Training images, resized to img_size x img_size, with one-hot ('categorical') labels
train_data = tf.keras.utils.image_dataset_from_directory(str(train_directory),
                                                         image_size=(img_size, img_size),
                                                         label_mode='categorical')

# Test images loaded the same way; shuffle=False keeps a fixed order so that
# predictions can later be lined up with the labels
test_data = tf.keras.utils.image_dataset_from_directory(str(test_directory),
                                                        image_size=(img_size, img_size),
                                                        shuffle=False,
                                                        label_mode='categorical')

In [None]:
# Number of classes, taken from the class names of the training dataset (sizes the output layer)
num_image_classes = len(train_data.class_names)
num_image_classes

## Creating a Model

Steps to creating the model to identify 101 food classes.

1. Create a model checkpoint callback. (**UPDATE** I ended up adding two additional callbacks to better save checkpoints of the trained model)
2. Create a data augmentation layer to build the data augmentation into the model directly.
3. Build a headless (no top layers) functional EfficientNetB0 base model.
4. Compile our Model.
5. Feature extract for 5 epochs on training data set, and validate on 15% of the test data to save time per epoch.
6. Adjust the model and repeat (implement fine tuning, etc...)

In [None]:
model_0_path = 'models/101_food_classes/model_0'

# Reuse a previously saved model when it can be loaded; an OSError from the
# load is treated as "no saved model yet", in which case training is required.
try:
    model_0 = tf.keras.models.load_model(model_0_path)
except OSError:
    train_model_0: bool = True
else:
    train_model_0: bool = False

print('Train Model 0: ', train_model_0)

In [None]:
# 1. Callbacks
# Checkpoint that keeps only the weights of the best epoch (by validation accuracy)
best_weights_checkpoint_path_0 = 'checkpoints/101_classes_10_percent/model_0/best_epoch/checkpoint.ckpt'
best_weights_checkpoint_callback_0 = generate_checkpoint_callback(best_weights_checkpoint_path_0)

# Checkpoint written after every epoch, so an interrupted run can be resumed from the last epoch
last_epoch_checkpoint_path_0 = 'checkpoints/101_classes_10_percent/model_0/last_epoch/checkpoint.ckpt'
last_epoch_checkpoint_callback_0 = generate_checkpoint_callback(last_epoch_checkpoint_path_0, best_only=False)

# CSV log of the per-epoch training history (written under logs/csv/)
csv_logger_name_0 = '101_classes_10_percent/model_0.csv'
csv_logger_callback_0 = generate_csv_logger_callback(csv_logger_name_0)

In [None]:
# 2. Data Augmentation Layer (Scaling layer not required because EfficientNetB0 already has that incorporated)
# The augmentation layers are taken from tf.keras.layers directly rather than
# from the deprecated layers.experimental.preprocessing alias.
data_augmentation = tf.keras.models.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.2),
    layers.RandomHeight(0.2),
    layers.RandomWidth(0.2),
    layers.RandomZoom(0.2),
])

In [None]:
# 3. Building out the Transfer Learning Model
# Only build a fresh model when no saved model was loaded. Rebuilding
# unconditionally would replace the loaded (trained) model_0 with an
# untrained one that is never compiled, because compiling is also
# conditional on train_model_0.
if train_model_0:
    # EfficientNetB0 Base Model (headless, frozen for feature extraction)
    base_model = tf.keras.applications.EfficientNetB0(include_top=False)
    base_model.trainable = False

    # Input Layer
    inputs = layers.Input(shape=(img_size, img_size, 3), name='InputLayer')

    # Incorporate Data Augmentation Layer
    x = data_augmentation(inputs)

    # Incorporate Base Model (called with training=False, i.e. in inference mode)
    x = base_model(x, training=False)

    # Incorporate the Global Average Pooling Layer
    x = layers.GlobalAveragePooling2D(name='GlobalAveragePoolingLayer')(x)

    # Incorporate Output Layer (one softmax unit per food class)
    outputs = layers.Dense(num_image_classes, activation='softmax', name='OutputLayer')(x)

    # Build the model
    model_0 = tf.keras.models.Model(inputs, outputs)

In [None]:
# Print a summary of the model
model_0.summary()

In [None]:
# Visualize the model (helper from src.utils; presumably draws the layer graph — confirm)
utils.visualize.visualize_model(model_0)

In [None]:
# 4. Compile Model (skipped when a saved model_0 was loaded and no training is needed)
if train_model_0:
    model_0.compile(loss='categorical_crossentropy',
                    optimizer=tf.keras.optimizers.legacy.Adam(),
                    metrics=['accuracy'])

In [None]:
# Epoch to start training from (0 = start from scratch)
initial_epoch_0 = 0
# Epoch at which feature-extraction training stops
num_epochs_0 = 5
# Fraction of the test batches used for validation after each epoch
percent_of_validation_data = 0.15

In [None]:
# If fit was interrupted before completing all 5 epochs of training, uncomment below and update the initial epoch.
# initial_epoch_0 = 1
# model_0.load_weights(last_epoch_checkpoint_path_0)

In [None]:
# 5. Fit Model (feature extraction)
# num_epochs_0, initial_epoch_0 and percent_of_validation_data come from the
# settings cell above; they are not re-assigned here, so a change made there
# (or in the resume cell) is not silently overridden.
# NOTE(review): test_data is created with shuffle=False, so validating on the
# first 15% of its batches probably covers only the first few classes —
# confirm this is acceptable for the val_accuracy that the checkpoint monitors.
if train_model_0:
    history_0 = model_0.fit(train_data,
                            epochs=num_epochs_0,
                            initial_epoch=initial_epoch_0,
                            steps_per_epoch=len(train_data),
                            validation_data=test_data,
                            validation_steps=int(percent_of_validation_data * len(test_data)),
                            callbacks=[best_weights_checkpoint_callback_0, last_epoch_checkpoint_callback_0, csv_logger_callback_0])

In [None]:
# The CSV logger saved the training history, so load it back from that file
# (indexed by epoch) rather than relying on the in-memory history object
history_0_loaded = pd.read_csv(f'logs/csv/{csv_logger_name_0}', index_col='epoch')
history_0_loaded

In [None]:
# Evaluate the model on the full test dataset
model_0_performance = model_0.evaluate(test_data)
model_0_performance

In [None]:
# Plot the loss and accuracy from the logged history (helper from src.utils)
utils.plot.plot_history(history_0_loaded, 'loss')
utils.plot.plot_history(history_0_loaded, 'accuracy')

In [None]:
# Save the model only when it was trained in this session. A model that was
# loaded from model_0_path is already on disk, and skipping the save also
# prevents a saved model from being overwritten by accident.
if train_model_0:
    model_0.save(model_0_path)

#### Findings

After looking at the accuracy and loss curves above, the model appears to be overfitting the training data. I am going to attempt to reduce overfitting in the next models.

## Fine Tuned Model

To potentially reduce overfitting, I am going to unfreeze the last 5 layers in the base model and retrain the model. Before I do this, I am going to update the checkpoint criteria because of the time it takes to train each model, so that I can run a few epochs at a time. To do this, I set up three callbacks in total:

1. Save checkpoint of the best fit weights
2. Save checkpoint of the last epoch weights
3. Save a CSV logger file to store the history of the training.

In [None]:
model_1_path = 'models/101_food_classes/model_1'

# Reuse a previously saved fine-tuned model when it can be loaded; an OSError
# from the load is treated as "no saved model yet", so training is required.
try:
    model_1 = tf.keras.models.load_model(model_1_path)
except OSError:
    train_model_1: bool = True
else:
    train_model_1: bool = False

print('Train Model 1: ', train_model_1)

In [None]:
# 1. Callbacks
# Checkpoint that keeps only the weights of the best epoch (by validation accuracy)
best_weights_checkpoint_path_1 = 'checkpoints/101_classes_10_percent/model_1/best_epoch/checkpoint.ckpt'
best_weights_checkpoint_callback_1 = generate_checkpoint_callback(best_weights_checkpoint_path_1)

# Checkpoint written after every epoch, so an interrupted run can be resumed from the last epoch
last_epoch_checkpoint_path_1 = 'checkpoints/101_classes_10_percent/model_1/last_epoch/checkpoint.ckpt'
last_epoch_checkpoint_callback_1 = generate_checkpoint_callback(last_epoch_checkpoint_path_1, best_only=False)

# CSV log of the per-epoch training history (written under logs/csv/)
csv_logger_name_1 = '101_classes_10_percent/model_1.csv'
csv_logger_callback_1 = generate_csv_logger_callback(csv_logger_name_1)

In [None]:
# 2-3. Starting from model_0 (Reloading in where model_0 left off from)
# Only rebuild when model_1 still has to be trained. Cloning unconditionally
# would replace a fine-tuned model_1 that was just loaded from disk with a
# copy of model_0.
if train_model_1:
    model_1 = tf.keras.models.clone_model(model_0)
    model_1.load_weights(last_epoch_checkpoint_path_0)

# The EfficientNet base model is the layer at index 2 (it follows the input
# layer and the data augmentation layer in the model built above).
base_model_1 = model_1.layers[2]

if train_model_1:
    # Setting the efficient net model to be trainable
    base_model_1.trainable = True

    # Freeze everything except the last 5 layers of the base model
    # (only the last 5 layers are trainable)
    for layer in base_model_1.layers[:-5]:
        layer.trainable = False

In [None]:
# Print every layer of the base model with its trainable flag, to verify that only the last 5 layers are trainable
for layer in base_model_1.layers:
    print(layer, layer.trainable)

In [None]:
# 4. Compile Model with a lower learning rate for fine tuning (typically 10X lower than the default learning rate)
model_1.compile(loss='categorical_crossentropy',
                optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
                metrics=['accuracy'])

In [None]:
# 5. Fit the Model
# Fine tuning continues the epoch count of the feature-extraction run:
# training starts at epoch num_epochs_0 and stops at epoch num_epochs_1.
# percent_of_validation_data is taken from the settings cell of model_0
# instead of being re-assigned here.
num_epochs_1 = num_epochs_0 + 5

if train_model_1:
    model_1.fit(train_data,
                epochs=num_epochs_1,
                initial_epoch=num_epochs_0,
                steps_per_epoch=len(train_data),
                validation_data=test_data,
                validation_steps=int(percent_of_validation_data * len(test_data)),
                callbacks=[
                    best_weights_checkpoint_callback_1,
                    last_epoch_checkpoint_callback_1,
                    csv_logger_callback_1,
                ])

In [None]:
# Evaluate the fine-tuned model on the full test dataset
model_1_performance = model_1.evaluate(test_data)
model_1_performance

In [None]:
# The CSV logger saved the fine-tuning history, so load it back from that file (indexed by epoch)
history_1_loaded = pd.read_csv(f'logs/csv/{csv_logger_name_1}', index_col='epoch')
history_1_loaded

In [None]:
# Plot the loss and accuracy from the logged fine-tuning history (helper from src.utils)
utils.plot.plot_history(history_1_loaded, 'loss')
utils.plot.plot_history(history_1_loaded, 'accuracy')

In [None]:
# Plot both training runs as continuous curves, marking where fine tuning starts
compare_histories(history_0_loaded, history_1_loaded, num_epochs_0)

In [None]:
# Save the model only when it was trained in this session. A model that was
# loaded from model_1_path is already on disk, and skipping the save also
# prevents a saved model from being overwritten by accident.
if train_model_1:
    model_1.save(model_1_path)

## Evaluating Fine Tuned Model

Let's make some predictions, visualize them, then find out which predictions were the most wrong. To do this, I am going to use the already fine tuned and exported model associated with the course this notebook follows at the link below:

* https://storage.googleapis.com/ztm_tf_course/food_vision/06_101_food_class_10_percent_saved_big_dog_model.zip

In [None]:
# Load the already fine-tuned model from the course (expects the archive linked above to be extracted at this path)
external_model_path = 'models/101_food_classes/06_101_food_class_10_percent_saved_big_dog_model'
external_model = tf.keras.models.load_model(external_model_path)

In [None]:
# Evaluate the loaded model on the test data
external_model_performance = external_model.evaluate(test_data)
external_model_performance

In [None]:
# Make predictions on the whole test dataset, then show the first 10 results
pred_probs = external_model.predict(test_data, verbose=1)  # verbose=1 shows progress, so you can see how long is left
pred_probs[:10]

**NOTE**: Our model outputs a prediction probability array for each image predicted in the above code block.

In [None]:
# Look at the prediction for the first image (a prediction probability array)
pred_probs[0]

In [None]:
# Name of the class with the highest predicted probability for the first image
print(f'The class with the highest value for first image: {test_data.class_names[pred_probs[0].argmax()]}')

### Comparing Predictions to Actual Class Labels

In [None]:
# Predicted class index for every image (position of the highest value in each row of pred_probs)
pred_class_indices = pred_probs.argmax(axis=1)
pred_class_indices

In [None]:
# 1. Unbatch the test_data dataset and recover the integer class index of each label.
# The labels are one hot encoded, i.e. they look like [0,0,0,....1,0,...0,0,0],
# so argmax gives the position of the 1.
y_labels = [one_hot_label.numpy().argmax() for _, one_hot_label in test_data.unbatch()]

len(y_labels)

In [None]:
# NOTE: The test data is not shuffled, so the labels are in order (compare the first and last 10)
y_labels[:10], y_labels[-10:]

In [None]:
# Let's look at the accuracy using sklearn's accuracy_score function
accuracy_score(y_labels, pred_class_indices)

In [None]:
# Plot a confusion matrix for the model (helper from src.utils)
# NOTE(review): presumably built on sklearn's confusion_matrix function — confirm
utils.plot.plot_confusion_matrix(y_labels, pred_class_indices, classes=test_data.class_names, figsize=(120,120))

#### Findings

After looking at the confusion matrix, the model performed decently well; however, foods that look similar confuse the model. For instance, tiramisu and chocolate cake are commonly mixed up, as well as spring rolls and samosas.

### Classification Report

Scikit-learn has a helpful function to get many classification metrics per class.

In [None]:
# Print the per-class classification metrics
print(classification_report(y_labels, pred_class_indices))

In [None]:
# TODO: Need to functionize this
# Plot the F1 score of every class from the classification report
# Get the dictionary form of the classification report
model_classification_report = classification_report(y_labels, pred_class_indices, output_dict=True)

# Map each class name to its F1 score
class_name_to_f1_score = {}
for report_key, metrics in model_classification_report.items():
    # Only the per-class entries have keys that parse as an integer class
    # index; the remaining (non-numeric) keys are skipped. Catching just
    # ValueError keeps unrelated errors visible.
    try:
        class_number = int(report_key)
    except ValueError:
        continue

    class_name = test_data.class_names[class_number]
    class_name_to_f1_score[class_name] = metrics['f1-score']

# Turn into a dataframe (lists instead of dict views, so the columns are plain sequences)
class_name_to_f1_score_df = pd.DataFrame({
    'class_name': list(class_name_to_f1_score.keys()),
    'f1_score': list(class_name_to_f1_score.values()),
})

# Sort so the lowest F1 score comes first
class_name_to_f1_score_df = class_name_to_f1_score_df.sort_values('f1_score', ascending=True)

# Plot one horizontal bar per class
fig, ax = plt.subplots(figsize=(12,25))
scores = ax.barh(range(len(class_name_to_f1_score_df)), class_name_to_f1_score_df['f1_score'].values)
ax.set_yticks(range(len(class_name_to_f1_score_df)))
ax.set_yticklabels(class_name_to_f1_score_df['class_name']);
ax.set_xlabel('F1 Score')
ax.set_title('F1 Score for 101 Food Classes (Predicted by Food Vision Mini)');


### Visualizing Predictions on Custom Images

To visualize our model's prediction on our own images, we'll need a function to load and preprocess images, specifically it will need to:

* Read in a target image filepath using tf.io.read_file()
* Turn the image into a tensor using tf.io.decode_image()
* Resize the image tensor to be the same size as the images our model has trained on using tf.image.resize()
* Scale the image to get all of the pixel values between 0 & 1 (if necessary)

In [None]:
def load_and_prep_image(filename: str, image_size: int = 224, scale: bool = True) -> tf.Tensor:
    """Read an image file and prepare it as a square, 3-channel tensor.

    Args:
        filename: Path of the image file to read.
        image_size: Height and width (in pixels) the image is resized to.
        scale: When True, divide the pixel values by 255; pass False to
            leave the values as produced by the resize.

    Returns:
        The resized image as a tensor with 3 channels.
    """
    # Read in the raw file contents
    raw_bytes = tf.io.read_file(filename)

    # Decode into a tensor with 3 channels. expand_animations=False makes
    # decode_image return a single 3-D image even for animated GIFs (only the
    # first frame), so callers can always add one batch dimension.
    image = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)

    # Resize image
    image = tf.image.resize(image, [image_size, image_size])

    # Optionally divide the pixel values by 255 (not always required)
    if scale:
        image = image / 255.

    return image

In [None]:
# Show three random test images with their actual class, the model's predicted class and its confidence
plt.figure(figsize=(17,10))

for i in range(3):
    # Pick a random class directory, then a random image file inside it
    random_class_name = random.choice(test_data.class_names)
    test_class_dir = f'{str(test_directory)}/{random_class_name}'
    filename = random.choice(os.listdir(test_class_dir))
    file_path = f'{test_class_dir}/{filename}'

    # Load the image unscaled and add a batch dimension before predicting
    img = load_and_prep_image(file_path, scale=False)
    expanded_img = tf.expand_dims(img, axis=0)
    pred_prob = external_model.predict(expanded_img, verbose=0)
    pred_class = test_data.class_names[pred_prob.argmax()]

    # Plot the image (divided by 255 for display); the title is green for a
    # correct prediction and red for a wrong one
    plt.subplot(1, 3, i+1)
    plt.imshow(img/255.)
    title_color = 'g' if random_class_name == pred_class else 'r'
    plt.title(f'Actual: {random_class_name} | Predicted: {pred_class} | Prob: {pred_prob.max():.2f}', color=title_color, size=8)
    plt.axis(False)

### Identifying Most Incorrect Predictions

To get a better insight on what is confusing the model, we are going to look at predictions that had the highest confidence in their prediction, but they ended up being wrong. This can reveal some of the following insights:

* Data issues (wrong labels, e.g. model is correct but the true label is wrong)
* Confusing classes (get better/more diverse data)

To find out where the model is guessing wrong, going to find the following:

1. Get all the image file paths in the test dataset
2. Create a Pandas DF of the image file path, the true label, predicted class, and max predicted probabilities
3. Use DF to find all the incorrect predictions
4. Sort DF based on incorrect predictions with the highest probability predictions at the top
5. Visualize the images with the most wrong predictions

In [None]:
# 1. Collect the file paths of all .jpg images in the test dataset (unshuffled)
# NOTE(review): the rows built from this list assume its order matches the order of test_data — confirm
test_data_file_paths = [
    path_tensor.numpy()
    for path_tensor in test_data.list_files(f'{str(test_directory)}/*/*.jpg', shuffle=False)
]
len(test_data_file_paths), test_data_file_paths[0]

In [None]:
# 2. Create a pandas dataframe with one row per test image: its file path, the true and
# predicted class (as index and as name), and the highest predicted probability
predicted_df = pd.DataFrame({
    'filepath': test_data_file_paths,
    'y_true': y_labels,
    'predicted_class': pred_class_indices,
    'predicted_confidence': pred_probs.max(axis=1),
    'y_true_classname': [test_data.class_names[i] for i in y_labels],
    'y_pred_classname': [test_data.class_names[i] for i in pred_class_indices],
})
predicted_df

In [None]:
# 3. Flag each prediction as correct (True) or incorrect (False) by comparing the true and predicted class
predicted_df['prediction_correct'] = predicted_df['y_true'] == predicted_df['predicted_class']
predicted_df

In [None]:
# 4. Keep only the incorrect predictions and take the 100 with the highest confidence
incorrect_mask = ~predicted_df['prediction_correct']
top_100_incorrect_predicted_df = (
    predicted_df[incorrect_mask]
    .sort_values('predicted_confidence', ascending=False)
    .head(100)
)
top_100_incorrect_predicted_df

In [None]:
# 5. Print the file paths of the 9 incorrect predictions that had the highest confidence
for _, wrong_prediction in top_100_incorrect_predicted_df.head(9).iterrows():
    print(wrong_prediction.filepath)

In [None]:
# Display the most confident incorrect predictions, three images per figure,
# with the actual class, the predicted class and the confidence in the title
start_index = 0
images_to_view = 9
rows_to_show = top_100_incorrect_predicted_df[start_index:start_index + images_to_view]
for i, (_, row) in enumerate(rows_to_show.iterrows()):
    # Start a new figure for every group of three images
    if i % 3 == 0:
        plt.figure(figsize=(15,10))

    # Load the image unscaled. No new prediction is made here: the classes
    # and confidence shown come from the dataframe.
    img = load_and_prep_image(row.filepath, scale=False)

    # Plot the image (divided by 255 for display); the title is green when
    # the two class names match and red otherwise
    plt.subplot(1, 3, i % 3 + 1)
    plt.imshow(img/255.)
    title_color = 'g' if row.y_true_classname == row.y_pred_classname else 'r'
    plt.title(f'Actual: {row.y_true_classname} | Predicted: {row.y_pred_classname} | Prob: {row.predicted_confidence:.2f}', color=title_color, size=8)
    plt.axis(False)

### Testing Model on Custom Images

In [None]:
image_dir = 'data/food-101/model_testing/food'

# Sort the listing so the images are shown in a stable order, and skip hidden
# entries (e.g. '.DS_Store') and sub-directories, which cannot be decoded as images.
custom_image_filepaths = [
    f'{image_dir}/{filename}'
    for filename in sorted(os.listdir(image_dir))
    if not filename.startswith('.') and os.path.isfile(f'{image_dir}/{filename}')
]

for image_filename in custom_image_filepaths:
    # Load the image unscaled and add a batch dimension before predicting
    img = load_and_prep_image(image_filename, scale=False)
    expanded_img = tf.expand_dims(img, axis=0)
    pred_prob = external_model.predict(expanded_img, verbose=0)
    pred_class = test_data.class_names[pred_prob.argmax()]

    # Plot each image in its own figure (divided by 255 for display),
    # titled with the predicted class and its probability
    plt.figure()
    plt.imshow(img/255.)
    plt.title(f'Predicted: {pred_class} | Prob: {pred_prob.max():.2f}', size=8)
    plt.axis(False)
