# Convolutional Neural Networks & Computer Vision

Computer vision offers a way to take real-world images and computationally analyze them to find patterns and make predictions from visual data. For instance, a self-driving car uses computer vision to interpret the images from its cameras.

Why can't this be done using normal feed-forward neural networks? Simply put, the first step in analyzing an image with a feed-forward neural network is to flatten the image into a single vector. For instance, a 28 x 28 pixel image flattens to a vector of length 784. That is an extremely small image, and the input is already of length 784. This becomes problematic with larger images, because the deep neural network will have an extremely high number of weights to learn, which makes identifying patterns an extremely complex task.


## Imports

In [None]:
import os
import pathlib
import random
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from src import utils

## Helpers

In [None]:
def summarize_image_directory(data_directory: pathlib.Path) -> None:
    """Print how many image files each directory under ``data_directory`` holds.

    The tree is walked recursively with ``os.walk``. A file counts as an image
    when its name ends in ``.jpg``, ``.jpeg`` or ``.png``; the comparison is
    case-insensitive so that files such as ``IMG_01.JPG`` are counted too.
    Directories that contain no image files are not reported.

    Args:
        data_directory: Root directory of the dataset to summarize.

    Returns:
        None. One line per directory is written to standard output.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    for dirpath, _dirnames, filenames in os.walk(data_directory):
        # str.endswith accepts a tuple, so one call covers every extension.
        images = [name for name in filenames if name.lower().endswith(image_extensions)]
        if images:
            print(f'Directory: {dirpath} Total Images: {len(images)}')

In [None]:
def get_classnames_from_directory(data_directory: pathlib.Path):
    """Return the sorted names of the visible sub-directories of ``data_directory``.

    Each immediate sub-directory is treated as one class. Plain files and
    entries whose name starts with ``.`` are ignored.

    Args:
        data_directory: Directory whose immediate sub-directories name the classes.

    Returns:
        A NumPy array of the sub-directory names in ascending order.
    """
    visible_subdirs = []
    for entry in data_directory.iterdir():
        if not entry.is_dir() or entry.name.startswith('.'):
            continue
        visible_subdirs.append(entry.name)
    visible_subdirs.sort()
    return np.array(visible_subdirs)

In [None]:
def view_random_image(target_dir: str, target_class: str):
    """Show one randomly chosen image of ``target_class`` and return it.

    The image is picked from the folder ``target_dir/target_class``, drawn on
    the current matplotlib axes with the class name as its title, and its
    shape is printed.

    Args:
        target_dir: Directory that contains one sub-folder per class.
        target_class: Name of the class sub-folder to sample from.

    Returns:
        The image as returned by ``matplotlib.image.imread``.

    Raises:
        ValueError: If the class folder contains no visible files.
    """
    target_folder = os.path.join(target_dir, target_class)

    # Hidden entries (e.g. .DS_Store or notebook checkpoints) are not images
    # and would make imread fail, so they are never sampled.
    candidates = [name for name in os.listdir(target_folder) if not name.startswith('.')]
    if not candidates:
        raise ValueError(f'No images found in {target_folder}')

    random_image = random.choice(candidates)
    img = mpimg.imread(os.path.join(target_folder, random_image))

    plt.imshow(img)
    plt.title(target_class)
    plt.axis('off')

    print(f'{target_class.capitalize()} - Image Shape: {img.shape}')
    return img

In [None]:
def view_image_from_batch(images, labels, index=None):
    """Plot a single image from a batch in a new figure, titled with its label.

    Args:
        images: Indexable batch of images accepted by ``plt.imshow``.
        labels: Indexable batch of labels, aligned with ``images``.
        index: Position of the image to show. When ``None`` a random position
            within the batch is used.
    """
    chosen = random.randint(0, len(images) - 1) if index is None else index

    plt.figure()
    plt.imshow(images[chosen])
    plt.title(labels[chosen])
    plt.axis('off')

In [None]:
def load_and_prep_image(filename, img_shape=224):
    """Read an image file and prepare it as input for the models in this notebook.

    The file is decoded as a 3-channel image, resized to
    ``(img_shape, img_shape)`` and divided by 255, mirroring the
    ``rescale=1./255`` and ``target_size`` settings used by the training
    generators.

    Args:
        filename: Path of the image file to load.
        img_shape: Height and width, in pixels, of the returned image.

    Returns:
        A tensor of shape ``(img_shape, img_shape, 3)``.
    """
    raw_bytes = tf.io.read_file(filename)

    # Force three colour channels so grayscale or RGBA files still match the
    # (224, 224, 3) model input. expand_animations=False keeps the result 3-D
    # (first frame only) even for animated GIFs, which resize needs.
    img = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)

    # Resize image
    img = tf.image.resize(img, size=(img_shape, img_shape))

    # Normalize the image
    img = img / 255.

    return img

In [None]:
def get_predicted_class(model, filename, class_names):
    """Predict the class of an image file, plot it, and return the class name.

    Works for binary models (a single sigmoid output) and for multi-class
    models (one output per class). The plot title shows the predicted class
    and the probability the model assigns to that class.

    Args:
        model: Trained model exposing ``predict``.
        filename: Path of the image to classify.
        class_names: Sequence of class names indexed by the model's class index.

    Returns:
        The predicted class name taken from ``class_names``.
    """
    # Loading and prepping the image
    prepped_img = load_and_prep_image(filename)

    # Predicting the image (the model expects a batch, so add a batch axis)
    prediction = model.predict(tf.expand_dims(prepped_img, axis=0))
    scores = np.asarray(prediction[0])

    # Need to check for binary vs multiclass
    if len(scores) > 1:
        class_index = int(np.argmax(scores))
        confidence = float(scores[class_index])
    else:
        # The sigmoid output is the probability of class 1, so the confidence
        # in a class-0 prediction is its complement.
        positive_probability = float(scores[0])
        class_index = int(round(positive_probability))
        confidence = positive_probability if class_index == 1 else 1.0 - positive_probability
    class_name = class_names[class_index]

    # Plotting the image
    plt.figure()
    plt.imshow(prepped_img)
    plt.title(f'Image: {class_name.capitalize()} ({confidence * 100:.1f}% Confident)')
    plt.axis(False)

    return class_name

## Download & Analyze Dataset

The dataset used is the Food 101 dataset, which is commonly used to explore Computer Vision. To keep things simple while getting started, I am only going to look at two image classes so I can work quickly with a smaller dataset before applying what I learn to the larger dataset.

* https://www.kaggle.com/datasets/dansbecker/food-101 (original kaggle page of dataset)
* https://github.com/mrdbourke/tensorflow-deep-learning/ (getting the modified dataset)

In [None]:
# Dataset root for the two-class (pizza vs. steak) subset, relative to the
# working directory, plus its train/test split folders.
data_directory = pathlib.Path('./data/food-101/pizza_steak')
train_directory = data_directory.joinpath('train')
test_directory = data_directory.joinpath('test')

In [None]:
# Let's look at the number of files in the test and train sets, using the
# shared helper instead of repeating its directory walk inline.
summarize_image_directory(data_directory)


In [None]:
# Getting class names programmatically: one class per visible sub-directory of
# the training folder, sorted alphabetically (done by the shared helper).
class_names = get_classnames_from_directory(train_directory)
class_names

In [None]:
# View a random image of the first class (alphabetically) from the training
# dataset and keep the loaded image array.
pizza_img = view_random_image(target_dir=str(train_directory), target_class=class_names[0])

In [None]:
# Same for the second class: show a random training image and keep the array.
steak_img = view_random_image(target_dir=str(train_directory), target_class=class_names[1])

## End-to-End Example

1. Need to load our images.
2. Need to normalize the images
3. Need to build a CNN to find patterns in our images.
4. Need to compile our CNN.
5. Fit the CNN to training data.

### Convolutional Neural Network Model

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Preprocessing the data (scale all pixel values by 1/255)
train_data_gen = ImageDataGenerator(rescale=1./255)
valid_data_gen = ImageDataGenerator(rescale=1./255)

# 2. Import data from directories and turn it into batches of 32 images,
#    resized to 224x224, with binary (0/1) labels derived from the folder names
train_data = train_data_gen.flow_from_directory(
    directory=str(train_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    seed=42)

valid_data = valid_data_gen.flow_from_directory(
    directory=str(test_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    seed=42)

# 3. Build the CNN Model: two blocks of (Conv2D, Conv2D, MaxPool2D) followed
#    by a single sigmoid unit for the binary decision
model_1 = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=10,
                           kernel_size=3,
                           activation='relu',
                           input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2,
                              padding='valid'),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 4. Compile CNN Model (metrics passed as a list, like every other model here)
model_1.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.legacy.Adam(),
                metrics=['accuracy'])

# 5. Fit the CNN Model. valid_data is passed so the history also records
#    validation loss/accuracy, matching the other models in this notebook.
history_1 = model_1.fit(train_data,
                        epochs=5,
                        steps_per_epoch=len(train_data),
                        validation_data=valid_data,
                        validation_steps=len(valid_data))

In [None]:
# Print the layer-by-layer architecture and parameter counts of the CNN.
model_1.summary()

In [None]:
# Plot the training history of the CNN. plot_history lives in the project's
# src.utils package (not shown here) — presumably it draws the per-epoch
# curves stored in the History object; confirm against that module.
utils.plot.plot_history(history_1)

### Feed Forward Neural Network Model (For Comparison)

In [None]:
# Setting random seed for comparison
tf.random.set_seed(42)

# 1. Create Model
# A plain feed-forward network: the image is flattened into one long vector
# and passed through two small Dense layers before the sigmoid output.
model_2 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 3)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 2. Compile Model
model_2.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.legacy.Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# Same data, epochs and batching as the CNN above so the results are comparable.
model_2.fit(train_data,
            epochs=5,
            steps_per_epoch=len(train_data),
            validation_data=valid_data,
            validation_steps=len(valid_data))

In [None]:
# Looking at some details of model_2
# There are ~602,000 trainable parameters here (20X more parameters than model 1).
# Flatten yields 224*224*3 = 150,528 inputs, so the first Dense(4) layer alone
# holds 150,528*4 + 4 = 602,116 of them.
model_2.summary()

In [None]:
# See if we can make a better feed forward model
# Setting random seed for comparison
tf.random.set_seed(42)

# 1. Create Model
# Same idea as model_2 but much wider and one layer deeper:
# three hidden Dense layers of 100 units each.
model_3 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 3)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 2. Compile Model
model_3.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.legacy.Adam(),
                metrics=['accuracy'])

# 3. Fit Model
model_3.fit(train_data,
            epochs=5,
            steps_per_epoch=len(train_data),
            validation_data=valid_data,
            validation_steps=len(valid_data))

In [None]:
# Looking at model 3 structure
# ~15 million trainable parameters (500X number of trainable parameters compared to the CNN)
# The first Dense(100) layer dominates: 150,528*100 + 100 = 15,052,900 parameters.
model_3.summary()

# Re-looking at Convolutional Neural Network Steps

Now that I have done a simple example of a CNN and compared it against the feed-forward neural network, I am going to take a step back and analyze the actual steps that go into building a CNN.

## Binary Classification (Breaking it Down)

1. Become one with the data
2. Preprocess the data (scaled/normalized)
3. Create a model (start with a simple baseline)
4. Fit the Model
5. Evaluate the Model
6. Adjust and improve the Model (Beat the baseline)
7. Repeat until optimal/satisfied

## 0. Gathering the Dataset

In [None]:
# Dataset root for the two-class (pizza vs. steak) subset, relative to the
# working directory, plus its train/test split folders.
data_directory = pathlib.Path('./data/food-101/pizza_steak')
train_directory = data_directory.joinpath('train')
test_directory = data_directory.joinpath('test')

## 1. Analyzing the Dataset

In [None]:
# Visualize the data
# One figure with two side-by-side panels; view_random_image draws on the
# currently selected subplot, so each call fills the panel chosen just before it.
plt.figure()
plt.subplot(1, 2, 1)
steak_img = view_random_image(target_dir=str(train_directory), target_class=class_names[1])
plt.subplot(1, 2, 2)
pizza_img = view_random_image(target_dir=str(train_directory), target_class=class_names[0])

## 2. Preprocessing Data

Preparing data for the model (split data, normalize data, batch data, etc.).

### 2.1 Batch Data

The next step is to turn our data into **batches**. A batch is a small subset of the data. Rather than looking at all ~10,000 images at one time, a model might only look at 32 at a time. It does this for a couple of reasons:

1. 10,000 images (or more) might not fit into the memory of the processor.
2. Trying to learn the patterns in 10,000 images in one hit could result in a poorly learned model.

**NOTE** A batch size of 32 is a commonly used batch size for neural networks, and has been tested and found that it is a good batch size for many scenarios.

In [None]:
# Creating train and test data generators (both only rescale pixels by 1/255)
train_data_gen = ImageDataGenerator(rescale=1./255)
test_data_gen = ImageDataGenerator(rescale=1./255)

# Batches of 32 images resized to 224x224 with binary (0/1) labels
train_data = train_data_gen.flow_from_directory(
    directory=str(train_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    seed=42)

# Use the test generator created in this cell rather than valid_data_gen,
# which is a leftover from the end-to-end example above.
test_data = test_data_gen.flow_from_directory(
    directory=str(test_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    seed=42)

In [None]:
# Get a sample of a train data batch. The builtin next() goes through the
# iterator protocol, so it does not depend on the iterator having a .next() method.
images, labels = next(train_data)
len(images), len(labels)

In [None]:
# Looking at the shape of each image, and verify that it has been normalized
# (after rescale=1./255 the max/min should lie between 0 and 1)
images[0].shape, images[0].max(), images[0].min()

## 3. Create, Compile, Fit & Evaluate Model

Starting with a baseline model. A baseline model is a relatively simple model or existing result that you set up when beginning a machine learning experiment; it serves as the reference point when trying to improve your model.

In [None]:
# Make the Model Creation simpler
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

### Baseline Model

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Baseline: three stacked 3x3 convolutions with 10 filters each, no pooling,
# then a single sigmoid unit for the binary pizza/steak decision.
baseline_model = Sequential([
    Conv2D(filters=10,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    Conv2D(10, 3, activation='relu'),
    Conv2D(10, 3, activation='relu'),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
baseline_model.compile(loss='binary_crossentropy',
                       optimizer=Adam(),
                       metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
baseline_history = baseline_model.fit(train_data,
                                      epochs=5,
                                      steps_per_epoch=len(train_data),
                                      validation_data=test_data,
                                      validation_steps=len(test_data))

In [None]:
# Print the layer-by-layer architecture and parameter counts of the baseline.
baseline_model.summary()

In [None]:
# Plot the baseline's training history with plot_history's default settings
# (helper from the project's src.utils, not shown here — check its default metric).
utils.plot.plot_history(baseline_history)

In [None]:
# Plot the baseline's loss and accuracy separately, selecting each with the
# helper's `metric` argument.
utils.plot.plot_history(baseline_history, metric='loss')
utils.plot.plot_history(baseline_history, metric='accuracy')

#### Findings

When a model's validation loss starts to increase, it's likely that the model is overfitting the training dataset. This means it's learning the patterns in the training set too well, and thus the model's ability to generalize to unseen data will be diminished.

### Model-1: Adjust Model to Combat Overfitting

Fitting a machine learning model comes in 3 steps:

0. Create a baseline model to compare models against.
1. Beat the baseline by overfitting a larger model.
2. Reduce overfitting.

Ways to induce overfitting:

1. Increase the number of conv layers.
2. Increase the number of conv filters.
3. Add another dense layer to the output of our flattened layer.


Reduce overfitting:

1. Add data augmentation.
2. Add regularization (MaxPool2D).
3. Add more data

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Same three convolutions as the baseline, but each is now followed by a
# MaxPool2D layer (default pool size 2) to shrink the feature maps.
model_1 = Sequential([
    Conv2D(filters=10,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    MaxPool2D(pool_size=2),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
model_1.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
model_1_history = model_1.fit(train_data,
                              epochs=5,
                              steps_per_epoch=len(train_data),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the max-pooled model to compare against the baseline.
utils.plot.plot_history(model_1_history, metric='loss')
utils.plot.plot_history(model_1_history, metric='accuracy')

In [None]:
# Print the architecture; pooling should shrink the flattened size and with it the parameter count.
model_1.summary()

#### Findings

After implementing pooling into the CNN, the training data set and the validation (testing in our case) data set followed a similar trajectory. This is what we want to see in our models which limits overfitting.

NOTE: Reducing overfitting is also known as regularization.

## Model-2: Adjust Overfitting using Data Augmentation

Data augmentation is the process of altering our training data, giving it more diversity and in turn allowing our models to learn (hopefully) more generalizable patterns. Altering might mean adjusting the rotation of an image, flipping it, cropping it, etc.

**NOTE** Data augmentation is usually only performed on the training data.

### Resetting Up the ImageDataGenerator & Image Datasets

In [None]:
# Create ImageDataGenerator training instance with Data Augmentation
# NOTE: rotation_range is given in degrees (the other ranges are fractions),
# so 20 means "rotate by up to 20 degrees"; 0.2 would be practically no rotation.
train_data_gen_augmented = ImageDataGenerator(rescale=1./255,
                                             rotation_range=20,
                                             shear_range=0.2,
                                             zoom_range=0.2,
                                             width_shift_range=0.2,
                                             height_shift_range=0.3,
                                             horizontal_flip=True)

# Create ImageDataGenerator instances without data augmentation (rescale only)
train_data_gen = ImageDataGenerator(rescale=1./255)
test_data_gen = ImageDataGenerator(rescale=1./255)

# Creating the datasets for training and test sets
# shuffle=False on both training iterators keeps their file order identical, so
# an augmented image can be compared against its original further down.
train_data_augmented = train_data_gen_augmented.flow_from_directory(
    directory=str(train_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    shuffle=False)

train_data = train_data_gen.flow_from_directory(
    directory=str(train_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    shuffle=False)

test_data = test_data_gen.flow_from_directory(
    directory=str(test_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary')

### Visualizing the Augmented Data

In [None]:
# Pull one batch from the plain and the augmented iterator. Both were created
# with shuffle=False, so (when drawn in step) the batches hold the same source
# images in the same order. The builtin next() goes through the iterator
# protocol rather than relying on a .next() method.
images, labels = next(train_data)
augmented_images, augmented_labels = next(train_data_augmented)

# Show the same position from both batches: original first, augmented second.
random_index = random.randint(0, len(images)-1)

view_image_from_batch(images, labels, random_index)
view_image_from_batch(augmented_images, augmented_labels, random_index)

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Identical architecture to the max-pooled model_1; only the training data changes.
model_2 = Sequential([
    Conv2D(filters=10,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    MaxPool2D(pool_size=2),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
model_2.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
# NOTE: trains on the augmented, *unshuffled* training iterator
model_2_history = model_2.fit(train_data_augmented,
                              epochs=5,
                              steps_per_epoch=len(train_data_augmented),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the model trained on augmented (unshuffled) data.
utils.plot.plot_history(model_2_history, metric='loss')
utils.plot.plot_history(model_2_history, metric='accuracy')

## Model-3: Augmentation w/ Shuffled Data

In model 2, I didn't shuffle the data so I could see exactly what the augmentation does against data. Going to do the same thing as model-2, but turn on shuffle.

In [None]:
# Create ImageDataGenerator training instance with Data Augmentation
# NOTE: rotation_range is given in degrees (the other ranges are fractions),
# so 20 means "rotate by up to 20 degrees"; 0.2 would be practically no rotation.
train_data_gen_augmented = ImageDataGenerator(rescale=1./255,
                                             rotation_range=20,
                                             shear_range=0.2,
                                             zoom_range=0.2,
                                             width_shift_range=0.2,
                                             height_shift_range=0.3,
                                             horizontal_flip=True)

# Create ImageDataGenerator test instance without data augmentation (rescale only)
test_data_gen = ImageDataGenerator(rescale=1./255)

# Creating the datasets for training and test sets
# This time the augmented training data is shuffled.
train_data_augmented = train_data_gen_augmented.flow_from_directory(
    directory=str(train_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary',
    shuffle=True)

test_data = test_data_gen.flow_from_directory(
    directory=str(test_directory),
    batch_size=32,
    target_size=(224, 224),
    class_mode='binary')

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Same architecture as model_2; the only difference is that the augmented
# training data is now shuffled.
model_3 = Sequential([
    Conv2D(filters=10,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    MaxPool2D(pool_size=2),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
model_3.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
model_3_history = model_3.fit(train_data_augmented,
                              epochs=5,
                              steps_per_epoch=len(train_data_augmented),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the model trained on shuffled, augmented data.
utils.plot.plot_history(model_3_history, metric='loss')
utils.plot.plot_history(model_3_history, metric='accuracy')

### Findings

Just turning on shuffling for the training data significantly increased accuracy compared to model 2.

## Model 4: Improve Accuracy by Increasing Filters in Each Layer

Since we've already beaten our baseline, there are a few things to try to improve the model:

1. Increase the number of model layers.
2. Increase the filters in each layer.
3. Train for longer.
4. Find ideal learning rate.
5. More data!
6. Use transfer learning.

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Same layout as model_3, but every Conv2D layer now has 20 filters instead of 10.
model_4 = Sequential([
    Conv2D(filters=20,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    MaxPool2D(pool_size=2),
    Conv2D(20, 3, activation='relu'),
    MaxPool2D(),
    Conv2D(20, 3, activation='relu'),
    MaxPool2D(),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
model_4.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
model_4_history = model_4.fit(train_data_augmented,
                              epochs=5,
                              steps_per_epoch=len(train_data_augmented),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the 20-filter model.
utils.plot.plot_history(model_4_history, metric='loss')
utils.plot.plot_history(model_4_history, metric='accuracy')

## Model 5: Increase Accuracy by Training for Longer

In [None]:
# Setting the random seed
tf.random.set_seed(42)

# 1. Create Model
# Identical architecture to model_4; only the number of epochs changes below.
model_5 = Sequential([
    Conv2D(filters=20,
           kernel_size=3,
           strides=1,
           padding='valid',
           activation='relu',
           input_shape=(224, 224, 3),
           name='InputLayer'),
    MaxPool2D(pool_size=2),
    Conv2D(20, 3, activation='relu'),
    MaxPool2D(),
    Conv2D(20, 3, activation='relu'),
    MaxPool2D(),
    Flatten(),
    Dense(1, activation='sigmoid', name='OutputLayer')
])

# 2. Compile Model
model_5.compile(loss='binary_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# NOTE: train_data is a combination of X and y, so don't need to send both
# NOTE: We don't have a validation dataset, so going to use the test set
# NOTE: 10 epochs here versus 5 for model_4
model_5_history = model_5.fit(train_data_augmented,
                              epochs=10,
                              steps_per_epoch=len(train_data_augmented),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the model trained for 10 epochs.
utils.plot.plot_history(model_5_history, metric='loss')
utils.plot.plot_history(model_5_history, metric='accuracy')

In [None]:
# Location of a custom steak photo (outside the train/test folders) used to
# try the trained model on a new image.
test_image_directory = pathlib.Path('./data/food-101/model_testing')
steak_filename = test_image_directory.joinpath('steak', 'steak-and-eggs-2-2.jpeg')
steak_filename

In [None]:
# Load the custom image as-is (no resizing or rescaling) and display it without axes.
steak = mpimg.imread(steak_filename)
plt.imshow(steak)
plt.axis(False)

In [None]:
# Shape of the raw image, to compare with the (224, 224, 3) input the models expect.
steak.shape

#### NOTE

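When you train a neural network and want to make a prediction with it on your own custom data, it's important that the custom data is preprocessed into the same format as the data the model was trained on (here: resized to 224 x 224 and scaled to the range 0-1).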

In [None]:
# Classify the custom steak image with a binary model and plot the result.
# NOTE(review): this uses model_4 although model_5 was trained last — confirm
# which model is intended here.
get_predicted_class(model_4, str(steak_filename), class_names)

In [None]:
# Show the class names used to label the prediction above.
class_names

# Multi-class Image Classification

We've just been through a bunch of the steps below with binary classification. Now I am going to perform the following steps with a multi-class classification problem.

1. Become one with the data
2. Preprocess the data.
3. Create a model (start with baseline).
4. Fit the model (overfit it to make sure it works).
5. Evaluate the model
6. Adjust hyperparameters to improve the model and reduce overfitting.
7. Repeat until satisfied.

## Imports

In [None]:
import os
import pathlib
import random
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from src import utils

## Download & Analyze Dataset

The dataset used is the Food 101 dataset, which is commonly used to explore Computer Vision. Having worked through the two-class problem above, I am now going to look at 10 image classes, which is still a smaller dataset than the full Food 101.

* https://www.kaggle.com/datasets/dansbecker/food-101 (original kaggle page of dataset)
* https://github.com/mrdbourke/tensorflow-deep-learning/ (getting the modified dataset)

In [None]:
# Dataset root for the 10-class subset, relative to the working directory,
# plus its train/test split folders.
data_directory = pathlib.Path('./data/food-101/10_food_classes_all_data')
train_directory = data_directory.joinpath('train')
test_directory = data_directory.joinpath('test')

## 1. Visualizing the Data

In [None]:
# Print the number of images in every train/test class folder of the 10-class dataset.
summarize_image_directory(data_directory)

In [None]:
# Let's get the subdirectories
# Each visible sub-folder of the training directory is one class; the helper
# returns their names sorted alphabetically.
class_names = get_classnames_from_directory(train_directory)
class_names

In [None]:
# Let's look at some images: show a random training image from a randomly
# chosen class. The directory is passed as a string to match the helper's
# `target_dir: str` annotation and the other call sites in this notebook.
img = view_random_image(target_dir=str(train_directory), target_class=random.choice(class_names))

## 2. Preprocessing the Data

In [None]:
# Scaling values
# Shared preprocessing settings: pixel scale factor, square image size, batch size.
scale = 1. / 255
img_size = 224
batch_size = 32

# Creating data generator
train_data_gen = ImageDataGenerator(rescale=scale)
test_data_gen = ImageDataGenerator(rescale=scale)

# Loading data in batches
# class_mode='categorical' because there are now more than two classes.
train_data = train_data_gen.flow_from_directory(str(train_directory),
                                                target_size=(img_size, img_size),
                                                batch_size=batch_size,
                                                class_mode='categorical')

test_data = test_data_gen.flow_from_directory(str(test_directory),
                                              target_size=(img_size, img_size),
                                              batch_size=batch_size,
                                              class_mode='categorical')

## 3. Create the Baseline Model

We've been talking a lot about the CNN Explainer website. They use 10 classes as well, so let's start by creating our baseline model using the same architecture they use.

* https://poloclub.github.io/cnn-explainer/

In [None]:
# Set seed
tf.random.set_seed(42)

# 1. Create Model (same as in CNN explainer)
# Two blocks of (Conv2D, Conv2D, MaxPool2D), then a 10-way softmax output —
# one unit per food class.
baseline_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=10,
                           kernel_size=3,
                           strides=1,
                           padding='valid',
                           activation='relu',
                           input_shape=(img_size, img_size, 3),
                           name='InputLayer'),
    tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax', name='OutputLayer')
    
])

# 2. Compile Model
# categorical_crossentropy pairs with the class_mode='categorical' labels.
baseline_model.compile(loss='categorical_crossentropy',
                       optimizer=tf.keras.optimizers.Adam(),
                       metrics=['accuracy'])

# 3. Fit Model
baseline_history = baseline_model.fit(train_data,
                                      epochs=5,
                                      steps_per_epoch=len(train_data),
                                      validation_data=test_data,
                                      validation_steps=len(test_data))

In [None]:
# Evaluate the trained baseline on the test set (reports the compiled loss and accuracy).
baseline_model.evaluate(test_data)

In [None]:
# Plot loss and accuracy for the multi-class baseline.
utils.plot.plot_history(baseline_history, metric='loss')
utils.plot.plot_history(baseline_history, metric='accuracy')

### Findings

From the loss curves, the validation loss starts to increase while the training loss decreases, indicating that the model is overfitting. The next step to address overfitting is to introduce data augmentation, regularization, etc.

Ways of adjusting the model for overfitting:

1. Get more data. This allows for more opportunity to learn diverse patterns.
2. Simplify the model. Remove layers, filters, etc.
3. Data Augmentation. Data augmentation manipulates the training data in such a way to add more diversity to it without altering the original data.
4. Transfer Learning: Transfer learning leverages the patterns another model has learned from similar data, so we can use those patterns in our own model.

## Model-1: Adjust for Overfitting by Simplifying Model (Removing Layers)

Cutting out two of the Conv2D layers (one before each MaxPool).

In [None]:
# Set seed
tf.random.set_seed(42)

# 1. Create Model (simplified baseline: one Conv2D per block instead of two)
model_1 = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=10,
                           kernel_size=3,
                           strides=1,
                           padding='valid',
                           activation='relu',
                           input_shape=(img_size, img_size, 3),
                           name='InputLayer'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Conv2D(filters=10, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax', name='OutputLayer')
    
])

# 2. Compile Model
model_1.compile(loss='categorical_crossentropy',
                       optimizer=tf.keras.optimizers.Adam(),
                       metrics=['accuracy'])

# 3. Fit Model
# Same data and epochs as the baseline so the curves are directly comparable.
model_1_history = model_1.fit(train_data,
                                      epochs=5,
                                      steps_per_epoch=len(train_data),
                                      validation_data=test_data,
                                      validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the simplified multi-class model.
utils.plot.plot_history(model_1_history, metric='loss')
utils.plot.plot_history(model_1_history, metric='accuracy')

In [None]:
# Print the architecture and parameter counts of the simplified model.
model_1.summary()

#### Findings

From the loss curves, the accuracy went down and overfitting remained. Next step is to adjust the overfitting of the baseline model with data augmentation.

## Model-2: Addressing Overfitting Using Data Augmentation

Trying to reduce overfitting with data augmentation. Ideally, we want to:
* Reduce overfitting
* Increase validation accuracy

##### Step-0: Loading, Preprocessing, and Augmenting Data

In [None]:
# Scaling values
scale = 1. / 255
img_size = 224
batch_size = 32

# Creating data generator
# NOTE: rotation_range is given in degrees (the other ranges are fractions),
# so 20 means "rotate by up to 20 degrees"; 0.2 would be practically no rotation.
train_data_augmented_gen = ImageDataGenerator(rescale=scale,
                                    rotation_range=20,
                                    width_shift_range=0.2,
                                    height_shift_range=0.2,
                                    zoom_range=0.2,
                                    horizontal_flip=True)
# The test data is only rescaled, never augmented.
test_data_gen = ImageDataGenerator(rescale=scale)

# Loading data in batches
# NOTE: from here on `train_data` refers to the *augmented* training iterator.
train_data = train_data_augmented_gen.flow_from_directory(str(train_directory),
                                                          target_size=(img_size, img_size),
                                                          batch_size=batch_size,
                                                          class_mode='categorical')

test_data = test_data_gen.flow_from_directory(str(test_directory),
                                              target_size=(img_size, img_size),
                                              batch_size=batch_size,
                                              class_mode='categorical')

In [None]:
# Set seed
tf.random.set_seed(42)

# 1. Create Model (using clone model for quick)
# clone_model copies the baseline's architecture; per the Keras documentation
# the clone gets newly initialised weights, so it does not inherit the
# baseline's training.
model_2 = tf.keras.models.clone_model(baseline_model)

# 2. Compile Model
model_2.compile(loss='categorical_crossentropy',
                optimizer=tf.keras.optimizers.Adam(),
                metrics=['accuracy'])

# 3. Fit Model
# train_data is the augmented iterator created in the previous cell.
model_2_history = model_2.fit(train_data,
                              epochs=5,
                              steps_per_epoch=len(train_data),
                              validation_data=test_data,
                              validation_steps=len(test_data))

In [None]:
# Plot loss and accuracy for the baseline architecture trained on augmented data.
utils.plot.plot_history(model_2_history, metric='loss')
utils.plot.plot_history(model_2_history, metric='accuracy')

#### Findings:

The loss curve looks significantly better! The training and validation curves resemble each other much more closely and are still improving after epoch 5, so more epochs would likely result in better accuracy. How else can we experiment to improve this accuracy?

* Adjust model architecture
* Adjust augmentation hyper parameters.
* Train for longer
* Adjust learning rate.
* Try Transfer Learning

### Lets Test our Model w/ New Custom Images

In [None]:
# Location of a custom steak photo (outside the train/test folders) used to
# try the trained model on a new image.
test_image_directory = pathlib.Path('./data/food-101/model_testing')
steak_filename = test_image_directory.joinpath('steak', 'steak-and-eggs-2-2.jpeg')
steak_filename

In [None]:
# Load the custom image as-is (no resizing or rescaling) and display it without axes.
steak = mpimg.imread(steak_filename)
plt.imshow(steak)
plt.axis(False)

In [None]:
# Classify the custom steak image with the multi-class model and plot the result.
get_predicted_class(model_2, str(steak_filename), class_names)

## Save & Load Model

In [None]:
# Save the trained model to 'saved_trained_model_2' (path relative to the working directory).
model_2.save('saved_trained_model_2')

In [None]:
# Verify that the model saved and loads correctly
# The loaded model's test metrics should match those of model_2 in the next cell.
loaded_model_2 = tf.keras.models.load_model('saved_trained_model_2')
loaded_model_2.evaluate(test_data)

In [None]:
# Evaluate the in-memory model on the same test data for comparison with the reloaded copy.
model_2.evaluate(test_data)