# Dog Breed Identification CNN

In [None]:
# Watermark
# Load the `watermark` IPython extension and print environment details for
# this run; the -p flag lists the tensorflow and numpy packages so results
# can be tied to the library versions that produced them.
%load_ext watermark
%watermark -v -m -p tensorflow,numpy

In [None]:
# Imports
from datetime import datetime
import os
import pickle
from time import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [None]:
# Directories
# Everything is resolved relative to the parent of the working directory:
#   <parent>/data/processed/{train,test}
base_dir = os.path.pardir

data_dir = os.path.join(base_dir, 'data')
processed_dir = os.path.join(data_dir, 'processed')
train_dir, test_dir = (
    os.path.join(processed_dir, split) for split in ('train', 'test')
)

In [None]:
# Functions
def plot_train_result(data, metric, ax=None):
    """Plot the per-epoch training (and validation) curve of one metric.

    Args:
        data: Mapping from metric name to a sequence of per-epoch values.
            ``data[metric]`` is required; ``data[f"val_{metric}"]`` is drawn
            as the validation curve when present.
        metric: Name of the metric to plot; also used as the y-axis label.
        ax: Axes to draw on. When ``None`` the current matplotlib axes
            (``plt.gca()``) are used.

    Returns:
        The axes that were drawn on.

    Raises:
        KeyError: If ``metric`` is not a key of ``data``.
    """
    # Compare against None explicitly so a caller-supplied axes object is
    # never replaced just because it happens to evaluate as falsy.
    if ax is None:
        ax = plt.gca()
    ax.plot(data[metric], label='Train')
    # Histories recorded without validation data have no "val_" entries.
    val_key = f"val_{metric}"
    if val_key in data:
        ax.plot(data[val_key], label='Validation')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric)
    ax.set_title(f"Training process: {metric}")
    ax.legend()
    ax.grid()
    return ax

In [None]:
# Create the generators for the input data
input_size = (299, 299)
batch_size = 64

# - Data generators: both rescale pixel values by 1/255; only the training
#   generator also applies random horizontal flips and rotations.
ImageDataGenerator = keras.preprocessing.image.ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20
)
test_datagen = ImageDataGenerator(rescale=1./255)

# - Flow from directory generators: identical settings for both splits, so
#   the shared options are spelled out once.
flow_options = dict(
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical'
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

In [None]:
# Build the model
# - InceptionV3 convolutional base with ImageNet weights and without its
#   classification head
base_model = keras.applications.inception_v3.InceptionV3(
    include_top=False, weights='imagenet'
)

# - Freeze everything except the last 62 layers of the base (the final
#   inception block, per the original note)
n_trainable_layers = 62
base_model.trainable = True
for layer in base_model.layers[:-n_trainable_layers]:
    layer.trainable = False

# - Stack the classifier head on top of the base, layer by layer
model = keras.Sequential()
model.add(base_model)
# - Collapse the spatial dimensions of the feature maps
model.add(keras.layers.GlobalAveragePooling2D())
# - Two dense + dropout stages
model.add(keras.layers.Dense(1024, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dropout(0.3))
# - Softmax output over the 120 classes
model.add(keras.layers.Dense(120, activation='softmax'))

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [None]:
# Configure logging with TensorBoard and checkpoints
# - One run directory per launch: <base_dir>/logs/<YYYYMMDD>-<unix seconds>,
#   with the checkpoints in a sub-directory of it.
log_dir = os.path.abspath(os.path.join(
    base_dir, 'logs', f"{datetime.today():%Y%m%d}-{round(time())}"
))
cp_dir = os.path.join(log_dir, 'checkpoints')
# - makedirs creates log_dir on the way to cp_dir; exist_ok keeps the cell
#   re-runnable when the same directory name is generated again.
os.makedirs(cp_dir, exist_ok=True)

# - TensorBoard logging, updated after every batch, without histograms
cb_tboard = keras.callbacks.TensorBoard(
    log_dir=log_dir, histogram_freq=0, update_freq='batch'
)
# - Weights-only checkpoints for the initial training phase; the file name
#   carries the epoch number and the validation loss.
cb_checkpoint = keras.callbacks.ModelCheckpoint(
    os.path.join(cp_dir, 'init.{epoch:02d}-{val_loss:.2f}.hdf5'),
    save_weights_only=True, verbose=1
)
callbacks = [cb_tboard, cb_checkpoint]

In [None]:
# Train the model
epochs = 10

# - Initial training phase, validated against the test generator
history = model.fit_generator(
    train_generator,
    validation_data=test_generator,
    epochs=epochs,
    callbacks=callbacks,
    shuffle=True
)

# - Persist the per-epoch metrics dict next to the checkpoints
history_path = os.path.join(cp_dir, 'history.pkl')
with open(history_path, 'wb') as fout:
    pickle.dump(history.history, fout)

In [None]:
# Training result plots
fig, axs = plt.subplots(1, 2, figsize=(12, 4))

# `history` is either the object returned by training (metrics live in its
# `.history` dict) or the plain dict restored by the "Load from checkpoint"
# cell; accept both.
init_hist = getattr(history, 'history', history)
# This notebook refers to the accuracy metric both as 'acc' and as
# 'accuracy'; use whichever key the recorded history actually contains.
acc_key = 'accuracy' if 'accuracy' in init_hist else 'acc'

plot_train_result(init_hist, 'loss', ax=axs[0])
plot_train_result(init_hist, acc_key, ax=axs[1])

fig.suptitle('Initial Training', x=0.05, ha='left', fontsize=14, 
             fontweight='bold');

In [None]:
# Load from checkpoint
# NOTE: pickle can execute arbitrary code while loading; only open history
# files that were produced by this notebook.
with open('../models/initial/history.pkl', 'rb') as fin:
    loaded = pickle.load(fin)
# The training cell pickles the plain `history.history` dict, while later
# cells read `history.history`. Wrap a bare dict in a History object so
# `history` looks the same whether it came from training or from disk.
if hasattr(loaded, 'history'):
    history = loaded
else:
    history = keras.callbacks.History()
    history.history = loaded
model.load_weights('../models/initial/init.05-0.85.hdf5')

In [None]:
# - Unfreeze another inception block
# The trainable tail of the base grows from 62 to 19 + 62 layers; every layer
# before it stays frozen.
n_unfrozen = 19 + 62
base_model.trainable = True
for layer in base_model.layers[:-n_unfrozen]:
    layer.trainable = False

# - Recompile the model so the new trainable flags take effect, now with
#   plain SGD
fine_tune_optimizer = keras.optimizers.SGD(lr=0.0025)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# - Checkpoint callback for the fine-tuning phase ('fine.' file prefix); the
#   TensorBoard callback from the initial phase is reused
fine_cp_pattern = os.path.join(cp_dir, 'fine.{epoch:02d}-{val_loss:.2f}.hdf5')
cb_checkpoint = keras.callbacks.ModelCheckpoint(
    fine_cp_pattern, save_weights_only=True, verbose=1
)
callbacks = [cb_tboard, cb_checkpoint]

In [None]:
# Fine-tune the model
epochs = 10

# - Second training phase, validated against the test generator
history_fine = model.fit_generator(
    train_generator,
    validation_data=test_generator,
    epochs=epochs,
    callbacks=callbacks,
    shuffle=True
)

# - Persist the per-epoch metrics dict next to the checkpoints
history_fine_path = os.path.join(cp_dir, 'history_fine.pkl')
with open(history_fine_path, 'wb') as fout:
    pickle.dump(history_fine.history, fout)

In [None]:
# Plot fine-tuning results
fig, axs = plt.subplots(1, 2, figsize=(12, 4))

# `history_fine` is either the object returned by training (metrics live in
# its `.history` dict) or the plain dict restored by the "Load history_fine
# from file" cell; accept both.
fine_hist = getattr(history_fine, 'history', history_fine)
# This notebook refers to the accuracy metric both as 'acc' and as
# 'accuracy'; use whichever key the recorded history actually contains.
acc_key = 'accuracy' if 'accuracy' in fine_hist else 'acc'

plot_train_result(fine_hist, 'loss', ax=axs[0])
plot_train_result(fine_hist, acc_key, ax=axs[1])

fig.suptitle('Fine-Tuning', x=0.05, ha='left', fontsize=14, 
             fontweight='bold');

In [None]:
# Load history_fine from file
# NOTE: pickle can execute arbitrary code while loading; only open history
# files that were produced by this notebook.
with open('../models/fine/history_fine.pkl', 'rb') as fin:
    loaded_fine = pickle.load(fin)
# The fine-tuning cell pickles the plain `history_fine.history` dict, while
# later cells read `history_fine.history`. Wrap a bare dict in a History
# object so `history_fine` looks the same whether it came from training or
# from disk.
if hasattr(loaded_fine, 'history'):
    history_fine = loaded_fine
else:
    history_fine = keras.callbacks.History()
    history_fine.history = loaded_fine
model.load_weights('../models/fine/fine.03-0.48.hdf5')

In [None]:
# Combine the history objects from the 2 phases
tmp_hist = history.history.copy()
mid_epoch = len(tmp_hist)
for k in tmp_hist:
    tmp_hist[k].extend(history_fine.history[k])

In [None]:
# Plot the combined training process history
fig, axs = plt.subplots(1, 2, figsize=(12, 4))

ax = plot_train_result(history.history, 'loss', ax=axs[0])
ax.axvline(mid_epoch, color='black', linestyle='--')

ax = plot_train_result(history.history, 'accuracy', ax=axs[1])
ax.axvline(mid_epoch, color='black', linestyle='--')

fig.suptitle('Combined Training Process', x=0.05, ha='left', 
             fontsize=14, fontweight='bold');