<a href="https://colab.research.google.com/github/hmlewis-astro/dogrates_tweet_engineering/blob/main/dog_breed_classification_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Change to GPU runtime

#### Navigate to "Runtime > Change runtime type > GPU > Save"


# Mount Google Drive


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write the model
# checkpoint and the epoch history under /content/drive/MyDrive.
drive.mount('/content/drive')


In [None]:
from google.colab import files


# Name this model run

In [None]:
# Label for this run. Later cells compare it against 'final_model' to decide
# whether to log, checkpoint and download artefacts.
run_name = input('Name of model run (e.g. base_model, tuned_model_a, etc.): ')

# Import packages and libraries

In [None]:
import os
import glob
import random
from tqdm import tqdm
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2

import tensorflow as tf
print(tf.__version__)

from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow_datasets as tfds

import tensorflow.keras.backend as K


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print('Found GPU at: \n{}'.format(gpu_info))


In [None]:
from psutil import virtual_memory

# Memory reported by psutil's `total` field, expressed in units of 1e9 bytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')
  

# Get train/val/test data

In [None]:
# Load the 'stanford_dogs' dataset through TensorFlow Datasets.
# `dataset` is indexed by split name ('train' / 'test') in later cells;
# `info` is the accompanying metadata (requested via with_info=True) and is
# used below to translate label indices into breed names.
dataset, info = tfds.load(name='stanford_dogs', with_info=True, shuffle_files=True)


In [None]:
# Callable that converts an integer label index into its breed-name string
# (bound method of the dataset's 'label' feature).
breed_name = info.features['label'].int2str


In [None]:
# Side length (pixels) every image is resized to in `preprocess`.
image_size = 224
# Input shape handed to the pretrained base network: height, width, 3 channels.
image_shape = (image_size, image_size, 3)
# Number of classes: depth of the one-hot labels and width of the output layer.
num_breeds = 120


In [None]:
# Fraction of the original 'test' split that is carved off for validation.
val_frac = 0.5
val_size = int(val_frac*len(dataset['test']))

# Seed for the one-off shuffle that precedes the validation/test partition.
split_seed = 42

training_data = dataset['train']

# Shuffle ONCE with a fixed order. By default `shuffle` draws a new order on
# every iteration, which would make `take`/`skip` return a different partition
# each epoch and leak held-out test examples into the validation set.
# NOTE(review): the data are loaded with shuffle_files=True; if the 'test'
# split spans several files, the upstream read order may still vary between
# passes -- confirm, and load this split without file shuffling if so.
dataset_test = dataset['test'].shuffle(buffer_size=10000,
                                       seed=split_seed,
                                       reshuffle_each_iteration=False)
valid_data = dataset_test.take(val_size)
test_data = dataset_test.skip(val_size)


In [None]:
def preprocess(dat):
    """Turn one raw dataset example into an ``(image, label)`` training pair.

    Args:
        dat: mapping with an ``'image'`` entry and an integer ``'label'`` entry.

    Returns:
        Tuple ``(image, label)``: the image converted to ``tf.float64`` and
        resized to ``image_size`` x ``image_size`` with nearest-neighbour
        sampling, and the label one-hot encoded over ``num_breeds`` classes.
    """
    target_size = (image_size, image_size)

    # Convert to floating point first, then resize to the network input size.
    as_float = tf.image.convert_image_dtype(dat['image'], dtype=tf.float64)
    resized = tf.image.resize(as_float, target_size, method='nearest')

    return resized, tf.one_hot(dat['label'], num_breeds)


In [None]:
def prepare(dataset, batch_size=None):
    """Build the input pipeline for a split: preprocess, shuffle, then batch.

    Args:
        dataset: dataset of raw examples accepted by ``preprocess``.
        batch_size: number of examples per batch; when falsy (``None`` or 0)
            the pipeline is returned unbatched.

    Returns:
        The mapped and shuffled dataset, batched if ``batch_size`` was given.
    """
    # Preprocess with 4 parallel calls, then shuffle through a 1000-element buffer.
    pipeline = dataset.map(preprocess, num_parallel_calls=4).shuffle(buffer_size=1000)

    return pipeline.batch(batch_size) if batch_size else pipeline


# Display some sample dog pictures and corresponding breed labels

In [None]:
# Show the first five training examples, each in its own figure, titled with
# the raw breed name of its label.
for dog in training_data.take(5):
    fig, ax = plt.subplots()
    ax.imshow(dog['image'])
    ax.set_title(breed_name(dog['label']))


# Define base model architecture

In [None]:
tf.keras.backend.clear_session()

# Pretrained ImageNet backbone without its classification head.
# Alternative backbone: tf.keras.applications.ResNet50(input_shape=image_shape, ...)
base_model = tf.keras.applications.MobileNetV2(
    input_shape=image_shape,
    include_top=False,
    weights='imagenet',
)

# Freeze the backbone so only the layers stacked on top of it are trained.
base_model.trainable = False

# adding regularization
# NOTE(review): assigning `kernel_regularizer` on layers that are already built
# may not add any penalty to the training loss -- confirm via `base_model.losses`.
regularizer = tf.keras.regularizers.l1_l2(l1=0.05, l2=0.05)

for layer in base_model.layers:
    if hasattr(layer, 'kernel_regularizer'):
        layer.kernel_regularizer = regularizer


In [None]:
#base_model.summary()

# Define callbacks, load existing model (if one exists), train model

In [None]:
# Dropout rate used for both Dropout layers of the classification head.
dropout = 0.50

# Artefacts kept on the mounted Drive.
history_csv = '/content/drive/MyDrive/dog_breed_classifier_epoch_history.csv'
checkpoint_h5 = '/content/drive/MyDrive/dog_breed_classifier.h5'

# Callbacks used for every run: stop when val_loss has not improved for 8
# epochs (restoring the best weights), and halve the learning rate after 3
# epochs without improvement.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=8, verbose=1, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, verbose=1)
callbacks = [early_stopping, reduce_lr]

print(callbacks)

# Only the final run appends to the epoch log and checkpoints the best model.
if run_name == 'final_model':
    callbacks += [
        tf.keras.callbacks.CSVLogger(history_csv, append=True),
        tf.keras.callbacks.ModelCheckpoint(checkpoint_h5,
                                           save_best_only=True,
                                           save_freq='epoch'),
    ]

    print(callbacks)

# Build and compile a fresh model unless a checkpoint is already on Drive,
# in which case that checkpoint is loaded (whatever the run name).
if not os.path.exists(checkpoint_h5):
    print('Compiling new model.')

    # Frozen backbone followed by a small classification head.
    head = [
        base_model,
        layers.BatchNormalization(),
        layers.GlobalMaxPool2D(),
        layers.Dropout(dropout),
        layers.Dense(320, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(num_breeds, activation='softmax'),
    ]
    model = tf.keras.Sequential(head)

    # The metric order here fixes the order of the values that
    # `model.evaluate` returns after the loss.
    tracked_metrics = [
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        'accuracy',
        tf.keras.metrics.TopKCategoricalAccuracy(k=3),
    ]
    model.compile(optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=tracked_metrics)

else:
    print('Loading previously trained model.')
    model = models.load_model(checkpoint_h5)


In [None]:
# Print the layer-by-layer summary of the model that was just built or loaded.
model.summary()


In [None]:
# Render the architecture diagram to a PNG named after the current run.
architecture_png = f'{run_name}_architecture.png'
tf.keras.utils.plot_model(
    model,
    to_file=architecture_png,
    show_shapes=True,
    show_layer_names=True,
)

# The diagram is only downloaded for the final run.
if run_name == 'final_model':
    files.download('final_model_architecture.png')



In [None]:
!pip install visualkeras

In [None]:
import visualkeras

from PIL import ImageFont


In [None]:
# Font for the legend of the layered diagram (loaded whatever the run name).
font = ImageFont.truetype('/usr/share/fonts/truetype/LiberationMono-Regular.ttf', 28)

# The layered architecture view is only rendered and downloaded for the final run.
if run_name == 'final_model':
    layered_png = 'final_model_architecture_visualkeras.png'

    visualkeras.layered_view(model, font=font, legend=True, to_file=layered_png)
    files.download(layered_png)


In [None]:
# Upper bound on training epochs; the EarlyStopping callback can end the run sooner.
epochs = 250

# Batched input pipelines (64 examples per batch) for training and validation.
train_batches, valid_batches = (
    prepare(split, batch_size=64) for split in (training_data, valid_data)
)

history = model.fit(
    train_batches,
    validation_data=valid_batches,
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)


# Plot metric vs. epoch

In [None]:
def plot_history(history, metric, save_fig=False):
    """Plot one training metric (and its validation counterpart) against epoch.

    Args:
        history: object exposing a ``history`` mapping of metric name to the
            list of per-epoch values, e.g. the return value of ``model.fit``.
        metric: key in ``history.history``. For every metric except ``'lr'``
            the key ``'val_<metric>'`` must be present as well.
        save_fig: when true, save the figure as
            ``'<metric>_v_epoch_final_model.png'`` (200 dpi) and download it.

    Metrics without a dedicated display name are labelled ``'Metric'`` and
    drawn with the y-axis limited to ``[0.1, 1.0]``.
    """
    # Display names for the metrics this notebook tracks.
    metric_labels = {
        'accuracy': 'Accuracy',
        'loss': 'Loss',
        'lr': 'Learning Rate',
        'recall': 'Recall',
        'precision': 'Precision',
        'top_k_categorical_accuracy': r'Top $k$ Categorical Accuracy',
    }

    plt.plot(history.history[f'{metric}'], lw=3.0, color='dodgerblue')
    legend_entries = ['Train']

    # The learning rate has no validation counterpart.
    if metric != 'lr':
        plt.plot(history.history[f'val_{metric}'], lw=3.0, ls='--', color='deeppink')
        legend_entries.append('Validation')

    if metric in metric_labels:
        metric_str = metric_labels[metric]
    else:
        # Fallback for unknown metrics. The original assigned this label to a
        # misspelled variable, so the title line below raised NameError.
        metric_str = 'Metric'
        plt.ylim(0.1, 1.0)

    plt.title(f'{metric_str} v. Epoch')
    plt.xlabel('Epoch')

    plt.ylabel(f'{metric_str}')
    plt.legend(legend_entries)

    plt.tight_layout()

    if save_fig:
        plt.savefig(f'{metric}_v_epoch_final_model.png', dpi=200)
        files.download(f'{metric}_v_epoch_final_model.png')

    plt.close()


In [None]:
# Metrics to plot, listed in the order `model.evaluate` reports them
# (loss first, then the compiled metrics).
metric_list = ['loss', 'precision', 'recall', 'accuracy', 'top_k_categorical_accuracy']

# Figures are saved and downloaded only for the final run.
save_final = (run_name == 'final_model')

for metric in metric_list:
    plot_history(history, metric, save_fig=save_final)


In [None]:
# Score the model on the held-out test split, batched like the training data.
test_batches = prepare(test_data, batch_size=64)
results = model.evaluate(test_batches)

print('\n')
# Pair every metric name with the value at the same position in `results`.
for position in range(len(metric_list)):
  metric = metric_list[position]
  print(f'test {metric}:', results[position])

# Display some sample dog pictures, corresponding breed labels, and predicted breed labels

In [None]:
def split_breed_name(name):
    """Convert a raw label such as ``'n02086240-shih-tzu'`` into a display name.

    Everything up to and including the FIRST hyphen (the identifier prefix) is
    dropped, underscores become spaces and the result is title-cased. Hyphens
    inside the breed name are kept, so ``'n02086240-shih-tzu'`` gives
    ``'Shih-Tzu'``. A name without any hyphen is formatted as a whole instead
    of raising ``IndexError``.

    Args:
        name: raw label string.

    Returns:
        The human-readable, title-cased breed name.
    """
    # Split on the first hyphen only: breed names can contain hyphens themselves.
    _prefix, hyphen, breed = name.partition('-')
    if not hyphen:
        breed = name

    return breed.replace('_', ' ').title()

def display_breed_pred(index, save_fig=False):
    """Plot one test image titled with its predicted and actual breed.

    Args:
        index: position of the example within ``test_data``; negative values
            count from the end.
        save_fig: when true, save the figure as
            ``'predicted_dog_breed_<index>.png'`` (200 dpi) and download it.

    Raises:
        IndexError: if ``index`` is outside the test split.
    """
    position = int(index)
    if position < 0:
        position += len(test_data)
    if position < 0:
        raise IndexError(f'test_data has no example at index {index}')

    # Fetch only the requested example. The previous version converted the
    # whole test split into a Python list on every call just to index it once.
    dog = next(test_data.skip(position).take(1).as_numpy_iterator(), None)
    if dog is None:
        raise IndexError(f'test_data has no example at index {index}')

    # Same preprocessing as in training, plus a leading batch axis.
    image, _ = preprocess(dog)
    image_reshape = tf.expand_dims(image, 0)

    # Indices of the three highest-scoring classes, best first.
    predict_breed = model(image_reshape)
    top_components = tf.reshape(tf.math.top_k(predict_breed, k=3).indices, shape=[-1])

    top_breeds = [breed_name(i) for i in top_components]

    actual_breed = split_breed_name(breed_name(dog['label']))
    # Only the top-1 prediction is shown in the title.
    pred_breed = split_breed_name(top_breeds[0])

    plt.figure()
    plt.imshow(dog['image'])
    plt.title(f"Predicted: {pred_breed}, Actual: {actual_breed}")

    ax = plt.gca()
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)

    plt.tight_layout()

    if save_fig:
      plt.savefig(f'predicted_dog_breed_{index}.png', dpi=200)
      files.download(f'predicted_dog_breed_{index}.png')

    plt.close()
    

In [None]:
# Draw ten test-set positions at random (np.random.choice samples with
# replacement by default) and show the prediction for each.
for i in np.random.choice(len(test_data), size=10):
    display_breed_pred(i, save_fig=save_final)


In [None]:
# Download the epoch log and the model checkpoint from Drive. They are only
# written when run_name == 'final_model', so skip any that do not exist
# instead of letting the cell fail on a missing file.
for artifact in ('/content/drive/MyDrive/dog_breed_classifier_epoch_history.csv',
                 '/content/drive/MyDrive/dog_breed_classifier.h5'):
    if os.path.exists(artifact):
        files.download(artifact)
    else:
        print(f'Skipping download, file not found: {artifact}')


In [None]:
# Write every label name on its own line (in label-index order), then download the file.
breed_names_csv = 'breed_name.csv'

with open(breed_names_csv, 'w', newline='') as f:
    f.writelines("%s\n" % item for item in info.features['label'].names)

files.download(breed_names_csv)
