### Imports

In [11]:
# Model Libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from keras import layers
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

In [None]:
# File Management Libraries and Helper Functions
import os
from pathlib import Path
import glob
import pickle

from breed_helpers import *

In [None]:
# Object and Data Structure Management Libraries
from PIL import Image
import pandas as pd
import numpy as np

### Preprocessing

In [None]:
image_root = 'data/Images'
annot_root = 'data/Annotation'


def _sorted_posix_paths(pattern):
    """Glob `pattern` and return the matches as a sorted array of '/'-separated paths.

    Paths are ordered by their name without file extension, so an image list
    and an annotation list that share the same stems come back in the same order.
    """
    normalized = (p.replace('\\', '/') for p in glob.glob(pattern))
    return np.array(sorted(normalized, key=lambda p: os.path.splitext(p)[0]))


# glob/os.listdir return entries in arbitrary order, but the rest of the notebook
# pairs dog_paths[i] with annotations[i] by position, so both lists are sorted.
dog_paths = _sorted_posix_paths(image_root + '/*/*')
annotations = _sorted_posix_paths(annot_root + '/*/*')

# Folder names look like '<prefix>-<breed>'; keep only the part after the first '-'.
breed_list = [x.split('-', 1)[-1] for x in sorted(os.listdir(image_root))]

# Fail early with a clear message instead of a length error when the DataFrame is built.
assert len(dog_paths) == len(annotations), (
    f'{len(dog_paths)} images but {len(annotations)} annotations'
)

# for i in range(len(breed_list)):
#     breed_list[i] = breed_list[i].split('-', 1)[-1]

In [None]:
# Look up the bounding boxes once per annotation and reuse them for both the
# 'Bbox' and 'Num_Dogs' columns (get_bbox comes from breed_helpers).
bboxes = [get_bbox(x) for x in annotations]

df = pd.DataFrame(
    {
        'Breed': [get_dog_breed(x) for x in annotations],
        # Parent folder is '<id>-<breed>'; index from the end so this does not
        # depend on how many components annot_root has.
        'Folder_Dir': [x.split('/')[-2].split('-')[0] for x in annotations],
        'Image_Dir': [x.split('/')[-1] for x in annotations],
        'Bbox': bboxes,
        'Num_Dogs': [len(boxes) for boxes in bboxes],
        # Paired with the annotations by position.
        'Image_Path': dog_paths,
    }
)
df.head()

In [None]:
# Dump the three path/label collections built during preprocessing, one per line.
for collection in (breed_list, annotations, dog_paths):
    print(collection)

### Viewing/Experiments

In [None]:
# Show the first 11 rows of the annotation table.
df.head(n=11)

In [None]:
# Report every entry of `images` whose shape is not the expected (299, 299, 3).
# NOTE(review): `images` is not defined anywhere in the visible notebook — confirm
# where it is meant to come from before relying on this cell.
expected_shape = (299, 299, 3)
for i in range(len(images)):
    if np.shape(images[i]) == expected_shape:
        continue
    print(i)
    print(images[i])

In [None]:
# Target size for the resize experiment. The notebook never defined these names;
# 299x299 matches the shape check above and the shape of the pickled dataset.
desired_width, desired_height = 299, 299

# Resize one arbitrary sample and confirm the resulting array shape.
sample_path = df['Image_Path'].iloc[13680]
with Image.open(sample_path) as raw_image:
    # convert()/resize() return new in-memory images, so the file can be closed afterwards.
    item = raw_image.convert('RGB').resize((desired_width, desired_height))
np.shape(np.asarray(item))

In [None]:
# Join the per-image arrays in X along a new leading axis.
# NOTE(review): X is only assigned further down (pickle load), so this cell
# depends on execution order — confirm it is still needed.
X = np.stack(X, axis=0)

In [None]:
# Flatten the per-image box lists into one sequence of (xmin, ymin, xmax, ymax) boxes.
all_boxes = [bbox for bbox_arr in df.Bbox for bbox in bbox_arr]

# Horizontal and vertical extent of every box, in box order.
x_len = [xmax - xmin for xmin, _, xmax, _ in all_boxes]
y_len = [ymax - ymin for _, ymin, _, ymax in all_boxes]

# Summary statistics of the box extents.
x_avg, x_min, x_max = sum(x_len) / len(x_len), min(x_len), max(x_len)
y_avg, y_min, y_max = sum(y_len) / len(y_len), min(y_len), max(y_len)

print(f'x_min: {x_min}, x_avg: {x_avg}, x_max: {x_max}, y_min: {y_min}, y_avg: {y_avg}, y_max: {y_max}')
# Full distributions, largest first.
print(sorted(x_len, reverse=True))
print(sorted(y_len, reverse=True))

In [None]:
# Sanity check: the image path derived from the first annotation must equal
# the first globbed image path.
first_annotation = annotations[0]
print(first_annotation)
assert get_image_path(first_annotation) == dog_paths[0]
print(dog_paths[0])

In [None]:
# Print the bounding boxes of the first few annotations.
preview_count = 8
for idx in range(preview_count):
    print(get_bbox(annotations[idx]))

In [None]:
# Preview grid: the first images with their bounding boxes and breed label drawn on top.
n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 6))

# Hide the axes of every panel, including ones left empty when there are fewer images.
for ax in axes.flat:
    ax.axis("off")

for ax, annotation in zip(axes.flat, annotations[:n_rows * n_cols]):
    # The breed is the same for every box of an image, so look it up once.
    breed = get_dog_breed(annotation)

    # Close the file as soon as it has been drawn; imshow copies the pixel data.
    with Image.open(get_image_path(annotation)) as im:
        ax.imshow(im)

    for xmin, ymin, xmax, ymax in get_bbox(annotation):
        ax.plot([xmin, xmax, xmax, xmin, xmin], [ymin, ymin, ymax, ymax, ymin])  # showing border
        ax.text(xmin, ymin, breed, bbox={'ec': None})

In [None]:
# def create_cropped():
#     #plt.figure(figsize=(10,6))
#     for i in range(len(dog_image_paths)):
#         bbox = get_bbox(annotations[i])
#         dog = get_image_path(annotations[i])
#         im = Image.open(dog)
#         for j in range(len(bbox)):
#             im2 = im.crop(bbox[j])
#             #im2 = im2.resize((331,331), Image.ANTIALIAS)
#             new_path = dog.replace('data/Images/','data/Cropped/')
#             new_path = new_path.replace('.jpg', '-' + str(j) + '.jpg')
#             im2 = im2.convert('RGB')
#             head, tail = os.path.split(new_path)
#             Path(head).mkdir(parents=True, exist_ok=True)
#             im2.save(new_path)

### Train Test Split

In [12]:
# Load the preprocessed dataset. The pickle holds a mapping whose values
# unpack, in order, to the image array X and the label array y.
# NOTE(review): pickle.load executes arbitrary code — only load files you produced yourself.
dataset_path = 'data/shuffled_brute_resized_299_images.pickle'
with open(dataset_path, 'rb') as fh:
    X, y = pickle.load(fh).values()

In [14]:
# Report the shapes of the loaded features and labels.
x_shape, y_shape = np.shape(X), np.shape(y)
print(f'X size:{x_shape}, y size:{y_shape}')

X size:(20580, 299, 299, 3), y size:(20580,)


In [17]:
# First split: 80% train, 20% held out, stratified by label.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
# Second split: halve the held-out part into test and validation sets.
X_test, X_val, y_test, y_val = train_test_split(
    X_holdout, y_holdout, test_size=0.5, stratify=y_holdout, random_state=1
)
# Drop the intermediate arrays so they do not stay alive in the kernel.
del X_holdout, y_holdout

print("X train, test, val: ", len(X_train), len(X_test), len(X_val))
print("y train, test, val: ", len(y_train), len(y_test), len(y_val))

X train, test, val:  16464 2058 2058
y train, test, val:  16464 2058 2058


### Model Definition

In [None]:
class lr_scheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule with a linear ramp followed by inverse-square-root decay.

    For a given step the rate is

        rsqrt(d_model) * min(rsqrt(step) * decay_scalar,
                             step * warmup_steps ** -1.5 * ramp_scalar)

    With both scalars left at 1 the two terms are equal at step == warmup_steps,
    so the rate rises linearly up to that step and decays afterwards.

    Args:
        d_model: Scale term; the rate is multiplied by 1 / sqrt(d_model).
        warmup_steps: Controls the slope of the ramp term (must be non-zero).
        ramp_scalar: Multiplier applied to the ramp term.
        decay_scalar: Multiplier applied to the decay term.
    """

    def __init__(self, d_model, warmup_steps=4000, ramp_scalar=1, decay_scalar=1):
        super().__init__()

        # Keep the value as passed in for get_config(); the float32 tensor is
        # what __call__ uses in the math.
        self._d_model_config = d_model
        self.d_model = tf.cast(d_model, tf.float32)
        self.ramp_scalar = ramp_scalar
        self.decay_scalar = decay_scalar

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        """Return the learning rate for `step` as a float32 tensor."""
        step = tf.cast(step, dtype=tf.float32)
        arg1 = tf.math.rsqrt(step) * self.decay_scalar
        arg2 = step * (self.warmup_steps ** -1.5) * self.ramp_scalar
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        """Return every constructor argument so from_config() rebuilds an equal schedule."""
        return {
            "d_model": self._d_model_config,
            "warmup_steps": self.warmup_steps,
            "ramp_scalar": self.ramp_scalar,
            "decay_scalar": self.decay_scalar,
        }

    @classmethod
    def from_config(cls, config):
        """Build a schedule from a dict produced by get_config()."""
        return cls(**config)

In [None]:
# Random augmentations applied to the input images.
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomTranslation(0.1, 0.1),
        layers.RandomZoom(0.1),
        layers.RandomRotation(0.2),
        layers.RandomContrast(0.1),  # was misspelled 'RandonContrast' (AttributeError)
    ]
)

In [None]:
# Convolutional feature extractor: two conv + max-pool stages.
# Sequential takes its layers as a single list; the Keras keyword names are
# `kernel_size` (Conv2D) and `strides` (MaxPool2D).
# NOTE(review): for 299x299 inputs with the default 'valid' padding this yields a
# 144x144x64 feature map (~1.3M values once flattened) — confirm that is intended.
feature_learning = keras.Sequential(
    [
        layers.Conv2D(32, kernel_size=5, activation='relu'),
        layers.MaxPool2D(pool_size=3, strides=2),

        layers.Conv2D(64, kernel_size=3, activation='relu'),
        layers.MaxPool2D(pool_size=2, strides=1),
    ]
)

In [None]:
# Fully connected classification head: flatten, four ReLU layers of decreasing
# width, then a 120-way softmax output.
# Sequential takes its layers as a single list, not as separate positional arguments.
classifier = keras.Sequential(
    [
        layers.Flatten(),
        layers.Dense(1024, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(120, activation='softmax'),
    ]
)

In [None]:
# Full pipeline: augmentation -> convolutional features -> classification head.
# Sequential takes its layers as a single list, not as separate positional arguments.
model = keras.Sequential(
    [
        data_augmentation,
        feature_learning,
        classifier,
    ]
)

### Model Training

In [None]:
# Hyperparameters for the training run.
# NOTE(review): seq_layers, dff, cnn_modules and dropout_rate are not referenced
# by any visible cell — confirm they are still needed.
seq_layers, dff = 4, 2048
cnn_modules, d_model = 4, 128
dropout_rate = 0.2
epochs = 50

# Number of distinct breeds in the annotation table.
num_classes = df['Breed'].nunique()

### Training Plots

In [None]:
# Training vs. validation accuracy per epoch, read from the Keras History object.
fig, ax = plt.subplots()
ax.set_title('Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
for metric, label in (('accuracy', 'Train Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    ax.plot(history.history[metric], label=label)
ax.legend()

In [None]:
# Training vs. validation loss per epoch, read from the Keras History object.
fig, ax = plt.subplots()
ax.set_title('Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
#ax.axis([75, 200, 1.2, 2.2])
for metric, label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[metric], label=label)
ax.legend()

In [None]:
# Sample the schedule every 300 steps (99 points) and plot the resulting curve.
learning_rate = lr_scheduler(d_model)
sampled_steps = [300 * i for i in range(1, 100)]
lr = np.array([float(learning_rate(step)) for step in sampled_steps])

fig, ax = plt.subplots()
ax.set_title('Learning Rate')
ax.set_xlabel('Epochs')
ax.set_ylabel('Learning Rate')
ax.plot(lr, label='Learning Rate')
#ax.axis([0, 300, 0, 0.000125])
ax.legend()

# learning_rate = lr_scheduler(d_model, ramp_scalar=1, decay_scalar=1)
# steps_per_epoch = np.ceil((np.shape(X_train)[0]/batch_size))
# print(steps_per_epoch)
# lr = np.array([])
# for i in range(1, np.maximum(epochs+10, 30)):
#     lr = np.append(lr, learning_rate.__call__((i*steps_per_epoch)))

### Model Evaluation

In [None]:
# Model outputs for every test image (one row per sample).
pred = model.predict(x=X_test)

In [None]:
def norm_cm(cm):
    """Return `cm` with every row divided by its row sum.

    Rows that sum to zero are returned as all zeros instead of NaN, so a class
    with no samples does not raise a RuntimeWarning or leave NaN cells.

    Args:
        cm: 2-D array-like of counts, one row per class.

    Returns:
        Float ndarray of the same shape as `cm`.
    """
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # Divide only where the row total is non-zero; other cells keep the 0.0 from `out`.
    return np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)
def accuracy(cm):
    """Return the fraction of all entries of `cm` that lie on its main diagonal."""
    correct = np.trace(cm)
    total = np.sum(cm)
    return correct / total
# Predicted class = index of the largest model output in each row.
predicted_labels = np.argmax(pred, axis=1)
cm = confusion_matrix(y_test, predicted_labels)

normalized_cm = norm_cm(cm)
print('accuracy', accuracy(cm))

In [None]:
# Raw-count confusion matrix (rows: true class, columns: predicted class).
# The old tick labels range(2, 11) named only nine classes and did not match
# this matrix, so seaborn now picks the index labels itself. Per-cell annotation
# is off because the classifier has 120 outputs and the numbers would be illegible.
fig, ax = plt.subplots(figsize=(14, 12))
sns.heatmap(cm, xticklabels='auto', yticklabels='auto', annot=False, square=True, ax=ax)
ax.set(title='Confusion matrix (counts)', xlabel='Predicted class', ylabel='True class');

In [None]:
# Row-normalized confusion matrix (rows: true class, columns: predicted class).
# The old tick labels range(2, 11) named only nine classes and did not match
# this matrix, so seaborn now picks the index labels itself. Per-cell annotation
# is off because the classifier has 120 outputs and the numbers would be illegible.
fig, ax = plt.subplots(figsize=(14, 12))
sns.heatmap(normalized_cm, xticklabels='auto', yticklabels='auto', annot=False, square=True, ax=ax)
ax.set(title='Confusion matrix (row-normalized)', xlabel='Predicted class', ylabel='True class');