In [None]:
''' Import modules and packages '''
import scipy as sp
import os
import numpy as np
import pandas as pd
import random
import PIL
import scipy.ndimage as spi
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array, load_img
%matplotlib inline
np.random.seed(42)

In [None]:
''' Load Train/Test Split '''
train_test_split_path = 'CUB_200_2011/train_test_split.txt'
# Read inside a context manager so the file handle is always closed.
with open(train_test_split_path, 'r') as train_test_split_file:
    lines = train_test_split_file.readlines()

# Each line is "<img_id> <flag>": a flag of '1' marks a training image,
# any other value is treated as a test image.
train_image_ids = []
test_image_ids = []
for line in lines:
    if not line.strip():
        continue  # tolerate blank lines (e.g. a trailing empty line)
    [img_id, train] = line.strip('\n').split(' ')
    if train == '1':
        train_image_ids.append(img_id)
    else:
        test_image_ids.append(img_id)

# np.random.seed() does not seed the stdlib generator used by random.shuffle,
# so seed it here to make the shuffled order reproducible.
random.seed(42)
random.shuffle(train_image_ids)
random.shuffle(test_image_ids)

In [None]:
''' Read images and attributes into dataframes '''
images_filepath = 'CUB_200_2011/images.txt'
img_filepath_df = pd.read_csv(images_filepath, delimiter = " ", header=None)
img_filepath_df.columns = ["img_id", "filepath"]

# NOTE(review): classes.txt presumably maps class ids to class names, so the
# "img_id" column label looks like a misnomer; it is kept unchanged in case
# other code relies on it.
images_classes = 'CUB_200_2011/classes.txt'
img_classes_df = pd.read_csv(images_classes, delimiter = " ", header=None)
img_classes_df.columns = ["img_id", "class"]

images_attributes = 'CUB_200_2011/attributes/image_attribute_labels.txt'
# Split each line by hand and keep the first five fields. The previous
# read_csv(..., delimiter="\n") trick is rejected by newer pandas releases
# (a line terminator is not accepted as a separator). Values stay strings,
# exactly as str.split produced before, so later comparisons against
# str(img_id) keep working.
with open(images_attributes, 'r') as attributes_file:
    attribute_rows = [row.split(' ')[:5]
                      for row in attributes_file.read().splitlines()
                      if row.strip()]
img_attributes_df = pd.DataFrame(
    attribute_rows,
    columns=["img_id", "attribute_id", "is_present", "certainty_id", "time"])

attribute_labels = 'CUB_200_2011/attributes.txt'
attribute_labels_df = pd.read_csv(attribute_labels, header=None, delimiter = " ")
attribute_labels_df.columns = ["attribute_id", "label"]

## Build Training, Validation, and Testing Datasets

In [None]:
''' Create reduced CUB 200-2011 dataset'''
img_dir = 'CUB_200_2011/images/'
data_labels = []
limit = 2000
# Walk only the first `limit` rows and read the file path from the same row,
# instead of re-filtering the whole dataframe once per image.
first_rows = img_filepath_df.head(limit)
for img_idx, filepath in zip(first_rows['img_id'], first_rows['filepath']):
    # The class name is the part of the leading directory after its first '.'
    # (presumably directories look like "<number>.<ClassName>").
    # The earlier strip('.jpg') was dropped: str.strip removes a character set,
    # not a suffix, and the file name part is discarded here anyway.
    obj_class = filepath.split('/')[0].split('.')[1]
    data_labels.append([img_idx, obj_class, img_dir+filepath])

train_data = pd.DataFrame(data_labels, columns = ['img_id', 'class', 'filepath'])

In [None]:
# Load every selected image at 299x299 (the input size the InceptionV3 models
# below are built with) into a single float32 array.
train_imgs = np.array([img_to_array(load_img(img, target_size=(299, 299)))
                           for img in train_data['filepath'].values.tolist()
                      ]).astype('float32')

# The per-image attribute vectors (train_attributes) are built in the next
# cell. An identical copy of that slow loop used to run here as well and its
# result was immediately overwritten, so it has been removed.

# Species label for each image, in the same order as train_imgs.
target_labels = train_data['class']

In [None]:
# Binary attribute vectors: for each image (in train_data order) collect the
# 'is_present' values of all its attribute rows, then stack them as integers.
attr_img_ids = img_attributes_df['img_id']
train_attributes = np.array([
    np.array(img_attributes_df.loc[attr_img_ids == str(img_id)]['is_present'])
    for img_id in train_data['img_id'].values.tolist()
]).astype(int)

In [None]:
# Dataset reduction is currently disabled: the full image set is used as-is.
leave_out_size = 0.0
x_reduced, y_reduced = train_imgs, target_labels

# create train and test datasets (for image inputs)
x_train, x_test, y_train, y_test = train_test_split(x_reduced, y_reduced, 
                                                    test_size=0.3, 
                                                    stratify=np.array(y_reduced), 
                                                    random_state=42)
# Capture the size before the validation split so the report below shows the
# real "initial" train size (x_train is overwritten by the next split).
initial_train_shape = x_train.shape

# create train and validation datasets (for image inputs)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, 
                                                  test_size=0.15, 
                                                  stratify=np.array(y_train), 
                                                  random_state=42)

print('Initial Dataset Size:', train_data.shape)
print('Reduced Dataset Size:', x_reduced.shape)
print('Initial Train and Test Datasets Size:', initial_train_shape, x_test.shape)
print('Train and Validation Datasets Size:', x_train.shape, x_val.shape)
print('Train, Test and Validation Datasets Size:', x_train.shape, x_test.shape, x_val.shape)

In [None]:
# Dataset reduction is currently disabled: all attribute vectors are used.
x_reduced2, y_reduced2 = train_attributes, target_labels

# create train and test datasets (for attributes)
# Same test_size / stratify labels / random_state as the image split.
x_train2, x_test2, y_train2, y_test2 = train_test_split(x_reduced2, y_reduced2, 
                                                    test_size=0.3, 
                                                    stratify=np.array(y_reduced2), 
                                                    random_state=42)
# Capture the size before the validation split so the first report line shows
# the real "initial" train size (x_train2 is overwritten by the next split).
initial_train2_shape = x_train2.shape

# create train and validation datasets (for attributes)
x_train2, x_val2, y_train2, y_val2 = train_test_split(x_train2, y_train2, 
                                                  test_size=0.15, 
                                                  stratify=np.array(y_train2), 
                                                  random_state=42)

print('Initial Train and Test Datasets Size:', initial_train2_shape, x_test2.shape)
print('Train and Validation Datasets Size:', x_train2.shape, x_val2.shape)
print('Train, Test and Validation Datasets Size:', x_train2.shape, x_test2.shape, x_val2.shape)

In [None]:
''' One-Hot Encodings '''
y_train_ohe = pd.get_dummies(y_train.reset_index(drop=True))
y_val_ohe = pd.get_dummies(y_val.reset_index(drop=True))
y_test_ohe = pd.get_dummies(y_test.reset_index(drop=True))
labels_ohe_names = pd.get_dummies(target_labels, sparse=True)

# Classes seen in training but absent from the validation split.
missing_cols = list(set(y_train_ohe.columns) - set(y_val_ohe.columns))

# Align validation/test encodings to the training columns. Appending missing
# columns at the end (as was done before) puts them in a different position
# than in y_train_ohe, so the one-hot index would no longer match the model's
# output unit for that class. reindex() inserts them in place, filled with 0.
y_val_ohe = y_val_ohe.reindex(columns=y_train_ohe.columns, fill_value=0)
y_test_ohe = y_test_ohe.reindex(columns=y_train_ohe.columns, fill_value=0)

y_train_ohe.shape, y_test_ohe.shape, y_val_ohe.shape, labels_ohe_names

In [None]:
# Scale the test images from 0-255 down to 0-1 before feeding them to a model.
x_test_rescaled = [x / 255.0 for x in x_test]

for arrays in (x_test_rescaled, x_test2):
    print(np.shape(arrays))

### Compute Baseline Metrics

In [None]:
# Baseline scores for a species classifier that guesses among 36 classes.
p = 1/36

baseline_accuracy = p

# NOTE(review): the denominator p+1-p is just 1, so this evaluates to
# 2*p*(1-p) - confirm this is the intended baseline F1 formula.
f1_numerator = 2*p*(1-p)
f1_denominator = p+1-p
base_f1 = f1_numerator / f1_denominator

for score in (baseline_accuracy, base_f1):
    print(score)

In [None]:
# Baseline scores for the attribute task.
x_test_flat = np.array(x_test2).flatten()
positive_count = np.sum(x_test_flat)
# Fraction of entries equal to 1, assuming 312 attributes per test vector.
p = positive_count / len(x_test2) / 312

# Accuracy obtained by predicting all zeros.
baseline_accuracy = 1-p
baseline_f1 = 2*p*(1-p)/(p+1-p)

for score in (baseline_accuracy, baseline_f1):
    print(score)

### Data Augmentation

In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
BATCH_SIZE = 32

# Create train generator: rescale to [0, 1] and apply random rotation,
# shifts and horizontal flips.
train_datagen = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=30, 
                                   width_shift_range=0.2,
                                   height_shift_range=0.2, 
                                   horizontal_flip=True)  # boolean, not the string 'true'
train_generator = train_datagen.flow(x_train, y_train_ohe, shuffle=False, 
                                     batch_size=BATCH_SIZE, seed=1)
                                     
# Create validation generator.
# It must come from val_datagen (rescale only): building it from train_datagen
# applied the random augmentations to the validation images as well.
val_datagen = ImageDataGenerator(rescale = 1./255)
val_generator = val_datagen.flow(x_val, y_val_ohe, shuffle=False, 
                                 batch_size=BATCH_SIZE, seed=1)

In [None]:
''' Data augmentation for images '''
def augment(x):
    """Randomly rotate, shift and horizontally flip one image.

    Args:
        x: a single image laid out as (rows, cols, channels); the notebook
            passes 299x299x3 arrays.

    Returns:
        The augmented image as returned by tf.image.random_flip_left_right.
    """
    # random_rotation / random_shift default to a channels-first layout
    # (row_axis=1, col_axis=2, channel_axis=0). The images here are
    # channels-last, so the axes must be given explicitly or the transform
    # is applied across the wrong dimensions.
    hwc_axes = dict(row_axis=0, col_axis=1, channel_axis=2)
    x = tf.keras.preprocessing.image.random_rotation(x, 30, **hwc_axes)
    x = tf.keras.preprocessing.image.random_shift(x, 0.2, 0.2, **hwc_axes)
    x = tf.image.random_flip_left_right(x)

    return x

In [None]:
# Training images: rescale to [0, 1], then apply the random augmentation.
x_train_augmented = [augment(x / 255.0) for x in x_train]

# Validation images: rescale only.
x_val_rescaled = [x / 255.0 for x in x_val]

In [None]:
# Shape check of the train inputs/targets followed by the validation ones.
for arrays in (x_train_augmented, x_train2, y_train_ohe,
               x_val_rescaled, x_val2, y_val_ohe):
    print(np.shape(arrays))

## 1. Define Single-Task Model for Species Classification

In [None]:
''' Baseline Inception V3 Model '''
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, Adamax, Adadelta
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Input, concatenate, Flatten
from tensorflow.keras.applications.inception_v3 import InceptionV3
from keras.utils.np_utils import to_categorical

# InceptionV3 Image Feature Inputs (ImageNet weights, classification head removed)
base_inception = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# One output unit per one-hot column of the training labels.
total_classes = y_train_ohe.shape[1]

# Classification head: pooled features -> two 512-unit ReLU layers -> softmax.
out = base_inception.output
out = GlobalAveragePooling2D()(out)
out = Dense(512, activation='relu')(out)
out = Dense(512, activation='relu')(out)
predictions = Dense(total_classes, activation='softmax')(out)
model = Model(inputs=base_inception.input, outputs=predictions)

# Set to true if we want to fine-tune
for layer in base_inception.layers:
    layer.trainable = True
    
# Compile (`learning_rate` replaces the deprecated `lr` keyword)
model.compile(Adam(learning_rate=.0001), loss='categorical_crossentropy', metrics=['accuracy']) 
model.summary()

### Train Model

In [None]:
# Fit the species classifier on the generators, using whole batches only.
batch_size = BATCH_SIZE
train_steps_per_epoch = len(x_train) // batch_size
val_steps_per_epoch = len(x_val) // batch_size

fit_options = dict(
    steps_per_epoch=train_steps_per_epoch,
    validation_data=val_generator,
    validation_steps=val_steps_per_epoch,
    epochs=20,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

In [None]:
def _plot_curves(curves, ylabel, title):
    """Draw each (values, label) pair on one figure with an 'Epochs' x-axis and show it."""
    for values, label in curves:
        plt.plot(values, label=label)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()

# Loss and accuracy curves of the run that just finished.
_plot_curves([(history.history['loss'], "Train Loss"),
              (history.history['val_loss'], "Validation Loss")],
             'Loss', 'Loss Plot')

_plot_curves([(history.history['accuracy'], "Train Accuracy"),
              (history.history['val_accuracy'], "Validation Accuracy")],
             'Accuracy', 'Accuracy Plot')

# Accuracy values recorded by hand from an earlier 2000-sample run.
train_accuracy_2000 = [0.3506, 0.7565, 0.8800, 0.9430, 0.9628, 0.9750, 0.9827, 0.9940, 0.9957, 0.9922, 0.9983, 0.9966, 0.9948, 0.9957, 0.9991, 0.9905, 0.9914, 0.9818, 0.9862, 0.9793]
val_accuracy_2000 = [0.4219, 0.6458, 0.6615, 0.7552, 0.7292, 0.7552,  0.7656, 0.7448, 0.7812, 0.7708, 0.7500, 0.7708, 0.760, 0.7500, 0.7344, 0.7396, 0.7135, 0.7708]
#train_accuracy_1600 = [0.22826087474822998, 0.7043478488922119, 0.8467391133308411, 0.925000011920929, 0.9641304612159729, 0.9773706793785095, 0.989130437374115, 0.9945651888847351, 0.9945651888847351, 0.9923912882804871, 0.9945651888847351, 0.9858695864677429, 0.9869565367698669, 0.9956521987915039, 0.9934782385826111, 0.9934782385826111, 0.997826099395752, 0.997826099395752, 0.9967391490936279, 0.9967391490936279]

# Current run overlaid on the recorded 2000-sample curves.
_plot_curves([(history.history['accuracy'], "Train Accuracy"),
              (history.history['val_accuracy'], "Validation Accuracy"),
              (train_accuracy_2000, "2000 Samples Train Accuracy"),
              (val_accuracy_2000, "2000 Samples Validation Accuracy")],
             'Accuracy', 'Accuracy vs. Epoch')

In [None]:
# Save only the weights of the species classifier under cub_1_36_model/.
model.save_weights('cub_1_36_model/transfer_learning_species_classifier_weights')

In [None]:
# Save the complete species classifier model under cub_1_36_model/.
model.save('cub_1_36_model/transfer_learning_species_classifier')

### Evaluate Model

In [None]:
from sklearn.metrics import accuracy_score, average_precision_score, precision_score, recall_score, f1_score

# Stack the rescaled test images into one batch and predict class scores.
test_batch = np.reshape(x_test_rescaled, (len(x_test_rescaled), 299, 299,3))
test_predictions = model.predict(test_batch)

# Highest-scoring column name per row is the predicted class.
predictions = pd.DataFrame(test_predictions, columns=labels_ohe_names.columns)
predictions = predictions.idxmax(axis=1).tolist()
test_labels = y_test.tolist()

print("Model Evaluation Summary:")
print("Accuracy = ", accuracy_score(test_labels, predictions))
for caption, scorer in (("Precision = ", precision_score),
                        ("Recall = ", recall_score),
                        ("F1 Score = ", f1_score)):
    print(caption, scorer(test_labels, predictions, average='weighted'))

## 2. Train Single-Task Model for Attribute Estimation

### Define Model

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Input, concatenate, Flatten, ReLU
from tensorflow.keras.applications.inception_v3 import InceptionV3
from keras.utils.np_utils import to_categorical
from tensorflow_addons.metrics import HammingLoss

# Length of the attribute vector predicted per image.
num_attributes = 312

# Backbone: InceptionV3 with ImageNet weights and no classification head.
base_inception = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# Pool the feature maps to a vector, then two fully connected ReLU layers.
img_inputs = GlobalAveragePooling2D()(base_inception.output)
fc1 = Dense(1028, activation='relu')(img_inputs)
fc2 = Dense(512, activation='relu')(fc1)

# Attribute head: one sigmoid unit per attribute.
attr = Dense(num_attributes, activation='sigmoid', name='attr_output')(fc2)

model2 = Model(inputs=base_inception.input, outputs=attr)

In [None]:
''' Define F1 Loss and Loss Function'''
import keras.backend as K

def f1(y_true, y_pred):
    """F1 metric: per-output F1 over the batch, averaged across outputs.

    Predictions are rounded to 0/1 first. Counts are summed over axis 0, so
    precision/recall/F1 are computed per output column and then averaged.

    Args:
        y_true: tensor of 0/1 targets.
        y_pred: tensor of predicted scores, same shape as y_true.

    Returns:
        Scalar tensor with the mean F1.
    """
    # Cast both inputs up front (as f1_loss does) so integer targets can be
    # multiplied with float predictions.
    y_true = K.cast(y_true, 'float32')
    y_pred = K.round(K.cast(y_pred, 'float32'))

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # epsilon keeps the divisions finite when a column has no positives
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    score = tf.where(tf.math.is_nan(score), tf.zeros_like(score), score)
    return K.mean(score)

def f1_loss(y_true, y_pred):
    """Soft-F1 loss: 1 minus the mean per-output F1 on unrounded predictions.

    Unlike the `f1` metric, predictions are not rounded here. Counts are
    summed over axis 0 (per output column) and the F1 values are averaged.

    Args:
        y_true: tensor of 0/1 targets.
        y_pred: tensor of predicted scores, same shape as y_true.

    Returns:
        Scalar tensor, 1 - mean F1.
    """
    y_true, y_pred = K.cast(y_true, tf.float32), K.cast(y_pred, tf.float32)

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # epsilon keeps the divisions finite when a column has no positives
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    score = tf.where(tf.math.is_nan(score), tf.zeros_like(score), score)
    return 1 - K.mean(score)

### Train Model

In [None]:
# Stage 1 of the attribute model training: learning rate 0.01.
# (`learning_rate` replaces the deprecated `lr` keyword.)
model2.compile(Adam(learning_rate=.01), loss='binary_crossentropy', metrics=[f1, 'mse'])

# Train the model
batch_size = BATCH_SIZE
train_steps_per_epoch = x_train.shape[0] // batch_size
val_steps_per_epoch = x_val.shape[0] // batch_size

# No data augmentation here, but the training pixels must still be rescaled
# to [0, 1]: the validation and test images are fed rescaled, and raw 0-255
# training inputs put the two on different scales.
history = model2.fit(x = np.reshape(x_train, (len(x_train), 299, 299, 3)) / 255.0, y = x_train2,
    validation_data=(np.reshape(x_val_rescaled, (len(x_val_rescaled), 299, 299, 3)), x_val2),
    batch_size=batch_size,
    shuffle=False,
    steps_per_epoch=train_steps_per_epoch,
    validation_steps=val_steps_per_epoch,
    epochs=10, verbose=1
)

In [None]:
# Stage 2 of the attribute model training: learning rate 0.001.
# (`learning_rate` replaces the deprecated `lr` keyword.)
model2.compile(Adam(learning_rate=.001), loss='binary_crossentropy', metrics=[f1, 'mse'])

# Train the model
batch_size = BATCH_SIZE
train_steps_per_epoch = x_train.shape[0] // batch_size
val_steps_per_epoch = x_val.shape[0] // batch_size

# No data augmentation here, but the training pixels must still be rescaled
# to [0, 1]: the validation and test images are fed rescaled, and raw 0-255
# training inputs put the two on different scales.
history = model2.fit(x = np.reshape(x_train, (len(x_train), 299, 299, 3)) / 255.0, y = x_train2,
    validation_data=(np.reshape(x_val_rescaled, (len(x_val_rescaled), 299, 299, 3)), x_val2),
    batch_size=batch_size,
    shuffle=False,
    steps_per_epoch=train_steps_per_epoch,
    validation_steps=val_steps_per_epoch,
    epochs=10, verbose=1
)

In [None]:
# Stage 3 of the attribute model training: learning rate 0.0001.
# (`learning_rate` replaces the deprecated `lr` keyword.)
model2.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=[f1, 'mse'])

# Train the model
batch_size = BATCH_SIZE
train_steps_per_epoch = x_train.shape[0] // batch_size
val_steps_per_epoch = x_val.shape[0] // batch_size

# No data augmentation here, but the training pixels must still be rescaled
# to [0, 1]: the validation and test images are fed rescaled, and raw 0-255
# training inputs put the two on different scales.
history = model2.fit(x = np.reshape(x_train, (len(x_train), 299, 299, 3)) / 255.0, y = x_train2,
    validation_data=(np.reshape(x_val_rescaled, (len(x_val_rescaled), 299, 299, 3)), x_val2),
    batch_size=batch_size,
    shuffle=False,
    steps_per_epoch=train_steps_per_epoch,
    validation_steps=val_steps_per_epoch,
    epochs=10, verbose=1
)

### Evaluate Model

In [None]:
# Train and validation loss of the last fit, drawn on a log10 scale.
for history_key, curve_label in (('loss', "Train Loss"),
                                 ('val_loss', "Validation Loss")):
    plt.plot(np.log10(history.history[history_key]), label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Plot')
plt.legend()
plt.show()

In [None]:
def f1_score(y_true, y_pred, thresh=0.2, verbose=False):
    """Precision, recall, F1 and accuracy for one binary attribute vector.

    Args:
        y_true: sequence of 0/1 ground-truth values.
        y_pred: sequence of predicted scores; a score counts as a positive
            prediction when it is >= thresh (and greater than 0).
        thresh: decision threshold applied to y_pred.
        verbose: when True, print the metrics and the TP/FP/FN counts.

    Returns:
        (precision, recall, f1, acc). A ratio whose denominator is zero
        (e.g. precision when nothing is predicted positive) is reported as
        0.0 instead of NaN, so averaging over many samples stays finite.
    """
    def _ratio(num, den):
        # Division that maps x/0 to 0.0 instead of producing NaN/inf.
        num = np.asarray(num, dtype=float)
        den = np.asarray(den, dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            quotient = num / den
        # [()] turns a 0-d result back into a scalar, leaves arrays untouched
        return np.where(den == 0, 0.0, quotient)[()]

    y_true = np.array(y_true)
    scores = np.array(y_pred)
    # Binarise: 1 where the score reaches the threshold, 0 otherwise.
    y_pred = ((scores >= thresh) & (scores > 0)).astype(int)

    TP = np.count_nonzero(y_pred * y_true, axis=0)
    FP = np.count_nonzero(y_pred * (y_true - 1), axis=0)    # predicted 1, truth 0
    FN = np.count_nonzero((y_pred - 1) * y_true, axis=0)    # predicted 0, truth 1
    TN = len(y_pred) - FP - FN - TP

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    acc = _ratio(TP + TN, TP + FP + TN + FN)
    f1 = _ratio(2 * precision * recall, precision + recall)

    if verbose:
        print("Precision = ", precision)
        print("Recall = ", recall)
        print("F1 = ", f1)
        print("TP = ", TP)
        print("FP = ", FP)
        print("FN = ", FN)

    return precision, recall, f1, acc

In [None]:
# Scale the test images from 0-255 down to 0-1 before prediction.
x_test_rescaled = [x / 255.0 for x in x_test]

In [None]:
from sklearn.metrics import mean_squared_error as mse

test_batch = np.reshape(x_test_rescaled, (len(x_test_rescaled), 299, 299, 3))
test_predictions = model2.predict(test_batch)

# Score every test sample separately at a 0.2 threshold and collect the
# (precision, recall, F1, accuracy) values into parallel lists.
prec_scores, recall_scores, f1_scores, acc_scores = [], [], [], []
metric_buckets = (prec_scores, recall_scores, f1_scores, acc_scores)
for y_true, y_pred in zip(x_test2, test_predictions):
    sample_metrics = f1_score(y_true, y_pred, thresh=0.2)
    for bucket, value in zip(metric_buckets, sample_metrics):
        bucket.append(value)

print("Model Evaluation Summary:")
for caption, values in (("Accuracy = ", acc_scores),
                        ("Average Precision = ", prec_scores),
                        ("Average Recall = ", recall_scores),
                        ("Average F1-score = ", f1_scores)):
    print(caption, np.mean(values))
print("MSE = ", mse(x_test2, test_predictions))

## 3. Train Model for Dual Species Classification and Attribute Estimation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Input, concatenate, Flatten
from tensorflow.keras.applications.inception_v3 import InceptionV3
from keras.utils.np_utils import to_categorical

# Size of attribute vector
total_attributes = 312
# Number of species: taken from the one-hot training labels (as the
# single-task model does) so the softmax width always matches the targets
# instead of relying on a hard-coded 36.
total_classes = y_train_ohe.shape[1]

# Feature Extractor
base_inception = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# Shared layers
img_inputs = base_inception.output
img_inputs = GlobalAveragePooling2D()(img_inputs) # pool features to 1D
fc1 = Dense(1028, activation='relu')(img_inputs) # operate on images features only
fc2 = Dense(512, activation='relu')(fc1)

# Species Classifier: one extra dense layer on top of the shared features
fc3 = Dense(512, activation='relu')(fc2)
species = Dense(total_classes, activation='softmax', name='species_output')(fc3)

# Attributes Classifier: reads the shared features directly
attr = Dense(total_attributes, activation='sigmoid', name='attr_output')(fc2)

# Two outputs, in this order: [species, attr]
model4 = Model(inputs=base_inception.input, outputs=[species, attr])

In [None]:
# Define loss functions
import keras.backend as K
def f1(y_true, y_pred):
    """F1 metric: per-output F1 over the batch, averaged across outputs.

    Predictions are rounded to 0/1 first. Counts are summed over axis 0, so
    precision/recall/F1 are computed per output column and then averaged.

    Args:
        y_true: tensor of 0/1 targets.
        y_pred: tensor of predicted scores, same shape as y_true.

    Returns:
        Scalar tensor with the mean F1.
    """
    # Cast both inputs up front so integer targets can be multiplied with
    # float predictions.
    y_true = K.cast(y_true, 'float32')
    y_pred = K.round(K.cast(y_pred, 'float32'))

    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # epsilon keeps the divisions finite when a column has no positives
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    score = tf.where(tf.math.is_nan(score), tf.zeros_like(score), score)
    return K.mean(score)

# One loss per named output of the dual-task model.
losses = {
    "species_output": "categorical_crossentropy",
    "attr_output": "binary_crossentropy",
}
# With these weights only the species loss contributes to the total loss.
lossWeights = {"species_output": 1.0, "attr_output": 0.0}

# Compile (`learning_rate` replaces the deprecated `lr` keyword)
model4.compile(Adam(learning_rate=.001), 
               loss=losses, 
               loss_weights=lossWeights,
               metrics=[f1, 'accuracy'])

model4.summary()

In [None]:
# Accumulators for three loss curves each (train / validation); the plotting
# cell extends them after every training run.
train_losses3 = [[] for _ in range(3)]
val_losses3 = [[] for _ in range(3)]

In [None]:
# Rescale the training images to [0, 1]; print the shape before and after.
print(np.shape(x_train))
x_train_rescaled = []
for x in x_train:
    x_train_rescaled.append(x / 255)
print(np.shape(x_train_rescaled))

In [None]:
''' Train dual-task model '''
EPOCHS = 10

# Re-compile with both tasks weighted equally. The weights are keyed by
# output name (rather than a positional list) so the mapping is explicit,
# and `learning_rate` replaces the deprecated `lr` keyword.
model4.compile(Adam(learning_rate=.0001), 
               loss=losses, 
               loss_weights={"species_output": 1, "attr_output": 1},
               metrics=[f1, 'accuracy'])

# Rescaled images in; one-hot species labels and attribute vectors out.
history = model4.fit(x=np.reshape(x_train_rescaled, (len(x_train_rescaled), 299, 299, 3)),
    y={"species_output": y_train_ohe, "attr_output": x_train2},
    validation_data =(np.reshape(x_val_rescaled, (len(x_val_rescaled), 299, 299, 3)),
        {"species_output": y_val_ohe, "attr_output": x_val2}),
    epochs=EPOCHS,
    verbose=1)

In [None]:
''' Plot loss functions '''
lossNames = ["loss", "species_output_loss", "attr_output_loss"]
plt.style.use("ggplot")
(fig, ax) = plt.subplots(3, 1, figsize=(13, 13))
# loop over the loss names
for (i, l) in enumerate(lossNames):
    # Append this run's history to the accumulators so repeated training runs
    # are plotted as one continuous curve. Note: re-running this cell without
    # training again appends the same history a second time.
    train_losses3[i].extend(history.history[l])
    val_losses3[i].extend(history.history["val_" + l])
    
    # plot the loss for both the training and validation data (log10 scale)
    title = "Loss for {}".format(l) if l != "loss" else "Total loss"
    ax[i].set_title(title)
    ax[i].set_xlabel("Epoch #")
    ax[i].set_ylabel("Loss")
    ax[i].plot(np.arange(0, len(train_losses3[i])), np.log10(train_losses3[i]), label=l)
    ax[i].plot(np.arange(0, len(val_losses3[i])), np.log10(val_losses3[i]), label="val_" + l)
    ax[i].legend()
# save the losses figure
plt.tight_layout()
plt.show()
#plt.close()
# The accumulators are named train_losses3 / val_losses3; the previous
# print(train_losses) / print(val_losses) raised NameError.
print(train_losses3)
print(val_losses3)

### Generate Predictions

In [None]:
# Scale the test images from 0-255 down to 0-1 before prediction.
x_test_rescaled = [x / 255.0 for x in x_test]

In [None]:
# Run the dual-task model on the whole rescaled test set.
test_batch = np.reshape(x_test_rescaled, (len(x_test_rescaled), 299, 299, 3))
test_predictions = model4.predict(test_batch)

# First entry holds the species scores, second the attribute scores.
species_predictions, attr_predictions = test_predictions[0], test_predictions[1]

### Evaluate Species Classifier

In [None]:
# Re-import the sklearn scorers: the notebook's own f1_score helper has
# shadowed sklearn's f1_score by this point.
from sklearn.metrics import accuracy_score, average_precision_score, precision_score, recall_score, f1_score

# Highest-scoring column name per row is the predicted class.
predictions = pd.DataFrame(species_predictions, columns=labels_ohe_names.columns)
predictions = predictions.idxmax(axis=1).tolist()
test_labels = y_test.tolist()

print("Model Evaluation Summary:")
print("Accuracy = ", accuracy_score(test_labels, predictions))
for caption, scorer in (("Precision = ", precision_score),
                        ("Recall = ", recall_score),
                        ("F1 Score = ", f1_score)):
    print(caption, scorer(test_labels, predictions, average='weighted'))

In [None]:
# Recorded output from a previous run of the evaluation cell above:
# Model Evaluation Summary:
# Accuracy =  0.3983333333333333
# Precision =  0.4671287968412892
# Recall =  0.3983333333333333
# F1 Score =  0.40681243580411564

### Evaluate Attributes Classifier

In [None]:
def f1_score(y_true, y_pred, thresh=0.2, verbose=False):
    """Precision, recall, F1 and accuracy for one binary attribute vector.

    Args:
        y_true: sequence of 0/1 ground-truth values.
        y_pred: sequence of predicted scores; a score counts as a positive
            prediction when it is >= thresh (and greater than 0).
        thresh: decision threshold applied to y_pred.
        verbose: when True, print the metrics and the TP/FP/FN counts.

    Returns:
        (precision, recall, f1, acc). A ratio whose denominator is zero
        (e.g. precision when nothing is predicted positive) is reported as
        0.0 instead of NaN, so averaging over many samples stays finite.
    """
    def _ratio(num, den):
        # Division that maps x/0 to 0.0 instead of producing NaN/inf.
        num = np.asarray(num, dtype=float)
        den = np.asarray(den, dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            quotient = num / den
        # [()] turns a 0-d result back into a scalar, leaves arrays untouched
        return np.where(den == 0, 0.0, quotient)[()]

    y_true = np.array(y_true)
    scores = np.array(y_pred)
    # Binarise: 1 where the score reaches the threshold, 0 otherwise.
    y_pred = ((scores >= thresh) & (scores > 0)).astype(int)

    TP = np.count_nonzero(y_pred * y_true, axis=0)
    FP = np.count_nonzero(y_pred * (y_true - 1), axis=0)    # predicted 1, truth 0
    FN = np.count_nonzero((y_pred - 1) * y_true, axis=0)    # predicted 0, truth 1
    TN = len(y_pred) - FP - FN - TP

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    acc = _ratio(TP + TN, TP + FP + TN + FN)
    f1 = _ratio(2 * precision * recall, precision + recall)

    if verbose:
        print("Precision = ", precision)
        print("Recall = ", recall)
        print("F1 = ", f1)
        print("TP = ", TP)
        print("FP = ", FP)
        print("FN = ", FN)

    return precision, recall, f1, acc

In [None]:
from sklearn.metrics import mean_squared_error as mse

# Score every test sample separately at a 0.2 threshold and collect the
# (precision, recall, F1, accuracy) values into parallel lists.
prec_scores, recall_scores, f1_scores, acc_scores = [], [], [], []
metric_buckets = (prec_scores, recall_scores, f1_scores, acc_scores)
for y_true, y_pred in zip(x_test2, attr_predictions):
    sample_metrics = f1_score(y_true, y_pred, thresh=0.2)
    for bucket, value in zip(metric_buckets, sample_metrics):
        bucket.append(value)

print("Model Evaluation Summary:")
for caption, values in (("Accuracy = ", acc_scores),
                        ("Average Precision = ", prec_scores),
                        ("Average Recall = ", recall_scores),
                        ("Average F1-score = ", f1_scores)):
    print(caption, np.mean(values))
print("MSE = ", mse(x_test2, attr_predictions))

In [None]:
# Harmonic mean (F1 formula) of 0.54257 and 0.36656 - presumably an averaged
# precision and recall from one of the evaluation runs.
# NOTE(review): this expression evaluates to about 0.4375, not the 0.4014
# written in the trailing comment; 0.4014 may have been the per-sample
# averaged F1 from the same run - verify.
2 * 0.54257 * 0.36656 / (0.54257 + 0.36656) #= 0.4014

## 4. Plot and Analyze Results

In [None]:
(fig, ax) = plt.subplots(1, 2, figsize=(15, 6))
num_samples = [2000, 1600, 1200, 800]

# F1 scores recorded by hand from earlier runs, one value per entry of
# num_samples: (axis, title, single-task scores, dual-task scores).
panels = [
    (ax[0], 'Attributes F1-Score vs. # Labelled Training Samples',
     [0.4540, 0.3001, 0.3403, 0.3380],
     [0.5522, 0.5407, 0.5317, 0.4375]),
    (ax[1], 'Species F1-Score vs. # Labelled Training Samples',
     [0.7795, 0.7840, 0.7612, 0.7427],
     [0.6986, 0.6773, 0.6151, 0.01278]),
]

for axis, title, single_task_scores, dual_task_scores in panels:
    axis.set_title(title)
    axis.plot(num_samples, single_task_scores, label='Single-task Classifier')
    axis.plot(num_samples, dual_task_scores, label='Dual-task Classifier')
    # Axis labels so each panel can be read on its own.
    axis.set_xlabel('# Labelled Training Samples')
    axis.set_ylabel('F1-Score')
    axis.legend()

### Show Qualitative Examples

In [None]:
def evaluate_attribute_pred(y_true, y_pred, thresh=0.2):
    """Print which attributes were predicted correctly or incorrectly for one image.

    Attribute i (0-based position) is looked up in the module-level
    `attribute_labels_df` under attribute_id i+1.

    Args:
        y_true: sequence of 0/1 ground-truth attribute values.
        y_pred: sequence of predicted scores, same length as y_true.
        thresh: a score >= thresh counts as "attribute predicted present".

    Returns:
        None. The true-positive, false-positive and false-negative labels
        and their counts are printed.

    Raises:
        AssertionError: if some y_true entry is neither 0 nor 1, so not every
            attribute could be placed in one of the four buckets.
    """
    TP = [] # true positive labels
    FP = [] # false positive labels
    FN = [] # false negative labels
    TN = [] # true negative labels
    for i in range(len(y_pred)):
        # Get attribute name
        label = list(attribute_labels_df[attribute_labels_df['attribute_id'] == i+1]['label'].values)
        # A score exactly equal to thresh used to match neither `>` nor `<`,
        # fell into no bucket and tripped the assertion below; it now counts
        # as a positive prediction.
        predicted_present = y_pred[i] >= thresh
        if y_true[i] == 1:
            (TP if predicted_present else FN).append(label)
        elif y_true[i] == 0:
            (FP if predicted_present else TN).append(label)

    assert (len(TP) + len(FP) + len(FN) + len(TN)) == len(y_true)

    print("True Positives: ", np.array(TP).flatten(), '\n')
    print("TP = ", len(TP))
    print("False Positives: ", np.array(FP).flatten(), '\n')
    print("FP = ", len(FP))
    print("False Negatives: ", np.array(FN).flatten(), '\n')
    print("FN = ", len(FN))

In [None]:
# Single-task species classifier on the whole rescaled test set.
test_batch = np.reshape(x_test_rescaled, (len(x_test_rescaled), 299, 299,3))
test_predictions = model.predict(test_batch)

# Highest-scoring column name per row is the predicted class.
predictions = pd.DataFrame(test_predictions, columns=labels_ohe_names.columns)
predictions = predictions.idxmax(axis=1).tolist()
test_labels = y_test.tolist()

In [None]:
# Re-import the sklearn scorers: the notebook's own f1_score helper has
# shadowed sklearn's f1_score by this point.
from sklearn.metrics import accuracy_score, average_precision_score, precision_score, recall_score, f1_score

print("Model Evaluation Summary:")
print("Accuracy = ", accuracy_score(test_labels, predictions))
for caption, scorer in (("Precision = ", precision_score),
                        ("Recall = ", recall_score),
                        ("F1 Score = ", f1_score)):
    print(caption, scorer(test_labels, predictions, average='weighted'))

In [None]:
# Print the position of every test sample whose species was predicted correctly.
for idx, (pred, true) in enumerate(zip(predictions, test_labels)):
    if pred == true:
        print(idx)

In [None]:
'''
    Examples shown in final report:
        Successful Examples: 2, 11
        Failure Examples: 1, 5, 25
'''
indices = [23]
test_labels = list(y_test)
test_attrs = list(x_test2)

for idx in indices:
    test_img = x_test_rescaled[idx]
    true_label = test_labels[idx]
    true_attr = test_attrs[idx]

    # Show true image, class, and attributes
    plt.imshow(test_img.reshape(299, 299, 3))
    plt.show()
    
    print("True Species Class: ", true_label)
    
    # Single-Task Species Classifier
    pred = model.predict(np.reshape(test_img, (1, 299, 299, 3)))
    pred = pd.DataFrame(pred, columns=labels_ohe_names.columns)
    pred = list(pred.idxmax(axis=1))[0]
    
    print("Single-Task Species Prediction: ", pred)
    
    # Single-Task Attributes Classifier
    # The prediction must be bound to y_pred: it was previously stored in
    # `species_pred`, so the evaluation below silently scored a stale y_pred
    # left over from an earlier cell or loop iteration.
    y_pred = model2.predict(np.reshape(test_img, (1, 299, 299, 3)))[0]
    print("Single-Task Attributes Prediction Results:")
    evaluate_attribute_pred(true_attr, y_pred, thresh=0.25)
    
    print()
    
    # Dual-Task Model: pred[0] holds species scores, pred[1] attribute scores
    pred = model4.predict(np.reshape(test_img, (1, 299, 299, 3)))
    species_pred = list(pred[0][0]).index(max(pred[0][0]))
    # Map the index through the same class-name columns used for every other
    # species prediction in this notebook; y_test_ohe only covers the classes
    # present in the test split.
    species_pred = list(labels_ohe_names.columns)[species_pred]
    y_pred = pred[1][0]
    print("Dual-Task Species Prediction: ", species_pred)
    print("Dual-Task Attributes Prediction Results:")
    
    evaluate_attribute_pred(true_attr, y_pred, thresh=0.25)