In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from help_functions import create_model

In [2]:
# Side length of the square model input: used for both entries of the test
# generator's target_size and passed to create_model as image_dimension.
IMAGE_DIMENSION = 64

In [3]:
# Directory of the training run under evaluation: history.csv is read from it
# and tf.train.latest_checkpoint looks for the weights in it.
# The trailing slash matters because paths are built by string concatenation.
checkpoint_dir = 'checkpoints/naive_26_labels_weights_20220531/'

In [4]:
# Load the per-epoch training history written next to the checkpoints and
# show its shape followed by its full contents.
training_metrics = pd.read_csv(f'{checkpoint_dir}history.csv')
print(training_metrics.shape, training_metrics, sep='\n')

(1, 5)
   epoch  accuracy      loss  val_accuracy  val_loss
0      0   0.21582  0.134346      0.264709  0.184852


In [None]:
# Plot training metrics: loss & accuracy, training vs. validation, per epoch.

epochs = training_metrics.shape[0]

acc = training_metrics.accuracy.values
loss = training_metrics.loss.values

val_acc = training_metrics.val_accuracy.values
val_loss = training_metrics.val_loss.values

epoch_axis = range(epochs)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Markers are required: with a single recorded epoch a bare line plot has no
# segment to draw and the panel would otherwise be empty.
acc_ax.plot(epoch_axis, acc, marker='o', label='Training Accuracy')
acc_ax.plot(epoch_axis, val_acc, marker='o', label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_xlabel('Epoch')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epoch_axis, loss, marker='o', label='Training Loss')
loss_ax.plot(epoch_axis, val_loss, marker='o', label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_xlabel('Epoch')
loss_ax.set_title('Training and Validation Loss')
plt.show()

In [8]:
# Build the test-set iterator from the stored test split.
# Images are resized to IMAGE_DIMENSION x IMAGE_DIMENSION and served in
# batches of 32; labels come from the 'labels' column.
test_df = pd.read_json('data/splitted_dfs_500k_20220602/test_df.json.bz2', compression='bz2')
test_generator = ImageDataGenerator()
print('\n----------- Test images -----------')
test = test_generator.flow_from_dataframe(dataframe=test_df,
                                          directory='/scratch/WIT_Dataset/images',
                                          x_col='url',
                                          y_col='labels',
                                          batch_size=32,
                                          class_mode='categorical',
                                          # flow_from_dataframe shuffles by default; evaluation must
                                          # keep file order so that model.predict(test) rows line up
                                          # with test.classes when metrics are computed.
                                          shuffle=False,
                                          validate_filenames=True,
                                          target_size=(IMAGE_DIMENSION, IMAGE_DIMENSION))
# Number of distinct labels found by the generator; used to size the model head.
N_LABELS = len(test.class_indices)



----------- Test images -----------
Found 25000 validated image filenames belonging to 24 classes.


In [12]:
# Create model and load model weights
# NOTE(review): the checkpoint directory name mentions 26 labels while the
# recorded generator output reports 24 classes -- confirm N_LABELS matches the
# head the weights were trained with.
model = create_model(model_name='EfficientNetB0', n_labels=N_LABELS, image_dimension=IMAGE_DIMENSION)
latest = tf.train.latest_checkpoint(checkpoint_dir)
print(latest)
# latest_checkpoint returns None when the directory holds no checkpoint;
# fail with a clear message instead of an obscure error inside load_weights.
if latest is None:
    raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
model.load_weights(latest)

# Evaluate model
# Unpacking into two names assumes the model was compiled with exactly one
# metric besides the loss -- TODO confirm in create_model.
loss, acc = model.evaluate(test, verbose=1)
print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

In [None]:
# Multi-label evaluation of the restored model on the test set:
# ROC AUC, the sklearn classification report, per-class hit counts and
# bar charts of per-class precision and recall.
from collections import Counter

from sklearn.metrics import roc_auc_score, classification_report

# NOTE(review): rows of `predictions` only line up with `test.classes` if the
# generator yields samples in file order (shuffle=False) -- confirm.
predictions = model.predict(test, verbose=1)
threshold = 0.5
y_pred = 1 * (predictions > threshold)

# Multi-hot ground truth: test.classes holds, per sample, its class indices.
y_true = np.zeros(y_pred.shape)
for row_idx, row in enumerate(test.classes):
    for idx in row:
        y_true[row_idx, idx] = 1

N_CLASSES = y_true.shape[1]

# ROC AUC is a ranking metric: feed it the raw scores, not the thresholded
# 0/1 decisions (which collapse every ROC curve to a single operating point).
print(f'ROC AUC: {roc_auc_score(y_true, predictions):.4f}')

# Class names ordered by their column index in y_true / predictions.
name_id_map = test.class_indices
class_names = sorted(name_id_map, key=name_id_map.get)

metrics_df = pd.DataFrame(
    classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
).transpose()
# The report appends summary rows (averages) after the per-class rows; pad the
# numeric class index with None for however many of them there are.
n_summary_rows = len(metrics_df) - N_CLASSES
metrics_df['index'] = np.concatenate((np.arange(start=0, stop=N_CLASSES), [None] * n_summary_rows))
print(metrics_df)

# Work on plain arrays so that the argsort results index positionally.
per_class_metrics = metrics_df.iloc[:N_CLASSES]
precision_values = per_class_metrics['precision'].to_numpy()
recall_values = per_class_metrics['recall'].to_numpy()

# Precision
sorted_indices_precision = np.argsort(precision_values)
sorted_precisions_per_class = precision_values[sorted_indices_precision]
# Recall
sorted_indices_recall = np.argsort(recall_values)
sorted_recalls_per_class = recall_values[sorted_indices_recall]

print('\n\n ----------- PER-CLASS ACCURACY ----------- \n ')
# Per class: how many of the samples carrying that label were also predicted
# with it (same threshold as y_pred), out of all samples carrying the label.
total = Counter()
correct = Counter()
for true_labels, predicted_row in zip(test.classes, y_pred):
    total.update(true_labels)
    true_label_set = set(true_labels)
    for predicted_label in np.flatnonzero(predicted_row).tolist():
        if predicted_label in true_label_set:
            correct[predicted_label] += 1

for k in sorted(total.keys()):
    print(class_names[k].split(".")[-1], "{}/{} == {}".format(correct[k], total[k], round(correct[k]/total[k], 3)))

fig, axs = plt.subplots(1, 2, figsize=(12, 12))
class_name_array = np.array(class_names)

axs[0].set_title('Precision per class')
axs[0].barh(range(N_CLASSES), sorted_precisions_per_class, color='blue', alpha=0.6)
axs[0].set_yticks(range(N_CLASSES))
axs[0].set_yticklabels(class_name_array[sorted_indices_precision])
axs[0].set_xlabel('Precision')
axs[0].grid(True)

# Recall is sorted independently of precision, so this panel needs its own
# labels; leaving them blank would suggest the bars share the left panel's order.
axs[1].set_title('Recall per class')
axs[1].barh(range(N_CLASSES), sorted_recalls_per_class, color='blue', alpha=0.6)
axs[1].set_yticks(range(N_CLASSES))
axs[1].set_yticklabels(class_name_array[sorted_indices_recall])
axs[1].set_xlabel('Recall')
axs[1].grid(True)

fig.tight_layout()
plt.show()