In [1]:
import sys, os, time, re, gc
from pathlib import Path
from glob import glob

# Select GPU: enumerate CUDA devices in PCI bus order and expose only device "0".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import vgg16, vgg19, resnet_v2

%matplotlib inline

tf.__version__

'2.3.0'

In [2]:
# Sanity check: list the GPUs TensorFlow can see (an empty list means none is available)
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Determine paths and directories

In [3]:
# Root of the dataset; every other location below is derived from it.
path_imagenet_val_dataset = Path("data/") # path/to/data/
dir_images = path_imagenet_val_dataset / "val" # path/to/images/directory
path_labels = path_imagenet_val_dataset / "ILSVRC2012_validation_ground_truth.txt"
path_synset_words = path_imagenet_val_dataset / "synset_words.txt"
path_meta = path_imagenet_val_dataset / "meta.mat"

## Load data

In [4]:
x_val_paths = glob(str(path_imagenet_val_dataset / "x_val*.npy"))

# Sort shards numerically by the digits in their file name, so that shard 10
# comes after shard 9 instead of after shard 1 (plain string order).
# Only the file name is inspected, so digits in parent directories cannot leak
# into the key; a name without any digits gets key -1 and therefore sorts first
# instead of crashing on int('').
x_val_paths.sort(key=lambda f: int(re.sub(r'\D', '', Path(f).name) or -1))

In [5]:
# Load the validation labels stored next to the image shards
y_val = np.load(path_imagenet_val_dataset / "y_val.npy")

In [6]:
# One-hot encode the labels over 1000 classes
y_val_one_hot = to_categorical(y_val, num_classes=1000)

## Benchmark models

In [7]:
def top_k_accuracy(y_true, y_pred, k=1, tf_enabled=True):
    '''
    Calculates the top-k accuracy of predictions: the fraction of samples whose
    true class is among the k highest-scoring predicted classes.

    numpy implementation is from: https://github.com/chainer/chainer/issues/606

    Args:
        y_true: 2-D array of one-hot encoded ground-truth labels, one row per sample
            (the true class is taken as the argmax of each row).
        y_pred: 2-D array of per-class scores, one row per sample, with classes in
            the same column order as y_true.
        k: number of top-scoring classes to accept as a hit. Must be >= 1.
        tf_enabled: if True, compute with TensorFlow ops; otherwise use numpy.

    Returns:
        The accuracy as a scalar in [0, 1] (numpy float32 on the TensorFlow path,
        numpy float64 on the numpy path).

    Raises:
        ValueError: if k is smaller than 1.
    '''

    # Guard: with k == 0 the slice [:, -0:] below would select *every* class and
    # silently report an accuracy of 1.0.
    if k < 1:
        raise ValueError("k must be a positive integer, got {}".format(k))

    if tf_enabled:
        # argsort is ascending, so the last k columns hold the k best classes.
        top_k_classes = tf.argsort(y_pred)[:, -k:]
        true_classes = tf.argmax(y_true, axis=1, output_type=tf.int32)
        # Transpose to (k, n_samples) so the comparison broadcasts against (n_samples,).
        hits = tf.math.reduce_any(tf.transpose(top_k_classes) == true_classes, axis=0)
        return tf.math.reduce_mean(tf.cast(hits, tf.float32)).numpy()

    top_k_classes = np.argsort(y_pred)[:, -k:]
    return np.any(top_k_classes.T == y_true.argmax(axis=1), axis=0).mean()

In [8]:
# Drop any graph/session state left over from a previous run, then build the
# full VGG19 classifier with its ImageNet weights.
tf.keras.backend.clear_session()
model = vgg19.VGG19(include_top=True, weights="imagenet")

In [17]:
# Run inference shard by shard so only one shard of images is in memory at a time.
# Per-shard predictions are collected in a list and concatenated once at the end:
# this avoids re-copying the growing array on every iteration, and a genuine shape
# mismatch between shards now raises instead of silently discarding earlier results.
y_pred_shards = []
n_shards = len(x_val_paths)
for i, x_val_path in enumerate(x_val_paths):

    x_val = np.load(x_val_path).astype('float32') # loaded as RGB
    x_val = vgg19.preprocess_input(x_val) # converted to BGR

    y_pred_shards.append(
        model.predict(x_val, verbose=0, use_multiprocessing=True, batch_size=64, callbacks=None)
    )

    # Release the shard before loading the next one.
    del x_val
    gc.collect()

    # Report progress only when it lands exactly on a multiple of 5%.
    completed_percentage = (i + 1) * 100 / n_shards
    if completed_percentage % 5 == 0:
        print("{:5.1f}% completed.".format(completed_percentage))

# No shards found -> keep y_pred as None, as before.
y_pred = np.concatenate(y_pred_shards) if y_pred_shards else None

  5.0% completed.
 10.0% completed.
 15.0% completed.
 20.0% completed.
 25.0% completed.
 30.0% completed.
 35.0% completed.
 40.0% completed.
 45.0% completed.
 50.0% completed.
 55.0% completed.
 60.0% completed.
 65.0% completed.
 70.0% completed.
 75.0% completed.
 80.0% completed.
 85.0% completed.
 90.0% completed.
 95.0% completed.
100.0% completed.


#### Top-1 Accuracy

Compare to the top-1 accuracy of 0.713 reported for VGG19 in the Keras documentation.

In [19]:
# Fraction of validation images whose highest-scoring class is the true class
top_k_accuracy(y_true=y_val_one_hot, y_pred=y_pred, k=1)

0.71248

#### Top-5 Accuracy

Compare to the top-5 accuracy of 0.900 reported for VGG19 in the Keras documentation.

In [20]:
# Fraction of validation images whose true class is among the five highest-scoring classes
top_k_accuracy(y_true=y_val_one_hot, y_pred=y_pred, k=5)

0.89986

### Save predictions

In [17]:
# Persist the raw prediction matrix next to the dataset for later reuse
np.save(path_imagenet_val_dataset / "y_pred_VGG19.npy", y_pred)