In [1]:
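# Summary of results (ResNet-50 feature extraction, batch size 32):
# GPU (Tesla K80):    32*40 = 1280 images in 8.26s = ~155 images/s
# CPU (6 processors): 32*8  =  256 images in 23.1s = ~11 images/s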

In [2]:
import os
import sys
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim
from tensorflow.contrib.slim.nets import resnet_v1
# Report the platform and the library versions used for this run.
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("TensorFlow: ", tf.__version__),
):
    print(label, value)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.3
TensorFlow:  1.4.0


In [3]:
!cat /proc/cpuinfo | grep processor | wc -l

6


In [4]:
!nvidia-smi --query-gpu=gpu_name --format=csv

name
Tesla K80


In [5]:
# Images per mini-batch, and number of output columns predict_fn allocates per image.
BATCH_SIZE, RESNET_FEATURES = 32, 2048
# Number of mini-batches of fake data generated for the GPU run and for the CPU run.
BATCHES_GPU, BATCHES_CPU = 40, 8

In [6]:
def give_fake_data(batches):
    """Build a reproducible array of random images to run inference on.

    Re-seeds NumPy's global random generator with 0 on every call, so
    repeated calls with the same `batches` produce identical data.

    Args:
        batches: number of mini-batches to generate; the first axis of the
            result has BATCH_SIZE * batches entries.

    Returns:
        A pair (channels_last, channels_first):
          * channels_last  -- float32 array of shape (BATCH_SIZE*batches, 224, 224, 3)
            holding uniform random values.
          * channels_first -- the same data with axes 1 and 3 swapped, i.e. shape
            (BATCH_SIZE*batches, 3, 224, 224). Note that swapping axes 1 and 3
            also exchanges the two spatial axes.
    """
    np.random.seed(0)
    n_images = BATCH_SIZE * batches
    channels_last = np.random.rand(n_images, 224, 224, 3).astype(np.float32)
    channels_first = np.swapaxes(channels_last, 1, 3)
    return channels_last, channels_first

In [7]:
def yield_mb(X, batchsize):
    """Generate consecutive, complete mini-batches of X.

    Args:
        X: any sequence supporting len() and slicing.
        batchsize: number of items per mini-batch.

    Yields:
        (index, chunk) pairs, where index counts batches from 0 and chunk is
        the slice X[index*batchsize:(index+1)*batchsize]. Only full batches
        are produced: a trailing remainder shorter than batchsize is dropped.
    """
    n_full = len(X) // batchsize
    start = 0
    for batch_no in range(n_full):
        stop = start + batchsize
        yield batch_no, X[start:stop]
        start = stop

In [8]:
# Create the fake data for the GPU run (BATCHES_GPU mini-batches) in two layouts:
#   fake_input_data_cl -- as generated, shape (N, 224, 224, 3)
#   fake_input_data_cf -- axes 1 and 3 swapped, shape (N, 3, 224, 224)
# Only the first layout is fed to the model in this notebook.
fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_GPU)
print(fake_input_data_cl.shape, fake_input_data_cf.shape)

(1280, 224, 224, 3) (1280, 3, 224, 224)


In [9]:
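#%%bash
#wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
#tar -xvf resnet_v1_50_2016_08_28.tar.gz
#rm resnet_v1_50_2016_08_28.tar.gz
# (One-off download: uncomment the lines above to fetch the pre-trained ResNet-50
#  archive, which is expected to provide the resnet_v1_50.ckpt restored below.)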

In [10]:
def predict_fn(classifier, data, batchsize):
    """Run `classifier` over `data` in mini-batches and collect the outputs.

    Relies on two module-level names created in the model-loading cell:
    `sess` (the session used to evaluate `classifier`) and `input_tensor`
    (the placeholder each mini-batch is fed through).

    Args:
        classifier: tensor to evaluate with sess.run for each mini-batch.
        data: array of inputs; split into chunks by yield_mb.
        batchsize: number of inputs per mini-batch.

    Returns:
        float32 array of shape (len(data), RESNET_FEATURES). Because yield_mb
        only yields complete batches, rows belonging to a trailing partial
        batch are never written and stay at zero.
    """
    features = np.zeros((len(data), RESNET_FEATURES), np.float32)
    for batch_no, chunk in yield_mb(data, batchsize):
        lo = batch_no * batchsize
        hi = lo + batchsize
        batch_out = sess.run(classifier, feed_dict={input_tensor: chunk})
        # squeeze() drops the size-1 axes of the model output before storing it.
        features[lo:hi] = batch_out.squeeze()
    return features

## 1. GPU

In [11]:
# Placeholders
# Path of the pre-trained ResNet-50 checkpoint restored at the end of this cell.
checkpoint_file = 'resnet_v1_50.ckpt'
# Input placeholder: float32 images of 224x224x3 with an unconstrained batch
# dimension. predict_fn feeds every mini-batch through this tensor.
input_tensor = tf.placeholder(tf.float32, shape=(None,224,224,3), name='input_image')

# Load the model
# `sess` is the session predict_fn uses (as a module-level name) to run the model.
sess = tf.Session()
arg_scope = resnet_v1.resnet_arg_scope()
with tensorflow.contrib.slim.arg_scope(arg_scope):
    # From the resnet_v1_50 docstring:
    #     num_classes: Number of predicted classes for classification tasks. If None
    #     we return the features before the logit layer.
    # num_classes is not passed here, so despite its name `logits` presumably holds
    # those pre-logit features (predict_fn stores RESNET_FEATURES values per image)
    # -- TODO confirm against the installed slim version.
    logits, end_points = resnet_v1.resnet_v1_50(input_tensor, is_training=False)
    
# Created after the model has been built, then used to load the checkpoint
# weights into `sess`.
saver = tf.train.Saver()
saver.restore(sess, checkpoint_file)

INFO:tensorflow:Restoring parameters from resnet_v1_50.ckpt


In [12]:
# Untimed first pass over the GPU data (presumably a warm-up, so that one-off
# start-up cost is excluded from the timed run in the next cell).
cold_start = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)

In [13]:
%%time
# GPU: 8.26s wall time for BATCHES_GPU (40) batches of BATCH_SIZE (32) images
features = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)

CPU times: user 7.85 s, sys: 1.02 s, total: 8.87 s
Wall time: 8.26 s


## 2. CPU

In [11]:
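# HACK: this section cannot be run straight after section 1 in the same kernel.
# Restart the kernel, re-run the setup cells (In [1]-[10]) without running
# section 1, and then run the cells below. Otherwise inference still runs on the GPU!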

In [12]:
# Hide all GPUs from this process so that the session created in the next cell
# runs on the CPU. Per the HACK note above, this only has the intended effect
# in a freshly restarted kernel in which the GPU section has not been run.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [13]:
# Placeholders
# Same model setup as in the GPU section, repeated here because this section is
# run in a restarted kernel after CUDA_VISIBLE_DEVICES has been set to '-1'.
# NOTE(review): tf.reset_default_graph() plus
# tf.Session(config=tf.ConfigProto(device_count={'GPU': 0})) might remove the
# need for the restart -- untested here, confirm before relying on it.
# Path of the pre-trained ResNet-50 checkpoint restored at the end of this cell.
checkpoint_file = 'resnet_v1_50.ckpt'
# Input placeholder: float32 images of 224x224x3 with an unconstrained batch
# dimension. predict_fn feeds every mini-batch through this tensor.
input_tensor = tf.placeholder(tf.float32, shape=(None,224,224,3), name='input_image')

# Load the model
# `sess` is the session predict_fn uses (as a module-level name) to run the model.
sess = tf.Session()
arg_scope = resnet_v1.resnet_arg_scope()
with tensorflow.contrib.slim.arg_scope(arg_scope):
    # From the resnet_v1_50 docstring:
    #     num_classes: Number of predicted classes for classification tasks. If None
    #     we return the features before the logit layer.
    # num_classes is not passed here, so despite its name `logits` presumably holds
    # those pre-logit features (predict_fn stores RESNET_FEATURES values per image)
    # -- TODO confirm against the installed slim version.
    logits, end_points = resnet_v1.resnet_v1_50(input_tensor, is_training=False)
    
# Created after the model has been built, then used to load the checkpoint
# weights into `sess`.
saver = tf.train.Saver()
saver.restore(sess, checkpoint_file)

INFO:tensorflow:Restoring parameters from resnet_v1_50.ckpt


In [14]:
# Create the fake data for the CPU run. This uses BATCHES_CPU mini-batches, i.e.
# fewer images than the GPU run, so the two timings must be compared per image.
#   fake_input_data_cl -- as generated, shape (N, 224, 224, 3)
#   fake_input_data_cf -- axes 1 and 3 swapped, shape (N, 3, 224, 224)
fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_CPU)
print(fake_input_data_cl.shape, fake_input_data_cf.shape)

(256, 224, 224, 3) (256, 3, 224, 224)


In [15]:
# Untimed first pass over the CPU data (presumably a warm-up, so that one-off
# start-up cost is excluded from the timed run in the next cell).
cold_start = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)

In [16]:
%%time
# CPU: 23.1s wall time for BATCHES_CPU (8) batches of BATCH_SIZE (32) images
features = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)

CPU times: user 1min 58s, sys: 3.84 s, total: 2min 2s
Wall time: 23.1 s
