# Benchmarking Keras on macOS: CPU (TensorFlow backend) vs. Radeon GPU (PlaidML backend)

Setup reference (deep learning on a Mac with an AMD GPU): https://medium.com/@danbrice.datascience/deep-learning-on-a-mac-with-amd-gpu-4be1f18944a

### prep

In [1]:
import keras
import os
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load MNIST through Keras as (train, test) pairs of image and label arrays
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

# Display the shape of the training images as a sanity check
X_train.shape

(60000, 28, 28)

In [3]:
# NOTE(review): `test` looks like a leftover scratch variable; in the visible notebook
# it is only echoed by the next cell and never used by the benchmark.
test = 'hello'
# Display the shape of the training labels
y_train.shape

(60000,)

In [4]:
# NOTE(review): echoes the placeholder string assigned in the previous cell;
# appears to be leftover debugging rather than part of the benchmark.
test

'hello'

In [5]:
# Display the shape of the held-out test images
X_test.shape

(10000, 28, 28)

In [6]:
# one hot
# Only encode while the labels are still the raw 1-D class ids. Without this guard,
# re-running the cell would recompute n_categories from already-encoded 0/1 values
# and one-hot encode the labels a second time.
if y_train.ndim == 1:
    n_categories = len(np.unique(y_train))
    y_train = keras.utils.to_categorical(y_train, n_categories)
    y_test = keras.utils.to_categorical(y_test, n_categories)

y_train.shape

(60000, 10)

In [7]:
# normalize
def normalize(X):
    """Add a trailing channel axis and divide the values by 255.

    X is indexed on its first three dimensions and reshaped to
    (X.shape[0], X.shape[1], X.shape[2], 1). The result is a new float32
    array; the array passed in is not modified.
    """
    target_shape = (X.shape[0], X.shape[1], X.shape[2], 1)
    with_channel = X.reshape(target_shape)

    # astype() copies, so the division below never touches the caller's data
    as_float = with_channel.astype('float32')
    return as_float / 255

# Only normalize the raw 3-D image arrays. Re-running this cell on arrays that
# already carry the channel axis would divide the pixel values by 255 a second time.
if X_train.ndim == 3:
    X_train = normalize(X_train)
    X_test = normalize(X_test)

X_test.shape

(10000, 28, 28, 1)

In [8]:
def get_mnist_cnn_model(n_categories, input_shape=(28, 28, 1)):
    """Build and compile a small convolutional classifier.

    Architecture: Conv2D(32) -> MaxPooling2D -> Conv2D(64) -> MaxPooling2D
    -> Conv2D(64) -> Flatten -> Dense(64) -> Dense(n_categories, softmax).
    All convolutions use 3x3 kernels and ReLU activations.

    Args:
        n_categories: number of output classes (units in the softmax layer).
        input_shape: shape of a single input sample handed to the first
            Conv2D layer. Defaults to (28, 28, 1), the value that was
            previously hard-coded, so existing calls are unaffected.

    Returns:
        A compiled `keras.models.Sequential` model using categorical
        cross-entropy loss, the Adam optimizer with its default arguments,
        and 'accuracy' as the reported metric.
    """
    layers = keras.layers

    model = keras.models.Sequential()

    # Convolutional feature extractor
    model.add(layers.Conv2D(filters=32,
                            kernel_size=(3, 3),
                            activation='relu',
                            input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(filters=64,
                            kernel_size=(3, 3),
                            activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(filters=64,
                            kernel_size=(3, 3),
                            activation='relu'))

    # Dense classification head
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(n_categories, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    return model

### params

In [14]:
# Training settings shared by the CPU and GPU fit calls so both runs do the same work
EPOCHS = 15
BATCH_SIZE = 100

## CPU w/ tensorflow

In [10]:
# Display the name of the Keras backend currently in use
keras.backend.backend()

'tensorflow'

In [11]:
# Ask the TensorFlow backend module which GPUs it reports (going by the helper's name — TODO confirm).
# NOTE(review): `_get_available_gpus` is underscore-prefixed, so presumably a private
# helper that may change between Keras versions — confirm before relying on it.
keras.backend.tensorflow_backend._get_available_gpus()

[]

In [12]:
# Build a fresh, untrained model for the TensorFlow run
model_cpu = get_mnist_cnn_model(n_categories)

Instructions for updating:
Colocations handled automatically by placer.


In [13]:
%%time

model_cpu.fit(X_train, y_train,
             batch_size=BATCH_SIZE,
             epochs=EPOCHS,
             verbose=1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
CPU times: user 14min 53s, sys: 3min 6s, total: 17min 59s
Wall time: 6min 11s


<keras.callbacks.History at 0x12798d518>

In [14]:
# Score the trained model on the test set; the displayed list holds the loss
# followed by the compiled metric (accuracy).
model_cpu.evaluate(X_test, y_test, verbose=1)



[0.0314761482036105, 0.9913]

## GPU w/ plaidml

In [39]:
# Select PlaidML as the Keras backend through the KERAS_BACKEND environment variable
os.environ['KERAS_BACKEND'] = 'plaidml.keras.backend'

# Reload keras.backend so the new setting takes effect in the already-imported keras package.
# NOTE(review): this import sits mid-notebook rather than with the other imports in the first cell.
import importlib
importlib.reload(keras.backend)

Using plaidml.keras.backend backend.


<module 'keras.backend' from '/Users/jj/anaconda3/lib/python3.6/site-packages/keras/backend/__init__.py'>

In [15]:
# Display the active backend name to confirm the switch took effect
keras.backend.backend()

'plaidml.keras.backend'

In [16]:
# Build a second, untrained model after the backend switch for the PlaidML run
model_gpu = get_mnist_cnn_model(n_categories)

In [17]:
%%time

model_gpu.fit(X_train, y_train,
             batch_size=BATCH_SIZE,
             epochs=EPOCHS,
             verbose=1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
CPU times: user 1min 11s, sys: 1min 4s, total: 2min 15s
Wall time: 2min 33s


<keras.callbacks.History at 0x1a16067d68>

In [18]:
# Score the PlaidML-trained model on the test set; the displayed list holds the
# loss followed by the compiled metric (accuracy).
model_gpu.evaluate(X_test, y_test, verbose=1)



[0.03283998546600342, 0.9918]

# conclusion

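Wall-time comparison for training (15 epochs, batch size 100, one run each):
- CPU via TensorFlow backend: 6min 11s
- GPU via PlaidML backend: 2min 33s

The PlaidML GPU run was roughly 2.4x faster (371 s vs. 153 s), with essentially the same test accuracy (0.9913 on CPU vs. 0.9918 on GPU).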