# Keras MNIST TPU version

This notebook demonstrates the basic usage of a Colab TPU by training a small Keras CNN on MNIST.

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import os
import pprint
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Dropout, Flatten

## Check device
Make sure that we are really using a TPU. If the notebook is not connected to a TPU backend, open the `Runtime` menu, choose `Change runtime type`, and set the hardware accelerator to `TPU`.

In [2]:
# The TPU endpoint is read from the COLAB_TPU_ADDR environment variable; when
# the variable is unset we report that no TPU was found.
try:
  device_name = os.environ['COLAB_TPU_ADDR']
except KeyError:
  print('TPU not found')
else:
  # Only the environment lookup is guarded by the `except KeyError` above, so
  # an unrelated KeyError raised while talking to the TPU is not silently
  # misreported as "TPU not found".
  TPU_ADDRESS = 'grpc://' + device_name
  print('Found TPU at: {}'.format(TPU_ADDRESS))

  # Open a short-lived session against the TPU address just to list the
  # devices it exposes.
  with tf.Session(TPU_ADDRESS) as session:
    devices = session.list_devices()

  print('TPU devices:')
  pprint.pprint(devices)

Found TPU at: grpc://10.65.70.10:8470
TPU devices:
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 4552836185190424279),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 16074493523987032821),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8406393325839712748),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 7996198981334587940),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 3220244212352117603),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 10407558437934682077),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 16424903232580051683),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 17095809889331669049),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:5, TPU, 17179869184, 132018

In [3]:
# Load MNIST as a (train, test) pair, each holding (images, labels).
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

# Quick sanity check of the raw array shapes and the sample counts.
print('X_train original shape', X_train.shape)
print('y_train original shape', y_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

X_train original shape (60000, 28, 28)
y_train original shape (60000,)
60000 train samples
10000 test samples


In [0]:
# Add a trailing channel axis (28x28 -> 28x28x1), convert to float32, and
# divide by 255. The leading dimension is inferred with -1 instead of being
# hard-coded, so the cell also works when the arrays hold a different number
# of samples.
#
# NOTE: this cell rebinds X_train / X_test to their rescaled versions, so
# re-running it without first re-running the cell that loads the data divides
# the values by 255 a second time.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255

In [0]:
# Small CNN classifier: two convolutions, one max-pool, then a dense head
# with dropout and a 10-way softmax output.
model = tf.keras.models.Sequential([
    # Convolutional feature extractor on 28x28 single-channel inputs.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Dense classification head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [6]:
# This is almost the same as normal Keras training, except that the model
# first has to be transferred to the TPU.
tpu_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu=TPU_ADDRESS)
tpu_strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu_resolver)
tpu_model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)

# Labels are passed as integer class ids, hence the sparse loss and metric.
tpu_model.compile(
    optimizer=tf.train.AdamOptimizer(learning_rate=1e-3),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['sparse_categorical_accuracy'],
)

def train_gen(batch_size):
  """Yield an endless stream of random contiguous training batches.

  Each batch is a slice of `batch_size` consecutive rows taken from the
  module-level `X_train` / `y_train` arrays, starting at a uniformly random
  offset.

  Args:
    batch_size: Number of consecutive samples per batch. Must not exceed the
      number of rows in `X_train`.

  Yields:
    A `(features, labels)` tuple sliced from `X_train` and `y_train`.

  Raises:
    ValueError: If `batch_size` is larger than the number of rows in
      `X_train`.
  """
  while True:
    num_samples = X_train.shape[0]
    if batch_size > num_samples:
      raise ValueError(
          'batch_size ({}) exceeds the number of training samples ({})'.format(
              batch_size, num_samples))
    # np.random.randint excludes its upper bound, so add 1 to make the last
    # valid offset (num_samples - batch_size) reachable; otherwise the final
    # training sample would never be drawn and batch_size == num_samples
    # would fail.
    offset = np.random.randint(0, num_samples - batch_size + 1)
    yield X_train[offset:offset + batch_size], y_train[offset:offset + batch_size]


INFO:tensorflow:Querying Tensorflow master (b'grpc://10.65.70.10:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 4552836185190424279)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 16074493523987032821)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8406393325839712748)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 7996198981334587940)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 3220244212352117603)
INFO:tensorflow:*** Available Device: _DeviceAt

## Train the model and time it

In [7]:
%%time
tpu_model.fit_generator(
    train_gen(128),
    epochs=12,
    steps_per_epoch=100,
    validation_data=(X_test, y_test),
)

Epoch 1/12
INFO:tensorflow:New input shapes; (re-)compiling: mode=train, [TensorSpec(shape=(16, 28, 28, 1), dtype=tf.float32, name='conv2d_input0'), TensorSpec(shape=(16, 1), dtype=tf.float32, name='dense_1_target0')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for conv2d_input
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 1.8830342292785645 secs
INFO:tensorflow:Setting weights on TPU model.
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for conv2d_input
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 1.2138936519622803 secs
INFO:tensorflow:New input shapes; (re-)compiling: mode=eval, [TensorSpec(shape=(2, 28, 28, 1), dtype=tf.float32, name='conv2d_input0'), TensorSpec(shape=(2, 1), dtype=tf.float32, name='dense_1_target0')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for conv2d_input
INFO:te

<tensorflow.python.keras.callbacks.History at 0x7fb235a71c50>

Wall time for training is 56.8 seconds using GC-TPU
- compare vs GC-<b>GPU</b> : https://colab.research.google.com/drive/1rawejJ21j-rN8HVG584hFFHkOrSzLCCy#scrollTo=cSkui8r5o8Hp
- and vs GC-<b>CPU</b> : https://colab.research.google.com/drive/1btld1Qk3V57FdpyUKnzLe-oDKNh16kAk#scrollTo=xd0mDBYtqOjN


## Check out the performance

In [8]:
# Evaluate on the test set; the first two entries of `score` are the loss
# followed by the metric configured at compile time.
score = tpu_model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[:2]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

INFO:tensorflow:New input shapes; (re-)compiling: mode=eval, [TensorSpec(shape=(4, 28, 28, 1), dtype=tf.float32, name='conv2d_input0'), TensorSpec(shape=(4, 1), dtype=tf.float32, name='dense_1_target0')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for conv2d_input
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 0.6608164310455322 secs
Test loss: 0.052809363519754335
Test accuracy: 0.9864
