<a href="https://colab.research.google.com/github/gpan79/DeepLearning/blob/main/Keras_Fashion_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Keras hyperparameter optimization - TPUs
[How to perform Keras hyperparameter optimization x3 faster on TPU for free](https://www.dlology.com/blog/how-to-perform-keras-hyperparameter-optimization-on-tpu-for-free/)

In [None]:
import numpy as np
import keras
from keras import backend as K

# Experiment-wide constants
batch_size, num_classes, epochs = 128, 10, 12
# input image dimensions
img_rows = 28
img_cols = 28

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Add an explicit single-channel axis, placed where the active Keras backend
# expects it (before or after the spatial dimensions).
input_shape = (
    (1, img_rows, img_cols)
    if K.image_data_format() == 'channels_first'
    else (img_rows, img_cols, 1)
)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Scale pixel values from [0, 255] to [0, 1] as float32
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# convert class vectors to binary class matrices
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

Using TensorFlow backend.


Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
# Stack the train and test images along the sample axis into one array;
# presumably Talos makes its own train/validation split from this — verify.
x = np.concatenate([x_train, x_test])

In [None]:
# Sanity check: shape of the train and test images after stacking along the sample axis
x.shape

(70000, 28, 28, 1)

In [None]:
# Stack the one-hot train and test labels in the same order as the images in `x`
y = np.concatenate([y_train, y_test])
y.shape

(70000, 10)

In [None]:
# Talos supplies the hyperparameter scan (ta.Scan) and reporting (ta.Reporting) used below.
# NOTE(review): the version is not pinned — TODO confirm which talos release this
# notebook targets and pin it so the scan/report API stays reproducible.
!pip install -q talos

In [None]:
# Hyperparameter grid for the scan: every combination of these values is a
# candidate (2 x 2 x 2 = 8 combinations).
para = dict(
    dense1_neuron=[256, 512],
    activation=['relu', 'elu'],
    conv_dropout=[0.25, 0.4],
)

In [None]:
import tensorflow as tf
import os
def fashion_mnist_fn_tpu(x_train, y_train, x_val, y_val, params):
    """Build and train one CNN candidate on the Colab TPU for a Talos scan round.

    The network is three (BatchNorm -> 5x5 Conv -> 2x2 MaxPool -> Dropout)
    blocks with 64, 128 and 256 filters, followed by a dense layer, a fixed
    0.5 dropout and a 10-way softmax output.

    Args:
        x_train: Training images for this round; ``x_train.shape[1:]`` is used
            as the model's input shape.
        y_train: Training labels (the loss is categorical cross-entropy, so
            these are presumably one-hot encoded — confirm against caller).
        x_val: Validation images for this round.
        y_val: Validation labels for this round.
        params: Mapping with the keys ``'conv_dropout'`` (cast to float),
            ``'dense1_neuron'`` (cast to int) and ``'activation'``.

    Returns:
        A ``(history, model)`` tuple: the value returned by ``fit`` and the
        result of ``tpu_model.sync_to_cpu()``.

    Raises:
        KeyError: If a required key is missing from ``params`` or the
            ``COLAB_TPU_ADDR`` environment variable is not set.
    """
    # Start from a fresh graph/session so state from earlier rounds does not accumulate.
    tf.keras.backend.clear_session()
    conv_dropout = float(params['conv_dropout'])
    dense1_neuron = int(params['dense1_neuron'])
    activation = params['activation']

    model = tf.keras.models.Sequential()
    for block_index, filters in enumerate((64, 128, 256)):
        if block_index == 0:
            # Only the first layer of a Sequential model needs the input shape.
            model.add(tf.keras.layers.BatchNormalization(input_shape=x_train.shape[1:]))
        else:
            model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Conv2D(filters, (5, 5), padding='same', activation=activation))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(tf.keras.layers.Dropout(conv_dropout))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(dense1_neuron))
    model.add(tf.keras.layers.Activation(activation))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(10))
    model.add(tf.keras.layers.Activation('softmax'))

    # NOTE(review): tf.contrib only exists in TensorFlow 1.x; this cell needs a TF1 runtime.
    tpu_model = tf.contrib.tpu.keras_to_tpu_model(
        model,
        strategy=tf.contrib.tpu.TPUDistributionStrategy(
            tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
        )
    )
    tpu_model.compile(
        optimizer=tf.train.AdamOptimizer(learning_rate=1e-3),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=['categorical_accuracy']
    )

    # Train on the split handed to this function, NOT on the notebook-level
    # `x`/`y` arrays: those contain the validation samples as well, which would
    # leak them into training and make the validation metrics meaningless.
    out = tpu_model.fit(
        x_train, y_train,
        epochs=10,
        batch_size=1024,
        verbose=0,
        validation_data=(x_val, y_val)
    )

    # Hand back the training history plus the model synced back from the TPU.
    return out, tpu_model.sync_to_cpu()


In [None]:
import talos as ta

In [None]:
# Run the hyperparameter scan: the model function is called once per round with
# a parameter combination drawn from `para` (8 rounds in the recorded output).
scan_results = ta.Scan(x, y, para, fashion_mnist_fn_tpu)

  0%|          | 0/8 [00:00<?, ?it/s]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 12%|█▎        | 1/8 [01:37<11:21, 97.29s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 25%|██▌       | 2/8 [03:07<09:31, 95.21s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 38%|███▊      | 3/8 [04:46<08:00, 96.17s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 50%|█████     | 4/8 [06:15<06:16, 94.21s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 62%|██████▎   | 5/8 [07:51<04:44, 94.82s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 75%|███████▌  | 6/8 [09:23<03:07, 93.80s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

 88%|████████▊ | 7/8 [10:58<01:34, 94.28s/it]

INFO:tensorflow:Querying Tensorflow master (b'grpc://10.48.254.178:8470') for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13053220900497494750)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 3416799364142012891)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 15896227349262057063)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2160995130476431836)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11025133417255371479)
INFO:tensorflow:*** Available Device: _Devi

100%|██████████| 8/8 [12:29<00:00, 93.19s/it]

Scan Finished!





In [None]:
# One row per scan round: final train/validation metrics next to the hyperparameters used
scan_results.data

Unnamed: 0,round_epochs,val_loss,val_categorical_accuracy,loss,categorical_accuracy,dense1_neuron,activation,conv_dropout
1,10,1.944123321442377,0.93385714333398,0.233759491784232,0.9145857144764492,256,elu,0.25
2,10,2.6013844833374025,0.9145238096600488,0.264676120529856,0.9036285713740756,512,relu,0.4
3,10,2.8792433555239723,0.92090476165499,0.2245595957960401,0.9186000000817436,512,relu,0.25
4,10,2.6286402245476133,0.9353333328792028,0.2297030824150358,0.9169000000953674,256,relu,0.25
5,10,0.1561698037102108,0.942095237618401,0.2190060072694506,0.9185428572109768,512,elu,0.25
6,10,2.4792048631395613,0.9157619050797962,0.2737321986470903,0.9019142858505248,256,relu,0.4
7,10,0.2169833079179128,0.9209523808842612,0.2636273577179228,0.9033285716193062,512,elu,0.4
8,10,0.1886364178600765,0.9290000001135328,0.2593251811913081,0.9050571429661342,256,elu,0.4


In [None]:
# Zero-based position of the round with the best validation accuracy.
# The metric column is cast to float before comparing, and the positional
# argmax of the underlying array is used so the result does not depend on the
# DataFrame's index labels or on the pandas version: Series.argmax() used to
# return the index *label* and now returns the *position*, so the old
# `argmax() - 1` would pick the wrong model.
model_id = scan_results.data['val_categorical_accuracy'].astype('float').values.argmax()
# Display the 1-based round number, matching the first column of the results table
model_id + 1

5

In [None]:
# Rebuild the best round's model on the CPU from what the scan stored:
# the architecture as JSON and the weights as a separate list.
tf.keras.backend.clear_session()
from tensorflow.keras.models import model_from_json

best_architecture_json = scan_results.saved_models[model_id]
best_weights = scan_results.saved_weights[model_id]

model = model_from_json(best_architecture_json)
model.set_weights(best_weights)

In [None]:
# Inspect the layer stack and parameter counts of the restored model
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 28, 28, 1)         4         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 64)        1664      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 14, 14, 64)        256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 128)       204928    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128)         0         
__________

In [None]:
# Save the restored best model to ./best_model.h5 in the working directory
model.save('./best_model.h5')

In [None]:
# Colab-only helper: send the saved model file to the local machine
from google.colab import files

files.download('./best_model.h5')

In [None]:
# Summary of the scan run: completion time, experiment name, reduction
# settings and the shapes of the x / y arrays that were scanned
scan_results.details

complete_time              11/26/18/11:55
experiment_name             112618114301_
grid_downsample                      None
random_method            uniform_mersenne
reduce_loss                         False
reduction_interval                     50
reduction_method                     None
reduction_metric                  val_acc
reduction_threshold                   0.2
reduction_window                       20
x_shape                (70000, 28, 28, 1)
y_shape                       (70000, 10)
dtype: object

In [None]:
# Wrap the finished Scan object in a Reporting helper
report = ta.Reporting(scan_results)
# Results dataframe; head(-3) shows every row except the last three
# (5 of the 8 rounds here)
report.data.head(-3)

Unnamed: 0,round_epochs,val_loss,val_categorical_accuracy,loss,categorical_accuracy,dense1_neuron,activation,conv_dropout
1,10,1.944123321442377,0.93385714333398,0.233759491784232,0.9145857144764492,256,elu,0.25
2,10,2.6013844833374025,0.9145238096600488,0.264676120529856,0.9036285713740756,512,relu,0.4
3,10,2.8792433555239723,0.92090476165499,0.2245595957960401,0.9186000000817436,512,relu,0.25
4,10,2.6286402245476133,0.9353333328792028,0.2297030824150358,0.9169000000953674,256,relu,0.25
5,10,0.1561698037102108,0.942095237618401,0.2190060072694506,0.9185428572109768,512,elu,0.25


In [None]:
# Number of rounds in the Scan (8 here, matching the 2 x 2 x 2 grid in `para`)
report.rounds()

8

In [None]:
# Highest value reached for the given metric across all rounds — here the
# training categorical_accuracy; note the value comes back as a string
report.high('categorical_accuracy')

'0.9186000000817435'