<a href="https://colab.research.google.com/github/eli-osherovich/mnist-demo/blob/main/tf_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import contextlib
import sys

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Show the Python interpreter version string of the running kernel.
sys.version

'3.6.9 (default, Jul 17 2020, 12:50:27) \n[GCC 8.4.0]'

In [None]:
import tensorflow as tf
from tensorflow import keras


from keras.datasets import mnist, fashion_mnist

In [None]:
# Show the installed TensorFlow version.
tf.__version__

'2.3.0'

In [None]:
# Shell escape: ask the NVIDIA driver tool to list the GPUs attached to this runtime.
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-27c990db-17aa-8f86-26ae-3e364c07f1e8)


In [None]:
# List every physical device TensorFlow reports for this runtime.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# `contextlib.nullcontext` is missing on older interpreters (the lookup raises
# AttributeError there), so fall back to an equivalent no-op context manager.
try:
  nullcontext = contextlib.nullcontext
except AttributeError:
  @contextlib.contextmanager
  def nullcontext(enter_result=None):
    """No-op context manager mirroring `contextlib.nullcontext`.

    Args:
      enter_result: Value bound by `with nullcontext(x) as y`; defaults to
        None, matching the standard-library signature.

    Yields:
      `enter_result`, unchanged.
    """
    yield enter_result


In [None]:
# Set training context (TPU, GPU or CPU)
# Defaults used when no TPU is reachable: a no-op scope and a float16
# mixed-precision policy object. The policy is only constructed here; it is
# not applied in this cell.
training_scope = nullcontext
policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')

try:
  # Resolve the TPU cluster, connect to it and initialize the TPU system.
  # Only after all of that succeeds are `training_scope` and `policy`
  # switched to their TPU variants (strategy scope and bfloat16 policy).
  tpu =  tf.distribute.cluster_resolver.TPUClusterResolver()
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
  tf.config.experimental_connect_to_cluster(tpu)
  tf.tpu.experimental.initialize_tpu_system(tpu)
  tpu_strategy = tf.distribute.TPUStrategy(tpu)
  training_scope = tpu_strategy.scope
  policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
except ValueError:
  # A ValueError anywhere in the block above is treated as "no TPU available"
  # and the defaults set before the `try` stay in effect.
  # NOTE(review): "conect" in the message below is a typo for "connect"; it is
  # a runtime string, so it is left untouched here.
  print("Cannot conect to tpu, using default accelerator")



Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  Tesla P100-PCIE-16GB, compute capability 6.0
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.
Cannot conect to tpu, using default accelerator


In [None]:
# Enable JIT compilation through the TensorFlow optimizer configuration.
tf.config.optimizer.set_jit(True)
# Mixed precision is currently disabled: uncomment the next line to apply the
# `policy` object selected in the training-context cell.
# tf.keras.mixed_precision.experimental.set_policy(policy)

In [None]:
# Dataset selection. Swap in `fashion_mnist` (imported alongside `mnist`) to
# run the same experiment on Fashion-MNIST instead.
dataset = mnist

# The data, split between train and test sets.
(x_train, y_train), (x_test, y_test) = dataset.load_data()

# Give every image an explicit trailing channel axis: (N, 28, 28, 1).
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# Convert to float32, scale by 1/255 and shift by -0.5 so inputs are centred
# around zero.
x_train = x_train.astype('float32')/255 - 0.5
x_test = x_test.astype('float32')/255 - 0.5

# Integer class ids, as expected by the sparse categorical loss used later.
y_train = y_train.astype('int')
y_test = y_test.astype('int')

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
# Light geometric augmentation (rotation, shear, shift, zoom) for training.
#
# The images were shifted by -0.5 during preprocessing, so the background is
# the minimum pixel value of `x_train`, not 0. Regions exposed by a transform
# are filled with that background value; filling with 0 would paint mid-grey
# patches that never occur in the un-augmented validation/test images.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    shear_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    fill_mode='constant',
    cval=float(x_train.min()))
# `datagen.fit(...)` is intentionally not called: it only computes statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening, and
# none of those options is enabled above.


In [None]:
def gen_model(activation='relu', kernel_size=3):
  """Assemble the uncompiled CNN classifier for 28x28x1 inputs.

  The network is four 'same'-padded convolutions (16, 32, 64, 128 filters),
  each followed by dropout. The last two convolutions are preceded by batch
  normalization and followed by 2x2 max-pooling. The flattened features go
  through a 128-unit dense layer with dropout and a final 10-unit dense layer
  without activation, so the model outputs raw logits.

  Args:
    activation: Activation used by every convolution and the hidden dense
      layer.
    kernel_size: Kernel size shared by all convolutions.

  Returns:
    The assembled, uncompiled `keras.models.Sequential` model.
  """
  layers = keras.layers

  def same_conv(filters):
    # All convolutions share kernel size, activation and 'same' padding.
    return layers.Conv2D(filters, kernel_size=kernel_size,
                         activation=activation, padding='same')

  model = keras.models.Sequential()
  stack = [
      keras.Input(shape=(28, 28, 1)),

      same_conv(16),
      layers.Dropout(0.25),

      same_conv(32),
      layers.Dropout(0.25),

      layers.BatchNormalization(),
      same_conv(64),
      layers.Dropout(0.25),
      layers.MaxPooling2D(pool_size=(2, 2)),

      layers.BatchNormalization(),
      same_conv(128),
      # NOTE(review): 0.8 is much higher than the other dropout rates; kept
      # as-is, confirm it is intentional.
      layers.Dropout(0.8),
      layers.MaxPooling2D(pool_size=(2, 2)),

      layers.Flatten(),

      layers.Dense(128, activation=activation),
      layers.Dropout(0.2),

      # No activation on the output layer: raw logits.
      layers.Dense(10),
  ]
  for layer in stack:
    model.add(layer)
  return model


def compile_model(model, lr):
  """Compile `model` in place for integer-label classification on logits.

  Args:
    model: Keras model to compile; it is configured in place.
    lr: Learning rate handed to a newly created Adam optimizer.
  """
  # The loss consumes raw logits together with integer class labels.
  logits_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  adam = keras.optimizers.Adam(learning_rate=lr)
  model.compile(loss=logits_loss, optimizer=adam, metrics=['accuracy'])

In [None]:
# Hyper-parameters for this run.
activation = 'relu'
kernel = 9
lr = 0.001  # initial learning rate; the training cells halve it between passes

# Build the model inside the training scope. With a TPU strategy the model's
# variables must be created under `strategy.scope()`; creating them outside
# and only compiling/fitting inside the scope fails. With the default
# `nullcontext` scope this is a no-op.
with training_scope():
  tf_model = gen_model(activation, kernel)

# Stop a training pass once `val_loss` has not improved for 10 epochs and roll
# the model back to the best weights seen during that pass.
stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)


In [None]:
%%time
# Stage 1: train on augmented batches produced by `datagen`.
# Each of the 10 passes recompiles the model with the current `lr` (which
# creates a new Adam optimizer), trains for up to 100 epochs with early
# stopping, and then halves `lr` for the next pass.
# NOTE(review): `lr` is a notebook-level variable mutated here, so re-running
# this cell continues from the already-halved value instead of the initial one.
# NOTE(review): the test set is also used as validation data, so early stopping
# and the restored best weights are selected on the same data that is reported
# as the test score below.
for i in range(10):
  with training_scope():
    compile_model(tf_model, lr)
    tf_model.fit(datagen.flow(x_train, y_train, batch_size=1024),
              epochs=100,
              verbose=1,
              callbacks=[stop],
              validation_data=(x_test, y_test))
    lr /= 2
# Final evaluation after all passes; with the compile settings used here,
# `score` is [loss, accuracy].
score = tf_model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoc

In [None]:
%%time
# Stage 2: continue training the same model on the original (un-augmented)
# training images.
# Each of the 5 passes recompiles the model with the current `lr`, trains for
# up to 100 epochs with early stopping, and then halves `lr`.
# NOTE(review): `lr` is not reset in this cell; it starts from whatever value
# earlier cells left in the notebook-level variable.
# NOTE(review): as in the previous training cell, the test set is used as
# validation data for early stopping.
for i in range(5):
  with training_scope():
    compile_model(tf_model, lr)
    tf_model.fit(x_train, y_train,
              batch_size=1024,
              epochs=100,
              verbose=1,
              callbacks=[stop],
              validation_data=(x_test, y_test))
    lr /= 2
# Final evaluation; with the compile settings used here, `score` is
# [loss, accuracy].
score = tf_model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 1/100
Epoch 2/100
Ep