In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Importing the Necessary Modules

In [2]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# the data, split between train and test sets
(x_train,y_train),(x_test,y_test) = mnist.load_data()

print(x_train.shape,y_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Using TensorFlow backend.


(60000, 28, 28) (60000,)


# 2. Preprocess the Data

In [3]:
x_train = x_train.reshape(x_train.shape[0],28,28,1)
x_test = x_test.reshape(x_test.shape[0],28,28,1)
input_shape = (28, 28, 1)

# convert class vectors to binary class matrices
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

print(x_train.shape)
x_test.shape


(60000, 28, 28, 1)


(10000, 28, 28, 1)

# 3. Importing Gpu's and Tpu's

In [4]:
# Detect hardware
import tensorflow as tf
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
except ValueError:
  tpu = None
  gpus = tf.config.experimental.list_logical_devices("GPU")
    
# Select appropriate distribution strategy
if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # instantiate a distribution strategy
    strategy = tf.distribute.experimental.TPUStrategy(tpu) 
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])  
elif len(gpus) > 1:
  strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
  print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:    
  strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on single GPU ', gpus[0].name)
else:
  strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Running on TPU  ['10.0.0.2:8470']
Number of accelerators:  8


# 4. Create the model

In [5]:
# Parameters
BATCH_SIZE = 64 * strategy.num_replicas_in_sync 
LEARNING_RATE = 0.01
LEARNING_RATE_EXP_DECAY = 0.6 if strategy.num_replicas_in_sync == 1 else 0.7

In [6]:
# Make Model
def make_model():
    model = tf.keras.Sequential(
      [
        tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1), name="image"),

        tf.keras.layers.Conv2D(filters=12, kernel_size=3, padding='same', use_bias=False), 
        tf.keras.layers.BatchNormalization(scale=False, center=True), 
        tf.keras.layers.Activation('relu'), 

        tf.keras.layers.Conv2D(filters=24, kernel_size=6, padding='same', use_bias=False, strides=2),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),

        tf.keras.layers.Conv2D(filters=32, kernel_size=6, padding='same', use_bias=False, strides=2),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(200, use_bias=False),
        tf.keras.layers.BatchNormalization(scale=False, center=True),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Dropout(0.4), # Dropout on dense layer only

        tf.keras.layers.Dense(10, activation='softmax')
      ])

    model.compile(optimizer='adam', # learning rate will be set by LearningRateScheduler
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
# Creating the Model
with strategy.scope():
    model = make_model()

# print model layers
model.summary()

# set up learning rate decay
lr_decay = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: LEARNING_RATE * LEARNING_RATE_EXP_DECAY**epoch,
    verbose=True)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image (Reshape)              (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 12)        108       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 12)        36        
_________________________________________________________________
activation (Activation)      (None, 28, 28, 12)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 24)        10368     
_________________________________________________________________
batch_normalization_1 (Batch (None, 14, 14, 24)        72        
_________________________________________________________________
activation_1 (Activation)    (None, 14, 14, 24)        0

# 5. Train the Model

In [15]:
EPOCHS = 10
steps_per_epoch = 60000//BATCH_SIZE  # 60,000 items in this dataset
print("Steps per epoch: ", steps_per_epoch)
  
# Little wrinkle: in the present version of Tensorfow (1.14), switching a TPU
# between training and evaluation is slow (approx. 10 sec). For small models,
# it is recommeneded to run a single eval at the end.

history = model.fit(x_train,y_train,
                    steps_per_epoch=steps_per_epoch, epochs=EPOCHS,
                    callbacks=[lr_decay])

final_stats = model.evaluate(x_test,y_test, steps=1)
print("Validation accuracy: ", final_stats[1])

model.save('Handwritten-digit-mnist.h5')

Steps per epoch:  117

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.97900, saving model to best_model.h5

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006999999999999999.
Epoch 2/50
Epoch 00002: val_accuracy improved from 0.97900 to 0.97990, saving model to best_model.h5

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0049.
Epoch 3/50
Epoch 00003: val_accuracy improved from 0.97990 to 0.99360, saving model to best_model.h5

Epoch 00004: LearningRateScheduler reducing learning rate to 0.003429999999999999.
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.99360

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0024009999999999995.
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.99360

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0016806999999999994.
Epoch 6/50
Epoch 00006: val_accuracy improved from 0.99360 to 0.99410,


Epoch 00022: LearningRateScheduler reducing learning rate to 5.5854586408328325e-06.
Epoch 22/50
Epoch 00022: val_accuracy did not improve from 0.99450

Epoch 00023: LearningRateScheduler reducing learning rate to 3.909821048582983e-06.
Epoch 23/50
Epoch 00023: val_accuracy did not improve from 0.99450

Epoch 00024: LearningRateScheduler reducing learning rate to 2.736874734008088e-06.
Epoch 24/50
Epoch 00024: val_accuracy did not improve from 0.99450

Epoch 00025: LearningRateScheduler reducing learning rate to 1.9158123138056613e-06.
Epoch 25/50
Epoch 00025: val_accuracy did not improve from 0.99450

Epoch 00026: LearningRateScheduler reducing learning rate to 1.3410686196639628e-06.
Epoch 26/50
Epoch 00026: val_accuracy did not improve from 0.99450

Epoch 00027: LearningRateScheduler reducing learning rate to 9.38748033764774e-07.
Epoch 27/50
Epoch 00027: val_accuracy did not improve from 0.99450

Epoch 00028: LearningRateScheduler reducing learning rate to 6.571236236353417e-07.
E

Epoch 00043: val_accuracy did not improve from 0.99460

Epoch 00044: LearningRateScheduler reducing learning rate to 2.1838143759917907e-09.
Epoch 44/50
Epoch 00044: val_accuracy did not improve from 0.99460

Epoch 00045: LearningRateScheduler reducing learning rate to 1.5286700631942536e-09.
Epoch 45/50
Epoch 00045: val_accuracy did not improve from 0.99460

Epoch 00046: LearningRateScheduler reducing learning rate to 1.0700690442359773e-09.
Epoch 46/50
Epoch 00046: val_accuracy did not improve from 0.99460

Epoch 00047: LearningRateScheduler reducing learning rate to 7.490483309651841e-10.
Epoch 47/50
Epoch 00047: val_accuracy did not improve from 0.99460

Epoch 00048: LearningRateScheduler reducing learning rate to 5.243338316756289e-10.
Epoch 48/50
Epoch 00048: val_accuracy did not improve from 0.99460

Epoch 00049: LearningRateScheduler reducing learning rate to 3.6703368217294017e-10.
Epoch 49/50
Epoch 00049: val_accuracy did not improve from 0.99460

Epoch 00050: LearningRateSch

# 6. Evaluate the model

In [11]:
score = model.evaluate(x_test,y_test,verbose=0)
score

[0.015627268701791763, 0.995199978351593]