# Deep Learning TensorFlow Utilities

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Install TensorFlow
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

print(tf.__version__)

2.2.0-rc4


## Working with files and folders

In [0]:
import shutil # copy, move file
import os # miscellaneous operation system interfaces
import pathlib

**Document: [shutil](https://docs.python.org/3/library/shutil.html#module-shutil)**

In [0]:
# NOTE: `src` and `dst` are not defined earlier in this notebook; assign real
# paths to them before running this cell.
# Copy the file src to the file or directory dst
shutil.copy(src, dst)
# !cp ./src ./dst

# Move the file src to the file or directory dst
shutil.move(src, dst)
# !mv ./src ./dst

# Removing non-empty directory
shutil.rmtree('DemoDir/')
# !rm -rf DemoDir/

# Removing empty directory
os.rmdir("demoDir/")

**Document [os](https://docs.python.org/3/library/os.html):** This module provides a portable way of using operating system dependent functionality.

In [0]:
!mkdir tom

In [0]:
# NOTE: `path`, `src` and `dst` are not defined earlier in this notebook; assign
# real paths to them before running this cell.
# Get current directory
print(os.getcwd())
# !pwd

# Get list of environment variables
print(os.environ)

# List of the names of the entries in the directory
print(os.listdir(path='.'))
# !ls .

# Create a directory named path
os.mkdir(path)
# !mkdir path

# Delete the file path
os.remove(path)

# Rename the file or directory src to dst
os.rename(src, dst)
# !mv ./tom.txt ./nhan.txt

# os.scandir(path) return an iterator of os.DirEntry
# item.is_dir() return True if the item is a directory
# item.is_file() return True if the item is a file

# Listing subdirectories
# (os.scandir() is called without arguments, so it scans the current directory)
for item in os.scandir():
    if item.is_dir():
        print(item.name)

In [0]:
# Delete file if exists
del_file = "testfile.txt"
# Guard first: only a regular file is removed; anything else is reported as missing.
if not os.path.isfile(del_file):
    print("File does not exist!")
else:
    os.remove(del_file)
    print("File removed successfully!")

File does not exist!




**Document: [pathlib](https://docs.python.org/3/library/pathlib.html):** This module offers classes representing filesystem paths with semantics appropriate for different operating systems.

In [0]:
# Listing subdirectories
path_root = pathlib.Path('.')
# Keep only the entries that are directories and show their names.
print([entry.name for entry in path_root.iterdir() if entry.is_dir()])

In [0]:
# Get parent directory
path = pathlib.Path('./sample_data')
str(path.parent)

In [0]:
os.path.dirname('./sample_data')

In [0]:
# List Python source files
p = pathlib.Path('./sample_data')
list(p.glob('**/*.py'))

In [0]:
# Write a file
# `Path` is reached through the `pathlib` module, which is how this notebook
# imports it (a bare `Path` is never imported).
path = pathlib.Path('.editorconfig')
with path.open(mode='wt') as config:
    config.write('# config goes here')

In [0]:
# Count number of files
!ls  ../data/train/*/* | wc -l
!ls  ../data/test/*/* | wc -l

## Get Data

Documentation [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset), read data [from_generator](https://www.tensorflow.org/api_docs/python/tf/data/Dataset?version=stable#from_generator), [from_tensor_slices](https://www.tensorflow.org/api_docs/python/tf/data/Dataset?version=stable#from_tensor_slices)

Documentation [ImageDataGenerator](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator?version=stable). Read data [flow_from_dataframe](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator?version=stable#flow_from_dataframe), [flow_from_directory](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator?version=stable#flow_from_directory)

[TensorFlow Datasets](https://github.com/tensorflow/datasets/tree/master/docs): A collection of datasets ready to use with TensorFlow. Returns a `tf.data.Dataset`.

[tf.keras.datasets](https://www.tensorflow.org/api_docs/python/tf/keras/datasets): Keras built-in datasets. Returns tuples of NumPy arrays.

Guide: [Better performance with the tf.data API](https://www.tensorflow.org/guide/data_performance)

### **tf.data.Dataset**

#### Without Augmentation

In [0]:
# NOTE: `all_image_paths`, `all_image_labels`, `image_count` and
# `load_and_preprocess_image` are not defined in this notebook; provide them
# before running this cell.
# Build a dataset of (path, label) pairs.
ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# The tuples are unpacked into the positional arguments of the mapped function
def load_and_preprocess_from_path_label(path, label):
    """Return the preprocessed image for `path` together with its unchanged label."""
    return load_and_preprocess_image(path), label

image_label_ds = ds.map(load_and_preprocess_from_path_label)

BATCH_SIZE = 32
AUTOTUNE = tf.data.experimental.AUTOTUNE


# Setting a shuffle buffer size as large as the dataset ensures that the data is
# completely shuffled.
ds = image_label_ds.shuffle(buffer_size=image_count)
# repeat() without a count makes the dataset repeat indefinitely.
ds = ds.repeat()
ds = ds.batch(BATCH_SIZE)
# prefetch lets the dataset fetch batches in the background while the model is training.
ds = ds.prefetch(buffer_size=AUTOTUNE)

#### With Augmentation

In [0]:
IMAGE_WIDTH, IMAGE_HEIGHT = 192, 192

def resize_image(image):
    """Decode JPEG-encoded bytes and resize the image to the notebook's target size.

    Parameters:
        image: the encoded JPEG content (as returned by `tf.io.read_file`).

    Returns:
        The decoded 3-channel image resized to IMAGE_HEIGHT x IMAGE_WIDTH.
    """
    image = tf.image.decode_jpeg(image, channels=3)
    # tf.image.resize expects the size as [new_height, new_width].
    image = tf.image.resize(image, [IMAGE_HEIGHT, IMAGE_WIDTH])
    return image

def load_and_resize_image(path):
    """Read the file at `path` and pass its raw content to `resize_image`."""
    return resize_image(tf.io.read_file(path))

def images_preprocessing_and_saving(folder_path, image_paths, image_labels):
    """Resize every image and save it as ``<folder_path>/<label>/<index>.jpg``.

    Parameters:
        folder_path: root folder the processed images are written to.
        image_paths: iterable of source image paths.
        image_labels: sequence aligned with `image_paths`; each entry is used as
            the name of the sub-folder the image is written to.

    Returns:
        List with the new paths of the images that were saved. Images that
        fail are reported and skipped.
    """
    new_paths = []
    for index, image_path in enumerate(image_paths):
        try:
            file_name = '{}.jpg'.format(index)
            new_path = os.path.join(folder_path, image_labels[index], file_name)
            # Make sure the class folder exists before writing into it.
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            preprocessed_image = load_and_resize_image(image_path)
            tf.keras.preprocessing.image.save_img(new_path, preprocessed_image, data_format='channels_last')
            new_paths.append(new_path)
            if (index % 100 == 0) and (index > 0):
                print('Processed {} images '.format(index))
        except Exception as error:
            # Best effort: keep going, but report why the file failed.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            print("File error {}: {}".format(image_path, error))
    return new_paths

def show_batch(image_batch, label_batch):
    '''Show up to 25 images of a batch in a 5x5 grid.

    Parameters:
        image_batch: the images to display (anything `plt.imshow` accepts),
            indexable by position
        label_batch: labels of the images (str), aligned with image_batch
    '''
    plt.figure(figsize=(10,10))
    # Do not index past the end when the batch holds fewer than 25 images.
    for n in range(min(25, len(image_batch))):
        ax = plt.subplot(5,5,n+1)
        plt.imshow(image_batch[n])
        plt.title(label_batch[n].title())
        plt.axis('off')

In [0]:
image_batch = [load_and_resize_image(path)/255 for path in train_image_paths[:25]]
label_batch = train_image_labels[:25]
show_batch(image_batch , label_batch)

In [0]:
train_image_paths = images_preprocessing_and_saving(DATA_TRAIN_FOLDER, train_image_paths, train_image_labels)
test_image_paths = images_preprocessing_and_saving(DATA_TEST_FOLDER, test_image_paths, test_image_labels)

In [0]:
# Dataset sizes — presumably the image counts of the train/test folders; verify
# against the actual data.
TRAIN_SAMPLES = 1229
TEST_SAMPLES = 308
BATCH_SIZE = 32

AUTOTUNE = tf.data.experimental.AUTOTUNE
# Number of batches needed to see every sample once (rounded up).
# NOTE: tf.math.ceil returns a float tensor, not a Python int.
STEPS_PER_TRAIN_EPOCH = tf.math.ceil(TRAIN_SAMPLES/BATCH_SIZE)
STEPS_PER_TEST_EPOCH = tf.math.ceil(TEST_SAMPLES/BATCH_SIZE)

In [0]:
train_ds = tf.data.Dataset.list_files(os.path.join(DATA_TRAIN_FOLDER, '*','*'))
test_ds = tf.data.Dataset.list_files(os.path.join(DATA_TEST_FOLDER, '*','*'))

for f in train_ds.take(5):
    print(f.numpy())

def get_label(file_path):
    """Compare the class directory of `file_path` with `class_names`.

    The class directory is the second-to-last component of the path.
    """
    path_components = tf.strings.split(file_path, os.path.sep)
    class_directory = path_components[-2]
    return class_directory == class_names

def preprocess(path):
    """Load the JPEG at `path` and return ``(image, label)``.

    The label comes from `get_label(path)`; the image is decoded with 3
    channels, converted to float32 and standardized.
    """
    label = get_label(path)
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = (image*2) - 1  # normalize to [-1,1] range
    # NOTE(review): per_image_standardization rescales each image to zero mean /
    # unit variance, so the result is no longer confined to [-1,1]; the linear
    # scaling above looks redundant — confirm which normalization is intended.
    image = tf.image.per_image_standardization(image)
    return image, label

def augmentation(image, label):
    """Apply random brightness, contrast and flips to `image`; `label` passes through."""
    augmented = tf.image.random_brightness(image, .1)
    augmented = tf.image.random_contrast(augmented, lower=0.0, upper=1.0)
    flipped_horizontally = tf.image.random_flip_left_right(augmented)
    return tf.image.random_flip_up_down(flipped_horizontally), label
    
def prepare_for_training(ds, cache=True, shuffle_buffer_size=100, augment=False):
    """Turn a dataset of (image, label) pairs into an endless stream of batches.

    Parameters:
        ds: dataset yielding (image, label) pairs.
        cache: True to cache in memory, a str to cache to that file,
            False/None to disable caching.
        shuffle_buffer_size: buffer size passed to `shuffle`.
        augment: when True, `augmentation` is applied to every example.

    Returns:
        The cached (optional), shuffled, repeated, augmented (optional),
        batched and prefetched dataset.
    """
    # This is a small dataset, only load it once, and keep it in memory.
    # use `.cache(filename)` to cache preprocessing work for datasets that don't
    # fit in memory.
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
        else:
            ds = ds.cache()

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat forever
    ds = ds.repeat()

    if augment:
        # `map` returns a new dataset, so the result has to be re-assigned;
        # otherwise the augmentation is silently dropped. It runs after the
        # cache (so fresh random transforms are drawn each pass) and before
        # batching (so every image gets its own random transform).
        ds = ds.map(augmentation, num_parallel_calls=AUTOTUNE)

    ds = ds.batch(BATCH_SIZE)

    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)

    return ds

train_ds = train_ds.map(preprocess, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(preprocess, num_parallel_calls=AUTOTUNE)

for image, label in train_ds.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())

In [0]:
# Build the final input pipelines; only the training set is augmented and cached.
train_ds = prepare_for_training(train_ds, shuffle_buffer_size=TRAIN_SAMPLES, augment=True)
test_ds = prepare_for_training(test_ds, cache=False, shuffle_buffer_size=TEST_SAMPLES)

# Pull one batch and display it.
image_batch, label_batch = next(iter(train_ds))
# Each label is the boolean vector produced by get_label; argmax gives the class index.
label_batch = [class_names[np.argmax(label)] for label in label_batch.numpy()]
show_batch(image_batch, label_batch)

### **ImageDataGenerator**

* flow_from_dataframe

In [0]:
import pandas as pd

# Generator that rescales pixel values by 1/255 and applies random
# rotation / zoom / shift / shear / flip augmentation.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255, 
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest"
)

# NOTE(review): `label_names` is not defined in this notebook; indexing it with
# `train_image_labels` presumably requires a NumPy array of names and integer
# labels — confirm.
df = pd.DataFrame({'paths':train_image_paths, 'labels':label_names[train_image_labels]})

# Read images from the paths in column 'paths' and labels from column 'labels'.
image_data = image_generator.flow_from_dataframe(df,
                                                 x_col='paths',
                                                 y_col='labels',
                                                 class_mode='sparse',
                                                 target_size=(192,192))

* flow_from_directory

In [0]:
import math

# NOTE(review): `preprocess_input`, `build_model()` (called here without
# arguments), TRAIN_DATA_DIR, VALIDATION_DATA_DIR, IMG_WIDTH, IMG_HEIGHT and
# VALIDATION_SAMPLES are not defined above this cell; define them before running it.

# The notebook only imports `tensorflow as tf`, so bind the class explicitly.
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Training images are augmented; validation images are only preprocessed.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# `target_size` is given as (height, width).
train_generator = train_datagen.flow_from_directory(
                        TRAIN_DATA_DIR,
                        target_size=(IMG_HEIGHT, IMG_WIDTH),
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        seed=102,
                        class_mode='categorical')
# shuffle=False keeps the validation order stable between runs.
validation_generator = val_datagen.flow_from_directory(
                        VALIDATION_DATA_DIR,
                        target_size=(IMG_HEIGHT, IMG_WIDTH),
                        batch_size=BATCH_SIZE,
                        shuffle=False,
                        class_mode='categorical')

model = build_model()
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['acc'])
num_steps_train = math.ceil(float(TRAIN_SAMPLES)/BATCH_SIZE)
num_steps_val = math.ceil(float(VALIDATION_SAMPLES)/BATCH_SIZE)
# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
model.fit(train_generator,
          steps_per_epoch = num_steps_train,
          epochs=10,
          validation_data = validation_generator,
          validation_steps = num_steps_val)

### Dataset sources

#### **tensorflow_datasets**

In [0]:
import tensorflow_datasets as tfds

# Dataset builder for MNIST
mnist = tfds.image.MNIST()

# Describe the dataset with DatasetInfo
assert mnist.info.features['image'].shape == (28, 28, 1)
assert mnist.info.features['label'].num_classes == 10
assert mnist.info.splits['train'].num_examples == 60000
assert mnist.info.splits['test'].num_examples == 10000

# Download the data, prepare it, and write it to disk
mnist.download_and_prepare()

# Load data from disk as tf.data.Datasets
# (the result is indexed by split name below)
datasets = mnist.as_dataset()
train_dataset, test_dataset = datasets['train'], datasets['test']
assert isinstance(train_dataset, tf.data.Dataset)

In [0]:
print(train_dataset.element_spec)
print(mnist.info)
tfds.show_examples(mnist.info, test_dataset)

In [0]:
for mnist_example in train_dataset.take(1):  # Only take a single example
    image, label = mnist_example["image"], mnist_example["label"]

    plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap("gray"))
    print("Label: %d" % label.numpy())

In [0]:
BATCH_SIZE = 128
NUM_EXAMPLES = 60000
NUM_TESTING = 10000

def preprocess(ds):
    """Convert one MNIST example into the ``(features, labels)`` pair Keras expects.

    The 'image' entry is cast to float32 and divided by 255; the 'label'
    entry is returned unchanged.
    """
    pixels = tf.cast(ds['image'], tf.float32)
    return pixels / 255.0, ds['label']

# Training pipeline: preprocess, cache, repeat indefinitely, shuffle with a
# 1024-element buffer, then batch.
# NOTE(review): shuffling after repeat() lets examples from consecutive epochs
# mix in the buffer — confirm this is intended.
train_dataset = train_dataset.map(preprocess).cache().repeat().shuffle(1024).batch(BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
# Test pipeline: no shuffling or repeating.
test_dataset = test_dataset.map(preprocess).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [0]:
# shorter way
mnist_data = tfds.load('mnist')
mnist_train, mnist_test = mnist_data['train'], mnist_data['test']
mnist_train = mnist_train.map(preprocess).shuffle(NUM_EXAMPLES).batch(BATCH_SIZE).repeat()
mnist_test = mnist_test.map(preprocess).batch(BATCH_SIZE)

#### **tf.keras.datasets**

In [0]:
from tensorflow.keras.datasets import fashion_mnist

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
print(type(x_train))
plt.imshow(x_train[0], cmap='gray')
plt.show()

## Modeling

>Choosing built-in layers via [tf.keras.layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers/)

>Choosing activation functions via [tf.keras.activations](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/activations)

>Initializing the layer parameters via [tf.keras.initializers](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/initializers)

>Applying regularization to the layer parameters (to prevent overfitting) via [tf.keras.regularizers](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/regularizers)

>Optimizers via [tf.keras.optimizers](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/optimizers)

>Loss functions via [tf.keras.losses](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/losses)

>Performance metrics via [tf.keras.metrics](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/metrics)

>Pre-trained models via [tf.keras.applications](https://www.tensorflow.org/api_docs/python/tf/keras/applications)

In [0]:
def build_lenet5():
    """Build a small convolutional classifier for 28x28x1 inputs and 10 classes.

    Returns:
        An uncompiled `tf.keras` Sequential model ending in a 10-way softmax.
    """
    # The notebook only imports `tensorflow as tf`, so the layer classes are
    # reached through it instead of relying on un-imported bare names.
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ])
    return model

# Build and inspect the model defined in this cell.
model = build_lenet5()
model.summary()

In [0]:
def build_alexnet():
    """Build an AlexNet-style classifier for 224x224x3 inputs and 2 classes.

    Returns:
        An uncompiled `tf.keras` Sequential model ending in a 2-way softmax.
    """
    # The notebook only imports `tensorflow as tf`, so the layer classes are
    # reached through it instead of relying on un-imported bare names.
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        layers.Conv2D(96, kernel_size=(11, 11), strides=(4,4), activation='relu', input_shape=(224, 224, 3)),
        layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        layers.BatchNormalization(),

        layers.Conv2D(256, kernel_size=(5, 5), activation='relu'),
        layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        layers.BatchNormalization(),

        layers.Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
        layers.Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
        layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
        layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        layers.BatchNormalization(),

        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(2, activation='softmax')
    ])
    return model

model = build_alexnet()
model.summary()

**Pre-trained model**

In [0]:
# Pre-trained MobileNetV2 without its classification head, used as a frozen
# feature extractor.
mobile_net = tf.keras.applications.MobileNetV2(input_shape=(192, 192, 3), include_top=False)
mobile_net.trainable=False

# New head: global average pooling followed by one softmax unit per label.
# NOTE: `label_names` is not defined in this notebook; provide it before running.
model = tf.keras.Sequential([
    mobile_net,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(len(label_names), activation = 'softmax')])

base_learning_rate = 0.0001
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=["accuracy"])

model.summary()

**Fine-tuning**

In [0]:
# Unfreeze the whole base model first ...
mobile_net.trainable=True

print("Number of layers in the base model:", len(mobile_net.layers))

# Index of the first layer that stays trainable.
fine_tune_at = 140

for index, layer in enumerate(mobile_net.layers):
    print(index, layer.name)

# ... then freeze every layer before `fine_tune_at` again.
for layer in mobile_net.layers[:fine_tune_at]:
    layer.trainable = False

In [0]:
# Recompile with a 10x smaller learning rate than the one used for the new head.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              loss='sparse_categorical_crossentropy',
              metrics=["accuracy"])

model.summary()

In [0]:
# NOTE(review): `image_count`, `initial_epochs`, `batch_stats_callback_2` and
# `test_set` are not defined above this cell; define them before running it.

# Step counts must be integers; tf.math.ceil(...).numpy() yields a NumPy float.
steps_per_epoch = int(tf.math.ceil(0.9 * image_count/BATCH_SIZE).numpy())
val_steps_per_epoch = int(tf.math.ceil(0.1 * image_count/BATCH_SIZE).numpy())

fine_tune_epochs = 10
total_epochs = initial_epochs + fine_tune_epochs
# Resume counting at `initial_epochs` so this run continues the epoch numbering.
history = model.fit(image_data, epochs=total_epochs, 
            steps_per_epoch=steps_per_epoch,
            callbacks = [batch_stats_callback_2],
            validation_data=test_set,
            validation_steps=val_steps_per_epoch,
            initial_epoch=initial_epochs)

## Keras Tuner

`python3 -m pip install -U keras-tuner`

Documentation [Keras Tuner](https://keras-team.github.io/keras-tuner/)

In [0]:
!python3 -m pip install -U keras-tuner

Collecting keras-tuner
[?25l  Downloading https://files.pythonhosted.org/packages/a7/f7/4b41b6832abf4c9bef71a664dc563adb25afc5812831667c6db572b1a261/keras-tuner-1.0.1.tar.gz (54kB)
[K     |██████                          | 10kB 28.5MB/s eta 0:00:01[K     |████████████                    | 20kB 32.4MB/s eta 0:00:01[K     |██████████████████              | 30kB 29.5MB/s eta 0:00:01[K     |████████████████████████        | 40kB 12.7MB/s eta 0:00:01[K     |██████████████████████████████  | 51kB 11.1MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 6.5MB/s 
Collecting terminaltables
  Downloading https://files.pythonhosted.org/packages/9b/c4/4a21174f32f8a7e1104798c445dacdc1d4df86f2f26722767034e4de4bff/terminaltables-3.1.0.tar.gz
Collecting colorama
  Downloading https://files.pythonhosted.org/packages/c9/dc/45cdef1b4d119eb96316b3117e6d5708a08029992b2fee2c143c7a0a5cc5/colorama-0.4.3-py2.py3-none-any.whl
Building wheels for collected packages: keras-tuner, terminalta

In [0]:
import kerastuner as kt
import tensorflow as tf

def build_model(hp):
    """Builds a convolutional model.

    Parameters:
        hp: hyperparameter object; the `hp.Int` / `hp.Choice` calls below pick
            the number of conv blocks, the filters and kernel size of each
            block, the pooling types and the optimizer.

    Returns:
        A compiled model for 28x28x1 inputs with a 10-way softmax output.
    """
    inputs = tf.keras.Input(shape=(28, 28, 1))
    x = inputs
    # One Conv -> Pool -> BatchNorm -> ReLU block per iteration; each block has
    # its own hyperparameter names, suffixed with the block index.
    for i in range(hp.Int('conv_layers', 1, 3, default=3)):
        x = tf.keras.layers.Conv2D(
            filters=hp.Int('filters_' + str(i), 4, 32, step=4, default=8),
            kernel_size=hp.Int('kernel_size_' + str(i), 3, 5),
            activation='relu',
            padding='same')(x)

        if hp.Choice('pooling' + str(i), ['max', 'avg']) == 'max':
            x = tf.keras.layers.MaxPooling2D()(x)
        else:
            x = tf.keras.layers.AveragePooling2D()(x)

        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.ReLU()(x)

    # Collapse the spatial dimensions before the classifier.
    if hp.Choice('global_pooling', ['max', 'avg']) == 'max':
        x = tf.keras.layers.GlobalMaxPooling2D()(x)
    else:
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)

    # The optimizer is passed to compile() by name.
    optimizer = hp.Choice('optimizer', ['adam', 'sgd'])
    model.compile(optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [0]:
import tensorflow_datasets as tfds

def preprocess(ds):
    """Map one MNIST example dict to the ``(features, labels)`` pair Keras expects.

    Features are the 'image' entry cast to float32 and divided by 255; labels
    are the 'label' entry as-is.
    """
    features = tf.cast(ds['image'], tf.float32) / 255.0
    labels = ds['label']
    return features, labels

mnist_data = tfds.load('mnist')
mnist_train, mnist_test = mnist_data['train'], mnist_data['test']
mnist_train = mnist_train.map(preprocess).shuffle(1000).batch(100).repeat()
mnist_test = mnist_test.map(preprocess).batch(100)

Downloading and preparing dataset mnist/3.0.0 (download: 11.06 MiB, generated: Unknown size, total: 11.06 MiB) to /root/tensorflow_datasets/mnist/3.0.0...


local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead set
data_dir=gs://tfds-data/datasets.



HBox(children=(IntProgress(value=0, description='Dl Completed...', max=4, style=ProgressStyle(description_widt…



Dataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.0. Subsequent calls will reuse this data.


In [0]:
# Runs the hyperparameter search.

# Random search over the hyperparameters declared in build_model: 5 trials,
# each trained 3 times; results are written under results_dir/mnist.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='results_dir',
    project_name='mnist')

# mnist_train repeats indefinitely, so the epoch length has to be given
# explicitly through steps_per_epoch.
# The first positional argument of EarlyStopping is the monitored metric.
tuner.search(mnist_train,
             steps_per_epoch=600,
             validation_data=mnist_test,
             validation_steps=100,
             epochs=20,
             callbacks=[tf.keras.callbacks.EarlyStopping('val_accuracy')])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


In [0]:
tuner.results_summary()

In [0]:
tuner.get_best_hyperparameters()[0].values

{'conv_layers': 3,
 'filters_0': 16,
 'filters_1': 28,
 'filters_2': 20,
 'global_pooling': 'avg',
 'kernel_size_0': 3,
 'kernel_size_1': 4,
 'kernel_size_2': 4,
 'optimizer': 'sgd',
 'pooling0': 'max',
 'pooling1': 'avg',
 'pooling2': 'avg'}

## Callbacks

`model.fit(dataset, ..,  callbacks=[..])`

**Batch stats**

In [0]:
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Callback that records loss and accuracy after every train/test batch.

    Attributes:
        batch_losses / batch_acc: per-batch training loss and accuracy.
        batch_val_losses / batch_val_acc: per-batch loss and accuracy
            collected during evaluation.
    """

    def __init__(self):
        # Let the base class set up its own state before adding ours.
        super(CollectBatchStats, self).__init__()
        self.batch_losses = []
        self.batch_acc = []
        self.batch_val_losses = []
        self.batch_val_acc = []

    @staticmethod
    def _accuracy(logs):
        # The log key follows the metric name given to compile():
        # metrics=['acc'] -> 'acc', metrics=['accuracy'] -> 'accuracy'.
        return logs['acc'] if 'acc' in logs else logs['accuracy']

    def on_train_batch_end(self, batch, logs=None):
        self.batch_losses.append(logs['loss'])
        self.batch_acc.append(self._accuracy(logs))
        # reset_metrics: the metrics returned will be only for this batch. 
        # If False, the metrics will be statefully accumulated across batches.
        self.model.reset_metrics()
  
    def on_test_batch_end(self, batch, logs=None):
        self.batch_val_losses.append(logs['loss'])
        self.batch_val_acc.append(self._accuracy(logs))
        # reset_metrics: the metrics returned will be only for this batch. 
        # If False, the metrics will be statefully accumulated across batches.
        self.model.reset_metrics()

def plot_stats(training_stats, val_stats, x_label='Training Steps', stats='loss'):
    """Plot training and validation statistics on one figure.

    Parameters:
        training_stats: sequence of training values, one per step.
        val_stats: sequence of validation values; stretched over the same
            x range as the training values.
        x_label: label of the x axis.
        stats: name of the plotted statistic (used for the y label and legend).
    """
    # Pick the legend position BEFORE title-casing: after `.title()` the name
    # is 'Loss' and would never compare equal to 'loss'.
    legend_loc = 'upper right' if stats.lower() == 'loss' else 'lower right'
    stats, x_label = stats.title(), x_label.title()
    training_steps = len(training_stats)
    test_steps = len(val_stats)

    plt.figure()
    plt.ylabel(stats)
    plt.xlabel(x_label)
    plt.plot(training_stats, label='Training ' + stats)
    # Spread the validation points evenly over the training-step axis.
    plt.plot(np.linspace(0, training_steps, test_steps), val_stats, label='Validation ' + stats)
    plt.ylim([0,max(plt.ylim())])
    plt.legend(loc=legend_loc)
    plt.show()

batch_stats_callback = CollectBatchStats()

**Early Stopping**

In [0]:
# `keras` alone is never imported in this notebook; use it through `tf`.
es_callback = tf.keras.callbacks.EarlyStopping(
    # Stop training when `val_loss` is no longer improving
    monitor='val_loss',
    # "no longer improving" being defined as "no better than 1e-2 less"
    min_delta=1e-2,
    # "no longer improving" being further defined as "for at least 2 epochs"
    patience=2,
    verbose=1)

**Model Checkpointing**

In [0]:
# `keras` alone is never imported in this notebook; use it through `tf`.
tf.keras.callbacks.ModelCheckpoint(
        # Path where to save the model; `{epoch}` is replaced by the epoch
        # number, so each saved checkpoint gets its own file.
        filepath='mymodel_{epoch}.h5',
        # The two parameters below mean that a checkpoint is written
        # if and only if the `val_loss` score has improved.
        save_best_only=True,
        monitor='val_loss',
        verbose=1)

**Learning rate schedule**

Several built-in schedules are available: ExponentialDecay, PiecewiseConstantDecay, PolynomialDecay, and InverseTimeDecay.

Documentation [tf.keras.optimizers.schedules](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules)

In [0]:
initial_learning_rate = 0.1
# `keras` alone is never imported in this notebook; use it through `tf`.
# With staircase=True the rate drops in discrete steps: it is multiplied by
# `decay_rate` once every `decay_steps` optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

# A schedule can be passed wherever a fixed learning rate is accepted.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)

## **TensorBoard**

In [0]:
# `keras` alone is never imported in this notebook; use it through `tf`.
tf.keras.callbacks.TensorBoard(
  log_dir='/full_path_to_your_logs',
  histogram_freq=0,  # How often to log histogram visualizations
  embeddings_freq=0,  # How often to log embedding visualizations
  update_freq='epoch')  # How often to write logs (default: once per epoch)

In [0]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [0]:
# Clear any logs from previous runs
!rm -rf ./logs/

In [0]:
# You should be able to launch TensorBoard from the command line
%tensorboard --logdir ./logs/

## **Plot the result**


In [0]:
# Take 1 batch out to test
# NOTE: `test_set` and `label_names` are not defined in this notebook; provide
# them before running this cell.
# The loop body runs once: the `break` stops after the first batch.
for image_batch, label_batch in test_set:
    print("Image batch shape: ", image_batch.shape)
    print("Label batch shape: ", label_batch.shape)
    break

predicted_batch = model.predict(image_batch)
# Index of the highest score per row = predicted class id.
predicted_id = np.argmax(predicted_batch, axis=1)
# NOTE(review): indexing with an array of ids presumably requires `label_names`
# to be a NumPy array — confirm.
predicted_label_batch = label_names[predicted_id]

In [0]:
# Plot the result
# Show the first 30 images of the batch in a 6x5 grid, titled with the
# predicted label.
plt.figure(figsize=(10,9))
plt.subplots_adjust(hspace=0.5)
for n in range(30):
    plt.subplot(6,5,n+1)
    # NOTE(review): (x+1)/2 assumes the images are scaled to [-1,1] — confirm
    # against the preprocessing used for `test_set`.
    plt.imshow((image_batch[n]+1)/2)
    color = "green" if predicted_id[n] == label_batch.numpy()[n] else "red"
    plt.title(predicted_label_batch[n].title(), color=color)
    plt.axis('off')
_ = plt.suptitle("Model predictions (green: correct, red: incorrect)")

## **Error Analysis**

In [0]:
# Ground-truth class indices, file names and label<->index mappings as exposed
# by the validation generator.
ground_truth = validation_generator.classes
filenames = validation_generator.filenames
label_to_index = validation_generator.class_indices
index_to_label = dict((v,k) for k, v in label_to_index.items())
# `Model.predict` accepts generators directly; `predict_generator` is deprecated.
predictions = model.predict(validation_generator, steps=None)

# prediction_table maps image index -> [highest probability, predicted class
# index, ground-truth class index]
prediction_table = {}
for index, val in enumerate(predictions):
    # get argmax index
    index_of_highest_probability = np.argmax(val)
    value_of_highest_probability = val[index_of_highest_probability]
    prediction_table[index] = [value_of_highest_probability, 
                               index_of_highest_probability, 
                               ground_truth[index]]
# One table row per prediction and per ground-truth entry.
assert len(predictions) == len(ground_truth) == len(prediction_table)

In [0]:
def get_images_with_sorted_probabilities(prediction_table,
                                         get_highest_probability,
                                         label,
                                         number_of_items,
                                         only_false_predictions=False):
    """Return the entries predicted as `label`, ordered by their table value.

    Parameters:
        prediction_table: dict mapping image index to
            [probability, predicted_index, ground_truth_index].
        get_highest_probability: True to sort from highest to lowest,
            False for lowest to highest.
        label: predicted class index to keep.
        number_of_items: maximum number of entries to return.
        only_false_predictions: when true, keep only entries whose prediction
            differs from the ground truth.

    Returns:
        List of [image_index, [probability, predicted_index, gt]] with at most
        `number_of_items` entries.
    """
    # Keys ordered by their table value (the probability comes first in it).
    ranked_keys = sorted(prediction_table,
                         key=prediction_table.get,
                         reverse=get_highest_probability)
    result = []
    for image_index in ranked_keys:
        probability, predicted_index, gt = prediction_table[image_index]
        if predicted_index != label:
            continue
        # Correct predictions are skipped when only the mistakes are wanted.
        if only_false_predictions and predicted_index == gt:
            continue
        result.append([image_index, [probability, predicted_index, gt]])
    return result[:number_of_items]

def plot_images(filenames, distances, message):
    """Plot the given image files in a 5-column grid.

    Parameters:
        filenames: paths of the image files to show.
        distances: one value per image, shown as "Probability" in its title.
        message: title of the whole figure.
    """
    # Imported here because the notebook's import cell only brings in
    # matplotlib.pyplot.
    import matplotlib.image as mpimg

    images = [mpimg.imread(filename) for filename in filenames]
    plt.figure(figsize=(20, 15))
    # The figure title does not depend on the image, so set it once.
    plt.suptitle(message, fontsize=20, fontweight='bold')
    columns = 5
    # subplot() needs an integer row count; `/` would produce a float.
    rows = len(images) // columns + 1
    for i, image in enumerate(images):
        ax = plt.subplot(rows, columns, i + 1)
        ax.set_title("\n\n" + filenames[i].split("/")[-1] + "\n" +
                     "\nProbability: " +
                     str(float("{0:.2f}".format(distances[i]))))
        plt.axis('off')
        plt.imshow(image)
        
def display(sorted_indices, message):
    """Plot the images referenced by `sorted_indices` with their probabilities.

    Parameters:
        sorted_indices: iterable of [image_index, [probability, predicted_index, gt]]
            entries; image_index is looked up in the global `filenames`.
        message: title of the figure.
    """
    # NOTE(review): this name shadows IPython's built-in `display` in the notebook.
    similar_image_paths = []
    distances = []
    for name, value in sorted_indices:
        [probability, predicted_index, gt] = value
        # os.path.join works whether or not VALIDATION_DATA_DIR ends with a separator.
        similar_image_paths.append(os.path.join(VALIDATION_DATA_DIR, filenames[name]))
        distances.append(probability)
    plot_images(similar_image_paths, distances, message)

In [0]:
# NOTE(review): class index 1 is assumed to be "dog" here; check
# validation_generator.class_indices.
# Up to 10 images predicted as class 1, highest probability first.
most_confident_dog_images = get_images_with_sorted_probabilities(prediction_table, True, 1, 10, False)
message = 'Images with highest probability of containing dogs'
display(most_confident_dog_images, message)

# Up to 10 images predicted as class 1, lowest probability first.
least_confident_dog_images = get_images_with_sorted_probabilities(prediction_table, False, 1, 10, False)
message = 'Images with lowest probability of containing dogs'
display(least_confident_dog_images, message)

# Up to 10 images predicted as class 1 whose ground truth is a different class.
incorrect_dog_images = get_images_with_sorted_probabilities(prediction_table, True, 1, 10, True)
message = 'Images of cats with highest probability of containing dogs'
display(incorrect_dog_images, message)