<a href="https://colab.research.google.com/github/few-shot-learning/Keras-FewShotLearning/blob/notebooks%2Fbenchmark-perf-caches/notebooks/benchmark_caching_performance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab bootstrap: fetch the project sources and install its dependencies with Poetry.
! git clone https://github.com/few-shot-learning/Keras-FewShotLearning.git
%cd Keras-FewShotLearning
# Install Poetry through its bootstrap script (it lands in $HOME/.poetry/bin).
! curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
# Turn off virtualenv creation for this project (--local) so Poetry targets the running Python environment.
! $HOME/.poetry/bin/poetry config virtualenvs.create false --local
! $HOME/.poetry/bin/poetry add protobuf==3.10.0  # Fix for colab version
! $HOME/.poetry/bin/poetry add gpumonitor  # Provides the TFGpuMonitorCallback imported in the next cell

Cloning into 'Keras-FewShotLearning'...
remote: Enumerating objects: 103, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 1635 (delta 44), reused 43 (delta 19), pack-reused 1532[K
Receiving objects: 100% (1635/1635), 1.96 MiB | 11.66 MiB/s, done.
Resolving deltas: 100% (948/948), done.
/content/Keras-FewShotLearning
[32mRetrieving Poetry metadata[0m

# Welcome to [32mPoetry[0m!

This will download and install the latest version of [32mPoetry[0m,
a dependency and package manager for Python.

It will add the `poetry` command to [32mPoetry[0m's bin directory, located at:

[33m$HOME/.poetry/bin[0m

This path will then be added to your `PATH` environment variable by
modifying the profile file located at:

[33m$HOME/.profile[0m

You can uninstall at any time by executing this script with the --uninstall option,
and these changes will be reverted.

Installing version: [32m1.0.9[0m
  - Downloading [33

In [2]:
from functools import partial
from pathlib import Path

import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from gpumonitor.callbacks.tf import TFGpuMonitorCallback
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, GlobalMaxPooling2D, Input, MaxPooling2D
from tensorflow.keras.models import Sequential

from keras_fsl.utils.datasets import assign, cache, cache_with_tf_record, read_decode_and_crop_jpeg, transform

In [3]:
train_dataset = tfds.load(name="cifar10", split="train")
output_dir = Path("logs") / "benchmark_caching_performance"
images_dir = output_dir / "cifar10"
images_dir.mkdir(parents=True, exist_ok=True)

# Write every training image to disk as a JPEG named after its tfds id, and keep the
# (id, label) records so file-based input pipelines can be built from them afterwards.
examples = []
for example in train_dataset:
    example_id = example["id"].numpy().decode()
    jpeg_bytes = tf.io.encode_jpeg(example["image"])
    tf.io.write_file(str(images_dir / example_id), jpeg_bytes)
    examples.append({"id": example_id, "label": example["label"].numpy()})

[1mDownloading and preparing dataset cifar10/3.0.2 (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to /root/tensorflow_datasets/cifar10/3.0.2...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Extraction completed...', max=1.0, styl…

  ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl`










HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteIYNJKX/cifar10-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/cifar10/3.0.2.incompleteIYNJKX/cifar10-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))

[1mDataset cifar10 downloaded and prepared to /root/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.[0m


In [10]:
# Cache strategies under comparison, keyed by the name printed by the benchmark loop.
# The two project helpers are called here, before any pipeline is built.
cache_strategies = {
    "tf_record_cache": cache_with_tf_record(output_dir / "tf_record_cache"),
    "dataset_cache": cache(output_dir / "dataset_cache"),
    "no_cache": lambda ds: ds,
}


def load_images_from_disk(cache_func):
    """Build the file-backed pipeline from ``examples`` and apply ``cache_func`` to it.

    A ``filename`` column pointing into ``output_dir / "cifar10"`` is derived from each id,
    the records become a ``tf.data.Dataset`` and an ``image`` entry is added through the
    project's ``assign(image=read_decode_and_crop_jpeg)`` helper (presumably decoding the
    JPEG found at ``filename`` -- verify against ``keras_fsl.utils.datasets``).
    """
    files_df = pd.DataFrame(examples)
    files_df = files_df.assign(filename=str(output_dir / "cifar10") + "/" + files_df.id)
    file_dataset = tf.data.Dataset.from_tensor_slices(files_df.to_dict("list"))
    decoded_dataset = file_dataset.map(
        assign(image=read_decode_and_crop_jpeg),
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
    return decoded_dataset.apply(cache_func)


def to_model_inputs(dataset):
    """Apply the float32 image conversion, emit shape-checked (image, label) pairs, batch by 64."""
    float_dataset = dataset.map(
        transform(image=partial(tf.image.convert_image_dtype, dtype=tf.float32)),
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

    def as_image_label_pair(x):
        # Static shapes are taken from the tfds dataset so every pipeline matches it.
        return (
            tf.ensure_shape(x["image"], train_dataset.element_spec["image"].shape),
            tf.ensure_shape(x["label"], train_dataset.element_spec["label"].shape),
        )

    pairs = float_dataset.map(as_image_label_pair, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return pairs.batch(64)


raw_datasets = {name: load_images_from_disk(cache_func) for name, cache_func in cache_strategies.items()}
# The untouched tfds dataset serves as the reference point for the comparison.
raw_datasets["baseline"] = train_dataset
datasets = {name: to_model_inputs(raw_dataset) for name, raw_dataset in raw_datasets.items()}

In [11]:
# CIFAR-10 has ten classes; the classification head emits one probability per class.
NUM_CLASSES = 10

# Four identical Conv -> MaxPool -> Dropout stages, doubling the filter count each time.
feature_layers = []
for filters in (32, 64, 128, 256):
    feature_layers += [
        Conv2D(filters=filters, kernel_size=(3, 3), padding="same", activation="relu"),
        MaxPooling2D(pool_size=2),
        Dropout(0.3),
    ]

model = Sequential(
    [
        Input(shape=train_dataset.element_spec["image"].shape),
        *feature_layers,
        # Global pooling already yields a (batch, channels) tensor, so no Flatten is needed.
        GlobalMaxPooling2D(),
        # The benchmark trains with "sparse_categorical_crossentropy", which expects one
        # probability per class; without this head the loss was fed 256 raw ReLU features.
        Dense(NUM_CLASSES, activation="softmax"),
    ]
)
# Snapshot the freshly initialised weights so that they can be reloaded later.
model.save_weights(str(output_dir / "initial_weights.h5"))

In [12]:
# Each pipeline is trained for the same number of steps, reloading the saved initial
# weights first so that only the input pipeline differs between runs.
initial_weights_path = str(output_dir / "initial_weights.h5")
steps_per_run = 5000
for key in datasets:
    dataset = datasets[key]
    print(f"Current cache: {key}")
    model.load_weights(initial_weights_path)
    # Compiling again on every run; presumably this also resets the optimizer state -- verify.
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
    repeated_dataset = dataset.repeat()
    gpu_monitor = TFGpuMonitorCallback(delay=0.5)
    model.fit(repeated_dataset, steps_per_epoch=steps_per_run, callbacks=[gpu_monitor])

Current cache: tf_record_cache
[0] Tesla K80        | 49'C,  40 % |   695 / 11441 MB |
Current cache: dataset_cache
[0] Tesla K80        | 52'C,  47 % |   695 / 11441 MB |
Current cache: no_cache
[0] Tesla K80        | 43'C,  26 % |   695 / 11441 MB |
Current cache: baseline
[0] Tesla K80        | 44'C,  48 % |   695 / 11441 MB |
