From da105b027706599610c7f87edc7803732257fa68 Mon Sep 17 00:00:00 2001
From: Benjamin Chislett <chislett.ben@gmail.com>
Date: Tue, 30 Mar 2021 10:49:32 -0400
Subject: [PATCH] Benchmark restructuring and memory profiling (#642)

* Refactor compression benchmarks to runnable design

Plan: each benchmark will export two functions, `<filename>_setup` and
`<filename>_run`, which designate how the benchmark is to be run. The
setup function will take any required parameters and return a processed
param tuple to be passed as the argument to the runner. The runner is
designed to be as slim as possible so we only measure the crucial code.
Then, we can externally call these functions and time/profile/benchmark
the runtime of the function call, allowing for a great increase in
control. Further refactors along this design coming soon.

* Refactor dataset iteration benchmarks

Following the previous commit, this refactors `benchmark_dataset_iter.py`
into separate files with the same design as the now-refactored
`benchmark_compress_hub.py`. One step closer to full control.

* Add full dataset compute benchmark

It'll be nice to keep track of this as well. Might be subsumed by the
dataset_comparison file, but I'll get to that next.

* Refactor benchmark_random_access into new format

Improves `benchmark_access_hub_full.py` and uses that as a base for
`benchmark_access_hub_slice.py`, which replaces functionality from
`benchmark_random_access.py` (now deleted).

* Remove unused line in benchmark_iterate_hub TF

* Local variants of iteration benchmarks using tfds

* Remove dataset compare benchmarks

Existing refactored benchmarks now cover all cases once present in this
file.

* Rename remaining un-refactored benchmarks "legacy"

Until these can be converted, I want a clear distinction between what
is and isn't compatible with the new runner (next few commits). This
will probably be fixed before this goes in.

* Fix minor issues in total access benchmarks

* Initial prototype for benchmark runner notebook

* Update benchmark runner notebook

* Add psutil to benchmark requirements

* Fix pytorch and tensorflow local benchmarks

* Add network benchmarking and expand suites

* Update .gitignore with benchmark local data

* Auto-fix issues with black

* Add timestamps to network monitor output for better plotting
---
 .gitignore                                    |   4 +
 benchmarks/benchmark_access_hub_full.py       |  16 +
 benchmarks/benchmark_access_hub_slice.py      |  16 +
 benchmarks/benchmark_compress_hub.py          |  28 ++
 benchmarks/benchmark_compress_pillow.py       |  16 +
 benchmarks/benchmark_compress_time.py         |  47 ---
 benchmarks/benchmark_dataset_comparison.py    | 270 --------------
 benchmarks/benchmark_dataset_iter.py          |  63 ----
 .../benchmark_iterate_hub_local_pytorch.py    |  59 ++++
 .../benchmark_iterate_hub_local_tensorflow.py |  21 ++
 benchmarks/benchmark_iterate_hub_pytorch.py   |  24 ++
 .../benchmark_iterate_hub_tensorflow.py       |  14 +
 benchmarks/benchmark_random_access.py         |  36 --
 benchmarks/benchmark_run.sh                   |   5 -
 ... => legacy_benchmark_sequential_access.py} |   0
 ...y => legacy_benchmark_sequential_write.py} |   0
 benchmarks/runner.ipynb                       | 328 ++++++++++++++++++
 requirements-benchmarks.txt                   |   1 +
 18 files changed, 527 insertions(+), 421 deletions(-)
 create mode 100644 benchmarks/benchmark_access_hub_full.py
 create mode 100644 benchmarks/benchmark_access_hub_slice.py
 create mode 100644 benchmarks/benchmark_compress_hub.py
 create mode 100644 benchmarks/benchmark_compress_pillow.py
 delete mode 100644 benchmarks/benchmark_compress_time.py
 delete mode 100644 benchmarks/benchmark_dataset_comparison.py
 delete mode 100644 benchmarks/benchmark_dataset_iter.py
 create mode 100644 benchmarks/benchmark_iterate_hub_local_pytorch.py
 create mode 100644 benchmarks/benchmark_iterate_hub_local_tensorflow.py
 create mode 100644 benchmarks/benchmark_iterate_hub_pytorch.py
 create mode 100644 benchmarks/benchmark_iterate_hub_tensorflow.py
 delete mode 100644 benchmarks/benchmark_random_access.py
 delete mode 100644 benchmarks/benchmark_run.sh
 rename benchmarks/{benchmark_sequential_access.py => legacy_benchmark_sequential_access.py} (100%)
 rename benchmarks/{benchmark_sequential_write.py => legacy_benchmark_sequential_write.py} (100%)
 create mode 100644 benchmarks/runner.ipynb

diff --git a/.gitignore b/.gitignore
index 3421eb801b..974eec3c5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -195,3 +195,7 @@ cov.xml
 hub/api/cov.xml
 hub/api/nested_seq
 nested_seq
+
+# Benchmark local test data (auto-downloaded)
+benchmarks/hub_data
+benchmarks/torch_data
diff --git a/benchmarks/benchmark_access_hub_full.py b/benchmarks/benchmark_access_hub_full.py
new file mode 100644
index 0000000000..0d1780e798
--- /dev/null
+++ b/benchmarks/benchmark_access_hub_full.py
@@ -0,0 +1,16 @@
+from hub import Dataset
+
+
+def benchmark_access_hub_full_setup(dataset_name, field=None):
+    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
+
+    keys = dset.keys
+    if field is not None:
+        keys = (field,)
+    return (dset, keys)
+
+
+def benchmark_access_hub_full_run(params):
+    dset, keys = params
+    for k in keys:
+        dset[k].compute()
diff --git a/benchmarks/benchmark_access_hub_slice.py b/benchmarks/benchmark_access_hub_slice.py
new file mode 100644
index 0000000000..9d9b3f8eb7
--- /dev/null
+++ b/benchmarks/benchmark_access_hub_slice.py
@@ -0,0 +1,16 @@
+from hub import Dataset
+
+
+def benchmark_access_hub_slice_setup(dataset_name, slice_bounds, field=None):
+    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
+
+    keys = dset.keys
+    if field is not None:
+        keys = (field,)
+    return (dset, slice_bounds, keys)
+
+
+def benchmark_access_hub_slice_run(params):
+    dset, slice_bounds, keys = params
+    for k in keys:
+        dset[k][slice_bounds[0] : slice_bounds[1]].compute()
diff --git a/benchmarks/benchmark_compress_hub.py b/benchmarks/benchmark_compress_hub.py
new file mode 100644
index 0000000000..648b659a6c
--- /dev/null
+++ b/benchmarks/benchmark_compress_hub.py
@@ -0,0 +1,28 @@
+import numpy as np
+from PIL import Image
+
+import hub
+
+
+def benchmark_compress_hub_setup(
+    times, image_path="./images/compression_benchmark_image.png"
+):
+    img = Image.open(image_path)
+    arr = np.array(img)
+    ds = hub.Dataset(
+        "./data/bench_png_compression",
+        mode="w",
+        shape=times,
+        schema={"image": hub.schema.Image(arr.shape, compressor="png")},
+    )
+
+    batch = np.zeros((times,) + arr.shape, dtype="uint8")
+    for i in range(times):
+        batch[i] = arr
+
+    return (ds, times, batch)
+
+
+def benchmark_compress_hub_run(params):
+    ds, times, batch = params
+    ds["image", :times] = batch
diff --git a/benchmarks/benchmark_compress_pillow.py b/benchmarks/benchmark_compress_pillow.py
new file mode 100644
index 0000000000..1d37dfbaf5
--- /dev/null
+++ b/benchmarks/benchmark_compress_pillow.py
@@ -0,0 +1,16 @@
+from PIL import Image
+from io import BytesIO
+
+
+def benchmark_compress_pillow_setup(
+    times, image_path="./images/compression_benchmark_image.png"
+):
+    img = Image.open(image_path)
+    return (img, times)
+
+
+def benchmark_compress_pillow_run(params):
+    img, times = params
+    for _ in range(times):
+        b = BytesIO()
+        img.save(b, format="png")
diff --git a/benchmarks/benchmark_compress_time.py b/benchmarks/benchmark_compress_time.py
deleted file mode 100644
index 7ffca63493..0000000000
--- a/benchmarks/benchmark_compress_time.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-License:
-This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
-If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-"""
-
-import numpy as np
-from PIL import Image
-from io import BytesIO
-
-import hub
-import hub.schema
-from hub.utils import Timer
-
-IMAGE_PATH = "./images/compression_benchmark_image.png"
-IMG = Image.open(IMAGE_PATH)
-
-REPEAT_TIMES = 100
-
-
-def bench_pil_compression(times=REPEAT_TIMES):
-    with Timer("PIL compression"):
-        for i in range(times):
-            b = BytesIO()
-            IMG.save(b, format="png")
-
-
-def bench_hub_compression(times=REPEAT_TIMES):
-    arr = np.array(IMG)
-    ds = hub.Dataset(
-        "./data/bench_png_compression",
-        mode="w",
-        shape=times,
-        schema={"image": hub.schema.Image(arr.shape, compressor="png")},
-    )
-
-    batch = np.zeros((times,) + arr.shape, dtype="uint8")
-    for i in range(times):
-        batch[i] = arr
-
-    with Timer("Hub compression"):
-        ds["image", :times] = batch
-
-
-if __name__ == "__main__":
-    bench_pil_compression()
-    bench_hub_compression()
diff --git a/benchmarks/benchmark_dataset_comparison.py b/benchmarks/benchmark_dataset_comparison.py
deleted file mode 100644
index da332ff051..0000000000
--- a/benchmarks/benchmark_dataset_comparison.py
+++ /dev/null
@@ -1,270 +0,0 @@
-import torch
-import torchvision
-from torchvision import transforms
-import tensorflow as tf
-import tensorflow_datasets as tfds
-
-from hub import Dataset
-from hub.utils import Timer
-import os
-
-# import math
-
-BATCH_SIZE = 16
-PREFETCH_SIZE = 4
-NUM_WORKERS = 1
-# CPUS = os.cpu_count()
-# NUM_WORKERS = [
-#    min(2 ** n, CPUS) for n in range(math.ceil(math.log2(CPUS)) + 1)]
-
-ROOT = "."
-S3_PATH = "s3://snark-benchmarks/datasets/Hub/"
-
-DATASET_INFO = [
-    {
-        "name": "mnist",
-        "pytorch_name": "MNIST",
-        "hub_name": "activeloop/mnist",
-        "s3_name": "mnist",
-        "split": "train+test",
-    },
-    {
-        "name": "places365_small",
-        "pytorch_name": "Places365",
-        "hub_name": "hydp/places365_small_train",
-        "s3_name": "places365_small_train",
-        "split": "train",
-        "kwargs": {"small": True},
-    },
-]
-
-
-class HubAdapter(torch.utils.data.Dataset):
-    def __init__(self, ds):
-        self.ds = ds
-
-    def __len__(self):
-        return len(self.ds)
-
-    @property
-    def shape(self):
-        return (len(self), None, None, None)
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield self[i]
-
-    def __getitem__(self, index):
-        img, label = self.ds.__getitem__(index)
-        return {"image": img, "label": label}
-
-
-class Timer(Timer):
-    def __init__(self, text):
-        super().__init__(text)
-        self._text = f"BENCHMARK - {self._text}"
-
-
-def prepare_torch_dataset(dataset_info):
-    split = dataset_info["split"].split("+")
-    trans = transforms.Compose([transforms.ToTensor()])
-    data_path = "torch_data"
-    dset_type = getattr(torchvision.datasets, dataset_info["pytorch_name"])
-    kwargs = dataset_info.get("kwargs", {})
-    if "train" in split:
-        dset = dset_type(
-            os.path.join(ROOT, data_path), transform=trans, download=True, **kwargs
-        )
-    else:
-        dset = None
-    if "test" in split:
-        test_dset = dset_type(
-            os.path.join(ROOT, data_path),
-            train=False,
-            transform=trans,
-            download=True,
-            **kwargs,
-        )
-    else:
-        test_dset = None
-    if len(split) > 1:
-        dset = torch.utils.data.ConcatDataset([dset, test_dset])
-    return dset if dset else test_dset
-
-
-def time_iter_hub_local_pytorch(
-    dataset_info,
-    batch_size=BATCH_SIZE,
-    prefetch_factor=PREFETCH_SIZE,
-    num_workers=NUM_WORKERS,
-    process=None,
-):
-    mnist = prepare_torch_dataset(dataset_info)
-    path = os.path.join(ROOT, "Hub_data", "torch")
-    Dataset.from_pytorch(HubAdapter(mnist)).store(path)
-    dset = Dataset(path, cache=False, storage_cache=False, mode="r")
-
-    loader = torch.utils.data.DataLoader(
-        dset.to_pytorch(),
-        batch_size=batch_size,
-        prefetch_factor=prefetch_factor,
-        num_workers=num_workers,
-    )
-
-    with Timer("Hub (local) `.to_pytorch()`"):
-        for image, label in loader:
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_hub_wasabi_pytorch(
-    dataset_info,
-    batch_size=BATCH_SIZE,
-    prefetch_factor=PREFETCH_SIZE,
-    num_workers=NUM_WORKERS,
-    process=None,
-):
-    dset = Dataset(dataset_info["hub_name"], cache=False, storage_cache=False, mode="r")
-    loader = torch.utils.data.DataLoader(
-        dset.to_pytorch(),
-        batch_size=batch_size,
-        prefetch_factor=prefetch_factor,
-        num_workers=num_workers,
-    )
-
-    with Timer("Hub (remote - Wasabi) `.to_pytorch()`"):
-        for image, label in loader:
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_hub_s3_pytorch(
-    dataset_info,
-    batch_size=BATCH_SIZE,
-    prefetch_factor=PREFETCH_SIZE,
-    num_workers=NUM_WORKERS,
-    process=None,
-):
-    dset = Dataset(
-        f"{S3_PATH}{dataset_info['s3_name']}",
-        cache=False,
-        storage_cache=False,
-        mode="r",
-    )
-    loader = torch.utils.data.DataLoader(
-        dset.to_pytorch(),
-        batch_size=batch_size,
-        prefetch_factor=prefetch_factor,
-        num_workers=num_workers,
-    )
-
-    with Timer("Hub (remote - S3) `.to_pytorch()`"):
-        for image, label in loader:
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_pytorch(
-    dataset_info,
-    batch_size=BATCH_SIZE,
-    prefetch_factor=PREFETCH_SIZE,
-    num_workers=NUM_WORKERS,
-    process=None,
-):
-    dset = prepare_torch_dataset(dataset_info)
-
-    loader = torch.utils.data.DataLoader(
-        dset,
-        batch_size=batch_size,
-        prefetch_factor=prefetch_factor,
-        num_workers=num_workers,
-    )
-
-    with Timer("PyTorch (local, native)"):
-        for image, label in loader:
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_hub_local_tensorflow(
-    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
-):
-    dset = Dataset.from_tfds(dataset_info["name"], split=dataset_info["split"])
-    path = os.path.join(ROOT, "Hub_data", "tfds")
-    dset.store(path)
-    dset = Dataset(path, cache=False, storage_cache=False, mode="r")
-    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
-
-    with Timer("Hub (local) `.to_tensorflow()`"):
-        for batch in loader:
-            image = batch["image"]
-            label = batch["label"]
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_hub_wasabi_tensorflow(
-    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
-):
-    dset = Dataset(dataset_info["hub_name"], cache=False, storage_cache=False, mode="r")
-    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
-
-    with Timer("Hub (remote - Wasabi) `.to_tensorflow()`"):
-        for batch in loader:
-            image = batch["image"]
-            label = batch["label"]
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_hub_s3_tensorflow(
-    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
-):
-    dset = Dataset(
-        f"{S3_PATH}{dataset_info['s3_name']}",
-        cache=False,
-        storage_cache=False,
-        mode="r",
-    )
-    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
-
-    with Timer("Hub (remote - S3) `.to_tensorflow()`"):
-        for batch in loader:
-            image = batch["image"]
-            label = batch["label"]
-            if process is not None:
-                process(image, label)
-
-
-def time_iter_tensorflow(
-    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
-):
-    # turn off auto-splitting and disable multiprocessing
-    options = tf.data.Options()
-    blockAS = tf.data.experimental.AutoShardPolicy.OFF
-    options.experimental_distribute.auto_shard_policy = blockAS
-    options.experimental_optimization.autotune_cpu_budget = 1
-
-    loader = tfds.load(dataset_info["name"], split=dataset_info["split"]).with_options(
-        options
-    )
-
-    with Timer("Tensorflow (local, native - TFDS)"):
-        for batch in loader:
-            image = batch["image"]
-            label = batch["label"]
-            if process is not None:
-                process(image, label)
-
-
-if __name__ == "__main__":
-    for i, info in enumerate(DATASET_INFO):
-        print(f'BENCHMARK DATASET #{i}: {info["name"]}')
-        time_iter_hub_wasabi_pytorch(info)
-        time_iter_hub_local_pytorch(info)
-        time_iter_hub_s3_pytorch(info)
-        time_iter_pytorch(info)
-        time_iter_hub_wasabi_tensorflow(info)
-        time_iter_hub_local_tensorflow(info)
-        time_iter_hub_s3_tensorflow(info)
-        time_iter_tensorflow(info)
diff --git a/benchmarks/benchmark_dataset_iter.py b/benchmarks/benchmark_dataset_iter.py
deleted file mode 100644
index 7f282b318e..0000000000
--- a/benchmarks/benchmark_dataset_iter.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-License:
-This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
-If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-"""
-
-import torch
-
-from hub import Dataset
-from hub.utils import Timer
-
-DATASET_NAMES = ["activeloop/mnist", "activeloop/cifar10_train"]
-
-BATCH_SIZES = [1, 16, 128]
-
-PREFETCH_SIZES = [1, 4, 16, 128]
-
-
-def time_iter_pytorch(
-    dataset_name="activeloop/mnist", batch_size=1, prefetch_factor=0, process=None
-):
-
-    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
-
-    loader = torch.utils.data.DataLoader(
-        dset.to_pytorch(),
-        batch_size=batch_size,
-        prefetch_factor=prefetch_factor,
-        num_workers=1,
-    )
-
-    with Timer(
-        f"{dataset_name} PyTorch prefetch {prefetch_factor:03} in batches of {batch_size:03}"
-    ):
-        for idx, (image, label) in enumerate(loader):
-            if process is not None:
-                process(idx, image, label)
-
-
-def time_iter_tensorflow(
-    dataset_name="activeloop/mnist", batch_size=1, prefetch_factor=0, process=None
-):
-
-    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
-
-    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
-
-    with Timer(
-        f"{dataset_name} TF prefetch {prefetch_factor:03} in batches of {batch_size:03}"
-    ):
-        for idx, batch in enumerate(loader):
-            image = batch["image"]
-            label = batch["label"]
-            if process is not None:
-                process(idx, image, label)
-
-
-if __name__ == "__main__":
-    for name in DATASET_NAMES:
-        for size in BATCH_SIZES:
-            for prefetch in PREFETCH_SIZES:
-                time_iter_pytorch(name, size, prefetch, None)
-                time_iter_tensorflow(name, size, prefetch, None)
diff --git a/benchmarks/benchmark_iterate_hub_local_pytorch.py b/benchmarks/benchmark_iterate_hub_local_pytorch.py
new file mode 100644
index 0000000000..5915a0d079
--- /dev/null
+++ b/benchmarks/benchmark_iterate_hub_local_pytorch.py
@@ -0,0 +1,59 @@
+import torchvision
+from torchvision import transforms
+import torch
+import os
+
+from hub import Dataset
+
+
+class HubAdapter(torch.utils.data.Dataset):
+    def __init__(self, ds):
+        self.ds = ds
+
+    def __len__(self):
+        return len(self.ds)
+
+    @property
+    def shape(self):
+        return (len(self), None, None, None)
+
+    def __iter__(self):
+        for i in range(len(self)):
+            yield self[i]
+
+    def __getitem__(self, index):
+        img, label = self.ds.__getitem__(index)
+        return {"image": img, "label": label}
+
+
+def benchmark_iterate_hub_local_pytorch_setup(
+    dataset_name, dataset_split, batch_size, prefetch_factor, num_workers=1
+):
+    """Store a torchvision dataset as a local Hub dataset; return `(loader,)`."""
+    trans = transforms.Compose([transforms.ToTensor()])
+    data_path = os.path.join(".", "torch_data")
+    dset_type = getattr(torchvision.datasets, dataset_name)
+    # Own directory: the TensorFlow local benchmark stores to hub_data/tfds.
+    path = os.path.join(".", "hub_data", "torch")
+    dset = dset_type(
+        data_path,
+        transform=trans,
+        # Must be a real bool: MNIST-style datasets load the test split if falsy.
+        train="test" not in dataset_split,
+        download=True,
+    )
+    Dataset.from_pytorch(HubAdapter(dset)).store(path)
+    dset = Dataset(path, cache=False, storage_cache=False, mode="r")
+    loader = torch.utils.data.DataLoader(
+        dset.to_pytorch(),
+        batch_size=batch_size,
+        prefetch_factor=prefetch_factor,
+        num_workers=num_workers,
+    )
+    return (loader,)
+
+
+def benchmark_iterate_hub_local_pytorch_run(params):
+    (loader,) = params
+    for _ in loader:
+        pass
diff --git a/benchmarks/benchmark_iterate_hub_local_tensorflow.py b/benchmarks/benchmark_iterate_hub_local_tensorflow.py
new file mode 100644
index 0000000000..3c03f2ee5a
--- /dev/null
+++ b/benchmarks/benchmark_iterate_hub_local_tensorflow.py
@@ -0,0 +1,21 @@
+from hub import Dataset
+import os
+
+
+def benchmark_iterate_hub_local_tensorflow_setup(
+    dataset_name, dataset_split, batch_size, prefetch_factor
+):
+    dset = Dataset.from_tfds(dataset_name, split=dataset_split)
+    path = os.path.join(".", "hub_data", "tfds")
+    dset.store(path)
+    dset = Dataset(path, cache=False, storage_cache=False, mode="r")
+
+    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
+
+    return (loader,)
+
+
+def benchmark_iterate_hub_local_tensorflow_run(params):
+    (loader,) = params
+    for _ in loader:
+        pass
diff --git a/benchmarks/benchmark_iterate_hub_pytorch.py b/benchmarks/benchmark_iterate_hub_pytorch.py
new file mode 100644
index 0000000000..5e4499a39e
--- /dev/null
+++ b/benchmarks/benchmark_iterate_hub_pytorch.py
@@ -0,0 +1,24 @@
+import torch
+
+from hub import Dataset
+
+
+def benchmark_iterate_hub_pytorch_setup(
+    dataset_name, batch_size, prefetch_factor, num_workers=1
+):
+    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
+
+    loader = torch.utils.data.DataLoader(
+        dset.to_pytorch(),
+        batch_size=batch_size,
+        prefetch_factor=prefetch_factor,
+        num_workers=num_workers,
+    )
+
+    return (loader,)
+
+
+def benchmark_iterate_hub_pytorch_run(params):
+    (loader,) = params
+    for _ in loader:
+        pass
diff --git a/benchmarks/benchmark_iterate_hub_tensorflow.py b/benchmarks/benchmark_iterate_hub_tensorflow.py
new file mode 100644
index 0000000000..21e4c71a81
--- /dev/null
+++ b/benchmarks/benchmark_iterate_hub_tensorflow.py
@@ -0,0 +1,14 @@
+from hub import Dataset
+
+
+def benchmark_iterate_hub_tensorflow_setup(dataset_name, batch_size, prefetch_factor):
+    dset = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
+
+    loader = dset.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
+    return (loader,)
+
+
+def benchmark_iterate_hub_tensorflow_run(params):
+    (loader,) = params
+    for _ in loader:
+        pass
diff --git a/benchmarks/benchmark_random_access.py b/benchmarks/benchmark_random_access.py
deleted file mode 100644
index f0033dc158..0000000000
--- a/benchmarks/benchmark_random_access.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"""
-License:
-This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
-If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-"""
-
-from random import randint
-from itertools import chain
-
-from hub import Dataset
-from hub.utils import Timer
-
-DATASET_NAMES = [
-    "activeloop/mnist",
-    "activeloop/omniglot_test",
-    "activeloop/cifar10_train",
-    "activeloop/cifar100_train",
-]
-
-SPAN_POWER_MAX = 10
-
-
-def time_random_access(
-    dataset_name="activeloop/mnist", offset=1000, span=1000, field="image"
-):
-    dset = Dataset(dataset_name, cache=False, storage_cache=False)
-    with Timer(f"{dataset_name} read at offset {offset:03} of length {span:03}"):
-        dset[field][offset : offset + span].compute()
-
-
-if __name__ == "__main__":
-    for name in DATASET_NAMES:
-        for span in range(SPAN_POWER_MAX):
-            offset = randint(0, 999)
-            time_random_access(name, offset, 2 ** span)
-        print()
diff --git a/benchmarks/benchmark_run.sh b/benchmarks/benchmark_run.sh
deleted file mode 100644
index 154a24855a..0000000000
--- a/benchmarks/benchmark_run.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-for filename in ./benchmark_*.py; do
-    [ -e "$filename" ] || continue
-    python3 ./$filename >> results.log
-done
\ No newline at end of file
diff --git a/benchmarks/benchmark_sequential_access.py b/benchmarks/legacy_benchmark_sequential_access.py
similarity index 100%
rename from benchmarks/benchmark_sequential_access.py
rename to benchmarks/legacy_benchmark_sequential_access.py
diff --git a/benchmarks/benchmark_sequential_write.py b/benchmarks/legacy_benchmark_sequential_write.py
similarity index 100%
rename from benchmarks/benchmark_sequential_write.py
rename to benchmarks/legacy_benchmark_sequential_write.py
diff --git a/benchmarks/runner.ipynb b/benchmarks/runner.ipynb
new file mode 100644
index 0000000000..2d20e4e93b
--- /dev/null
+++ b/benchmarks/runner.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from hub.utils import Timer\n",
+    "from hub import Dataset\n",
+    "from memory_profiler import memory_usage\n",
+    "import asyncio\n",
+    "import psutil\n",
+    "import time\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Network Monitoring Helpers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async def network_monitor(check_finish, sample_frequency=1):\n",
+    "    samples = []\n",
+    "    prev_bytes = None\n",
+    "    while True:\n",
+    "        stats = psutil.net_io_counters()\n",
+    "        if prev_bytes is not None:\n",
+    "            samples.append((time.time(), stats.bytes_recv - prev_bytes))\n",
+    "        prev_bytes = stats.bytes_recv\n",
+    "        await asyncio.sleep(sample_frequency)\n",
+    "        if check_finish():\n",
+    "            return samples\n",
+    "\n",
+    "async def network_monitor_call(f):\n",
+    "    is_finished = False\n",
+    "    check_finish = lambda: is_finished\n",
+    "    task = asyncio.create_task(network_monitor(check_finish))\n",
+    "    await asyncio.to_thread(f)\n",
+    "    is_finished = True\n",
+    "    await task\n",
+    "    return task.result()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmark Runners"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def time_runner(params):\n",
+    "    run_function, setup_function, setup_params = params\n",
+    "    params = setup_function(*setup_params)\n",
+    "    begin = time.time()\n",
+    "    run_function(params)\n",
+    "    end = time.time()\n",
+    "    return end - begin\n",
+    "\n",
+    "def memory_runner(params):\n",
+    "    run_function, setup_function, setup_params = params\n",
+    "    params = setup_function(*setup_params)\n",
+    "    baseline = memory_usage()\n",
+    "    usage = memory_usage((run_function, (params,)))\n",
+    "    return (max(baseline), max(usage))\n",
+    "\n",
+    "async def network_runner(params):\n",
+    "    run_function, setup_function, setup_params = params\n",
+    "    params = setup_function(*setup_params)\n",
+    "    return await network_monitor_call(lambda: run_function(params))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Benchmarks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Full Dataset Access"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_access_hub_full import benchmark_access_hub_full_setup, benchmark_access_hub_full_run\n",
+    "access_full_suite = [(benchmark_access_hub_full_run, benchmark_access_hub_full_setup, (dset,)) for dset in ['activeloop/mnist']]\n",
+    "\n",
+    "hub_full_times = list(map(time_runner, access_full_suite))\n",
+    "hub_full_mem_usages = list(map(memory_runner, access_full_suite))\n",
+    "hub_full_net_usages = [await network_runner(params) for params in access_full_suite]\n",
+    "\n",
+    "print(hub_full_times)\n",
+    "print(hub_full_mem_usages)\n",
+    "print(hub_full_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Random Slice Access"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_access_hub_slice import benchmark_access_hub_slice_setup, benchmark_access_hub_slice_run\n",
+    "access_slice_suite = [(benchmark_access_hub_slice_run, benchmark_access_hub_slice_setup, t) for t in [('activeloop/mnist', (0, 256)), ('activeloop/mnist', (2048, 2048+256))]]\n",
+    "\n",
+    "hub_slice_times = list(map(time_runner, access_slice_suite))\n",
+    "hub_slice_mem_usages = list(map(memory_runner, access_slice_suite))\n",
+    "hub_slice_net_usages = [await network_runner(params) for params in access_slice_suite]\n",
+    "\n",
+    "print(hub_slice_times)\n",
+    "print(hub_slice_mem_usages)\n",
+    "print(hub_slice_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Compression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_compress_hub import benchmark_compress_hub_setup, benchmark_compress_hub_run\n",
+    "hub_compress_suite = [(benchmark_compress_hub_run, benchmark_compress_hub_setup, t) for t in [(32,)]]\n",
+    "\n",
+    "hub_compress_times = list(map(time_runner, hub_compress_suite))\n",
+    "hub_compress_mem_usages = list(map(memory_runner, hub_compress_suite))\n",
+    "\n",
+    "print(hub_compress_times)\n",
+    "print(hub_compress_mem_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Pillow Compression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_compress_pillow import benchmark_compress_pillow_setup, benchmark_compress_pillow_run\n",
+    "pillow_compress_suite = [(benchmark_compress_pillow_run, benchmark_compress_pillow_setup, t) for t in [(32,)]]\n",
+    "\n",
+    "pillow_compress_times = list(map(time_runner, pillow_compress_suite))\n",
+    "pillow_compress_mem_usages = list(map(memory_runner, pillow_compress_suite))\n",
+    "\n",
+    "print(pillow_compress_times)\n",
+    "print(pillow_compress_mem_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Local Dataset Iteration - PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_iterate_hub_local_pytorch import benchmark_iterate_hub_local_pytorch_setup, benchmark_iterate_hub_local_pytorch_run\n",
+    "iterate_local_pytorch_suite = [(benchmark_iterate_hub_local_pytorch_run, benchmark_iterate_hub_local_pytorch_setup, t) for t in [('MNIST', 'train', 128, 128)]]\n",
+    "\n",
+    "hub_iterate_local_pytorch_times = list(map(time_runner, iterate_local_pytorch_suite))\n",
+    "hub_iterate_local_pytorch_mem_usages = list(map(memory_runner, iterate_local_pytorch_suite))\n",
+    "hub_iterate_local_pytorch_net_usages = [await network_runner(params) for params in iterate_local_pytorch_suite]\n",
+    "\n",
+    "print(hub_iterate_local_pytorch_times)\n",
+    "print(hub_iterate_local_pytorch_mem_usages)\n",
+    "print(hub_iterate_local_pytorch_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Local Dataset Iteration - TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_iterate_hub_local_tensorflow import benchmark_iterate_hub_local_tensorflow_setup, benchmark_iterate_hub_local_tensorflow_run\n",
+    "iterate_local_tensorflow_suite = [(benchmark_iterate_hub_local_tensorflow_run, benchmark_iterate_hub_local_tensorflow_setup, t) for t in [('mnist', 'train', 128, 128)]]\n",
+    "\n",
+    "hub_iterate_local_tensorflow_times = list(map(time_runner, iterate_local_tensorflow_suite))\n",
+    "hub_iterate_local_tensorflow_mem_usages = list(map(memory_runner, iterate_local_tensorflow_suite))\n",
+    "hub_iterate_local_tensorflow_net_usages = [await network_runner(params) for params in iterate_local_tensorflow_suite]\n",
+    "\n",
+    "print(hub_iterate_local_tensorflow_times)\n",
+    "print(hub_iterate_local_tensorflow_mem_usages)\n",
+    "print(hub_iterate_local_tensorflow_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Dataset Iteration - PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_iterate_hub_pytorch import benchmark_iterate_hub_pytorch_setup, benchmark_iterate_hub_pytorch_run\n",
+    "iterate_pytorch_suite = [(benchmark_iterate_hub_pytorch_run, benchmark_iterate_hub_pytorch_setup, t) for t in [('activeloop/mnist', 128, 128)]]\n",
+    "\n",
+    "hub_iterate_pytorch_times = list(map(time_runner, iterate_pytorch_suite))\n",
+    "hub_iterate_pytorch_mem_usages = list(map(memory_runner, iterate_pytorch_suite))\n",
+    "hub_iterate_pytorch_net_usages = [await network_runner(params) for params in iterate_pytorch_suite]\n",
+    "\n",
+    "print(hub_iterate_pytorch_times)\n",
+    "print(hub_iterate_pytorch_mem_usages)\n",
+    "print(hub_iterate_pytorch_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hub Dataset Iteration - TensorFlow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmark_iterate_hub_tensorflow import benchmark_iterate_hub_tensorflow_setup, benchmark_iterate_hub_tensorflow_run\n",
+    "iterate_tensorflow_suite = [(benchmark_iterate_hub_tensorflow_run, benchmark_iterate_hub_tensorflow_setup, t) for t in [('activeloop/mnist', 128, 128)]]\n",
+    "\n",
+    "hub_iterate_tensorflow_times = list(map(time_runner, iterate_tensorflow_suite))\n",
+    "hub_iterate_tensorflow_mem_usages = list(map(memory_runner, iterate_tensorflow_suite))\n",
+    "hub_iterate_tensorflow_net_usages = [await network_runner(params) for params in iterate_tensorflow_suite]\n",
+    "\n",
+    "print(hub_iterate_tensorflow_times)\n",
+    "print(hub_iterate_tensorflow_mem_usages)\n",
+    "print(hub_iterate_tensorflow_net_usages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Output Benchmark Results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: write the benchmark results collected above to a file in your favourite format"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements-benchmarks.txt b/requirements-benchmarks.txt
index 578a345cea..f119fd4c83 100644
--- a/requirements-benchmarks.txt
+++ b/requirements-benchmarks.txt
@@ -2,3 +2,4 @@ tiledb==0.8.6
 torchvision==0.9.0
 altair==4.1.0
 altair_saver==0.5.0
+psutil>=5.8