In [None]:
from hub.utils import Timer
from hub import Dataset
from memory_profiler import memory_usage
import asyncio
import psutil
import time
import os

## Network Monitoring Helpers

In [None]:
async def network_monitor(check_finish, sample_frequency=1):
    """Sample received network bytes until ``check_finish`` reports completion.

    The counter comes from ``psutil.net_io_counters().bytes_recv``. A baseline
    is read before the first sleep, and one sample is recorded after every
    sleep, including the one during which the workload finished. Traffic
    received in the final interval is therefore counted, and a workload
    shorter than one interval still yields a single sample instead of an
    empty list.

    Args:
        check_finish: Zero-argument callable; a truthy return value ends the
            monitoring loop.
        sample_frequency: Seconds to sleep between samples. Despite the name
            this is a period, not a rate.

    Returns:
        A list of ``(timestamp, bytes_received)`` tuples, where ``timestamp``
        is ``time.time()`` at sampling and ``bytes_received`` is the increase
        of the counter since the previous reading.
    """
    samples = []
    prev_bytes = psutil.net_io_counters().bytes_recv
    while True:
        await asyncio.sleep(sample_frequency)
        # Read the flag before the counter so the closing sample covers
        # everything received up to the moment the workload was seen finished.
        finished = check_finish()
        current_bytes = psutil.net_io_counters().bytes_recv
        samples.append((time.time(), current_bytes - prev_bytes))
        prev_bytes = current_bytes
        if finished:
            return samples

async def network_monitor_call(f):
    """Run ``f`` in a worker thread while ``network_monitor`` samples traffic.

    ``f`` is executed through ``asyncio.to_thread`` so the event loop stays
    free to run the monitoring task concurrently.

    Args:
        f: Zero-argument callable containing the workload to observe.

    Returns:
        The list of samples returned by ``network_monitor``.

    Raises:
        Any exception raised by ``f``. The monitor task is told to stop and is
        awaited before the exception propagates, so it is never left polling
        in the background.
    """
    is_finished = False
    # The lambda closes over the local flag, so the assignment below is
    # visible to the monitor on its next check.
    task = asyncio.create_task(network_monitor(lambda: is_finished))
    try:
        await asyncio.to_thread(f)
    finally:
        # Release the monitor even when the workload fails; otherwise the
        # task would keep looping forever.
        is_finished = True
        samples = await task
    return samples

## Benchmark Runners

In [None]:
def time_runner(params):
    """Measure how long a benchmark's run function takes.

    Uses ``time.perf_counter()`` rather than ``time.time()``: the wall clock
    can be adjusted while the benchmark runs, which may produce skewed or even
    negative durations.

    Args:
        params: A ``(run_function, setup_function, setup_params)`` tuple.
            ``setup_function(*setup_params)`` is called first (untimed) and
            its result is passed as the single argument to ``run_function``.

    Returns:
        Elapsed seconds spent inside ``run_function`` as a float.
    """
    run_function, setup_function, setup_params = params
    run_args = setup_function(*setup_params)
    begin = time.perf_counter()
    run_function(run_args)
    return time.perf_counter() - begin

def memory_runner(params):
    """Report peak memory before and during a benchmark run.

    Args:
        params: A ``(run_function, setup_function, setup_params)`` tuple.
            ``setup_function(*setup_params)`` is called first and its result
            is handed to ``run_function`` as its only argument.

    Returns:
        A ``(baseline_peak, run_peak)`` tuple: the maximum of the values
        returned by ``memory_usage()`` with no arguments, and the maximum of
        the values returned by ``memory_usage`` while it executes
        ``run_function``.
    """
    run_function, setup_function, setup_params = params
    run_args = setup_function(*setup_params)

    # Measured after setup so the setup's allocations count as baseline.
    idle_samples = memory_usage()
    busy_samples = memory_usage((run_function, (run_args,)))

    baseline_peak = max(idle_samples)
    run_peak = max(busy_samples)
    return baseline_peak, run_peak

async def network_runner(params):
    """Run a benchmark under ``network_monitor_call`` and return its result.

    Args:
        params: A ``(run_function, setup_function, setup_params)`` tuple.
            ``setup_function(*setup_params)`` is called first and its result
            is handed to ``run_function`` as its only argument.

    Returns:
        Whatever ``network_monitor_call`` returns for the wrapped workload.
    """
    run_function, setup_function, setup_params = params
    run_args = setup_function(*setup_params)

    def workload():
        # Zero-argument wrapper, as network_monitor_call invokes it bare.
        return run_function(run_args)

    samples = await network_monitor_call(workload)
    return samples

# Benchmarks

## Hub Full Dataset Access

In [None]:
from benchmark_access_hub_full import benchmark_access_hub_full_setup, benchmark_access_hub_full_run
access_full_suite = [(benchmark_access_hub_full_run, benchmark_access_hub_full_setup, (dset,)) for dset in ['activeloop/mnist']]

hub_full_times = list(map(time_runner, access_full_suite))
hub_full_mem_usages = list(map(memory_runner, access_full_suite))
hub_full_net_usages = [await network_runner(params) for params in access_full_suite]

print(hub_full_times)
print(hub_full_mem_usages)
print(hub_full_net_usages)

## Hub Random Slice Access

In [None]:
from benchmark_access_hub_slice import benchmark_access_hub_slice_setup, benchmark_access_hub_slice_run
access_slice_suite = [(benchmark_access_hub_slice_run, benchmark_access_hub_slice_setup, t) for t in [('activeloop/mnist', (0, 256)), ('activeloop/mnist', (2048, 2048+256))]]

hub_slice_times = list(map(time_runner, access_slice_suite))
hub_slice_mem_usages = list(map(memory_runner, access_slice_suite))
hub_slice_net_usages = [await network_runner(params) for params in access_slice_suite]

print(hub_slice_times)
print(hub_slice_mem_usages)
print(hub_slice_net_usages)

## Hub Compression

In [None]:
from benchmark_compress_hub import benchmark_compress_hub_setup, benchmark_compress_hub_run
# One benchmark case per setup-argument tuple handed to the Hub compression setup function.
hub_compress_suite = [
    (benchmark_compress_hub_run, benchmark_compress_hub_setup, setup_args)
    for setup_args in [(32,)]
]

# Compression is only timed and memory-profiled; no network run here.
hub_compress_times = [time_runner(case) for case in hub_compress_suite]
hub_compress_mem_usages = [memory_runner(case) for case in hub_compress_suite]

# One result list per output line: times, then memory usages.
print(hub_compress_times, hub_compress_mem_usages, sep='\n')

## Pillow Compression

In [None]:
from benchmark_compress_pillow import benchmark_compress_pillow_setup, benchmark_compress_pillow_run
# One benchmark case per setup-argument tuple handed to the Pillow compression setup function.
pillow_compress_suite = [
    (benchmark_compress_pillow_run, benchmark_compress_pillow_setup, setup_args)
    for setup_args in [(32,)]
]

# Compression is only timed and memory-profiled; no network run here.
pillow_compress_times = [time_runner(case) for case in pillow_compress_suite]
pillow_compress_mem_usages = [memory_runner(case) for case in pillow_compress_suite]

# One result list per output line: times, then memory usages.
print(pillow_compress_times, pillow_compress_mem_usages, sep='\n')

## Hub Local Dataset Iteration - PyTorch

In [None]:
from benchmark_iterate_hub_local_pytorch import benchmark_iterate_hub_local_pytorch_setup, benchmark_iterate_hub_local_pytorch_run
iterate_local_pytorch_suite = [(benchmark_iterate_hub_local_pytorch_run, benchmark_iterate_hub_local_pytorch_setup, t) for t in [('MNIST', 'train', 128, 128)]]

hub_iterate_local_pytorch_times = list(map(time_runner, iterate_local_pytorch_suite))
hub_iterate_local_pytorch_mem_usages = list(map(memory_runner, iterate_local_pytorch_suite))
hub_iterate_local_pytorch_net_usages = [await network_runner(params) for params in iterate_local_pytorch_suite]

print(hub_iterate_local_pytorch_times)
print(hub_iterate_local_pytorch_mem_usages)
print(hub_iterate_local_pytorch_net_usages)

## Hub Local Dataset Iteration - TensorFlow

In [None]:
from benchmark_iterate_hub_local_tensorflow import benchmark_iterate_hub_local_tensorflow_setup, benchmark_iterate_hub_local_tensorflow_run
iterate_local_tensorflow_suite = [(benchmark_iterate_hub_local_tensorflow_run, benchmark_iterate_hub_local_tensorflow_setup, t) for t in [('mnist', 'train', 128, 128)]]

hub_iterate_local_tensorflow_times = list(map(time_runner, iterate_local_tensorflow_suite))
hub_iterate_local_tensorflow_mem_usages = list(map(memory_runner, iterate_local_tensorflow_suite))
hub_iterate_local_tensorflow_net_usages = [await network_runner(params) for params in iterate_local_tensorflow_suite]

print(hub_iterate_local_tensorflow_times)
print(hub_iterate_local_tensorflow_mem_usages)
print(hub_iterate_local_tensorflow_net_usages)

## Hub Dataset Iteration - PyTorch

In [None]:
from benchmark_iterate_hub_pytorch import benchmark_iterate_hub_pytorch_setup, benchmark_iterate_hub_pytorch_run
iterate_pytorch_suite = [(benchmark_iterate_hub_pytorch_run, benchmark_iterate_hub_pytorch_setup, t) for t in [('activeloop/mnist', 128, 128)]]

hub_iterate_pytorch_times = list(map(time_runner, iterate_pytorch_suite))
hub_iterate_pytorch_mem_usages = list(map(memory_runner, iterate_pytorch_suite))
hub_iterate_pytorch_net_usages = [await network_runner(params) for params in iterate_pytorch_suite]

print(hub_iterate_pytorch_times)
print(hub_iterate_pytorch_mem_usages)
print(hub_iterate_pytorch_net_usages)

## Hub Dataset Iteration - TensorFlow

In [None]:
from benchmark_iterate_hub_tensorflow import benchmark_iterate_hub_tensorflow_setup, benchmark_iterate_hub_tensorflow_run
iterate_tensorflow_suite = [(benchmark_iterate_hub_tensorflow_run, benchmark_iterate_hub_tensorflow_setup, t) for t in [('activeloop/mnist', 128, 128)]]

hub_iterate_tensorflow_times = list(map(time_runner, iterate_tensorflow_suite))
hub_iterate_tensorflow_mem_usages = list(map(memory_runner, iterate_tensorflow_suite))
hub_iterate_tensorflow_net_usages = [await network_runner(params) for params in iterate_tensorflow_suite]

print(hub_iterate_tensorflow_times)
print(hub_iterate_tensorflow_mem_usages)
print(hub_iterate_tensorflow_net_usages)

## Output Benchmark Results

In [None]:
# Write to your favourite file format here