# Save Load Performance Benchmarks
In this notebook, two save file formats are compared against each other for their read/write performance. The version one format writes each "population" in an optimization history object as its own HDF5 group. The version two format instead merges all decision variables, objectives, and constraints into a single large array before saving. This should be more performant because it hits the HDF5 API fewer times.

In [1]:
%load_ext autoreload
%autoreload 2

from paretobench.containers import Experiment
import tempfile
import time
import os
import numpy as np

In [2]:
# Build one randomly generated experiment that every benchmark below saves and loads.
# NOTE(review): the meaning of each positional size argument is defined by
# `Experiment.from_random` and is not visible in this notebook -- confirm before tuning.
exp = Experiment.from_random(
    5*32,
    20,
    10,
    30,
    2,
    50,
)

In [3]:
def time_save_load(version):
    """
    Time one save/load round trip of the notebook-level `exp` object.

    The experiment is written to `test.h5` inside a fresh temporary directory and
    then read back from the same file. The directory is removed when the function
    returns.

    Parameters
    ----------
    version
        Save file format version, forwarded unchanged to `exp.save` and
        `Experiment.load` (this notebook calls it with 1 and 2).

    Returns
    -------
    dict
        `{'save_t': <seconds>, 'load_t': <seconds>}` measured with
        `time.perf_counter`.
    """
    # Named `tmp_dir` rather than `dir` so the builtin is not shadowed
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Build the path once, outside of both timed regions
        path = os.path.join(tmp_dir, 'test.h5')

        # Time the save on its own
        save_start = time.perf_counter()
        exp.save(path, version=version)
        save_t = time.perf_counter() - save_start

        # Time the load with a fresh start so it does not absorb any work done
        # between the two phases
        load_start = time.perf_counter()
        Experiment.load(path, version=version)
        load_t = time.perf_counter() - load_start

    # Report the timings
    return {'save_t': save_t, 'load_t': load_t}

In [4]:
# Benchmark the version one format: repeat the save/load round trip ten times
# and report the mean duration of each phase.
runs = [time_save_load(1) for _ in range(10)]
save_t = np.mean([timing['save_t'] for timing in runs])
load_t = np.mean([timing['load_t'] for timing in runs])
print(f'Version one timings: save={save_t:.2f}s, load={load_t:.2f}s')

Version one timings: save=1.29s, load=1.08s


In [6]:
# Benchmark the version two format: repeat the save/load round trip ten times
# and report the mean duration of each phase.
runs = [time_save_load(2) for _ in range(10)]
save_t = np.mean([timing['save_t'] for timing in runs])
load_t = np.mean([timing['load_t'] for timing in runs])
print(f'Version two timings: save={save_t:.2f}s, load={load_t:.2f}s')

Version two timings: save=0.13s, load=0.14s
