# Serialization speed of NumPy arrays

In this example, we measure how long it takes to serialize (persist) and reload blobs of random bytes ranging from 1B up to 1GB, repeating the measurement 10 times for each size. The serialized Python object is a `numpy.ndarray`.





In [1]:
import os

import numpy as np
from mltraq import Run, Sequence, create_experiment


def eval_persist(run: Run):
    """
    Time how long it takes to persist and then reload an experiment
    holding a NumPy array of `run.params.n_bytes` random bytes.

    The blob is created before the first checkpoint, so generating the
    random bytes is not part of either measurement.

    Fields written on `run`:
      - `log`: the `Sequence` of checkpoints tagged "a", "b" and "c".
      - `n_bytes`: size of the blob, copied from `run.params.n_bytes`.
      - `duration_persist`: time between checkpoints "a" and "b",
        i.e. around the `persist()` call.
      - `duration_load`: time between checkpoints "b" and "c",
        i.e. around the `reload()` call.
    """

    # Fresh experiment local to this run; named `experiment` so it does
    # not shadow the notebook-level `e` that executes the runs.
    experiment = create_experiment()
    run.fields.log = Sequence()

    # Build a 1-D uint8 array with exactly n_bytes elements. Passing the
    # bytes object directly to np.array() would instead produce a 0-d
    # array holding a single fixed-width byte string (dtype "S<n>"),
    # whose trailing NUL bytes are dropped when the value is read back.
    experiment.fields.blob = np.frombuffer(os.urandom(run.params.n_bytes), dtype=np.uint8)

    run.fields.log.append(tag="a")
    experiment.persist()
    run.fields.log.append(tag="b")
    experiment.reload()
    run.fields.log.append(tag="c")

    # Each tag is logged exactly once, so a direct tag -> timestamp
    # lookup is enough; no aggregation over timestamps is needed.
    timestamps = run.fields.log.df().set_index("tag")["timestamp"]
    run.fields.n_bytes = run.params.n_bytes
    run.fields.duration_persist = timestamps["b"] - timestamps["a"]
    run.fields.duration_load = timestamps["c"] - timestamps["b"]


e = create_experiment()

# Every blob size is measured 10 times (parameter `i`) so that the
# timings can be averaged afterwards. Sizes are the powers of ten
# 10**0 .. 10**9, i.e. from 1B up to 1GB.
e.add_runs(
    i=range(10),
    n_bytes=[10**exponent for exponent in range(10)],
)

# Run everything sequentially in the foreground (a single job)
e.execute(eval_persist, n_jobs=1)

# Display the resulting runs
e.runs

  1%|1         | 1/100 [00:08<13:30,  8.19s/it]

In [2]:
# "Persist" elapsed time
e.runs.df().groupby("n_bytes")["duration_persist"].mean()

n_bytes
1            0 days 00:00:00.021452339
10           0 days 00:00:00.004898713
100          0 days 00:00:00.043507968
1000         0 days 00:00:00.004704844
10000        0 days 00:00:00.004720204
100000       0 days 00:00:00.009054822
1000000      0 days 00:00:00.028673689
10000000     0 days 00:00:00.015729484
100000000    0 days 00:00:00.138723840
1000000000   0 days 00:00:01.770619827
Name: duration_persist, dtype: timedelta64[ns]

In [3]:
# "Reload" elapsed time
e.runs.df().groupby("n_bytes")["duration_load"].mean()

n_bytes
1            0 days 00:00:00.023198976
10           0 days 00:00:00.008873881
100          0 days 00:00:00.001059788
1000         0 days 00:00:00.001021030
10000        0 days 00:00:00.019154099
100000       0 days 00:00:00.001186611
1000000      0 days 00:00:00.001745612
10000000     0 days 00:00:00.008737459
100000000    0 days 00:00:00.097067699
1000000000   0 days 00:00:02.923016755
Name: duration_load, dtype: timedelta64[ns]