# Serialization speed of Python `bytes`

In this example, we measure how long it takes to serialize and reload up to 1GB of random bytes, repeating each test 10 times. The serialized Python variable is of type `bytes`. Interesting fact: [SQLite is faster than writing directly to the filesystem](https://www.sqlite.org/fasterthanfs.html).





In [1]:
import os

from mltraq import Run, Sequence, create_experiment


def eval_persist(run: Run):
    """
    Time one persist/reload round trip of a fresh experiment
    whose `blob` field holds `run.params.n_bytes` random bytes.

    Three entries tagged "a", "b" and "c" are appended to
    `run.fields.log` right before `persist()`, between `persist()`
    and `reload()`, and right after `reload()`. The differences
    between their timestamps are stored in
    `run.fields.duration_persist` and `run.fields.duration_load`,
    together with `run.fields.n_bytes`.
    """

    n_bytes = run.params.n_bytes

    # The payload is generated before checkpoint "a", so producing
    # the random bytes is not part of the measured interval.
    experiment = create_experiment()
    run.fields.log = Sequence()
    experiment.fields.blob = os.urandom(n_bytes)

    # Checkpoints around the two operations under test.
    run.fields.log.append(tag="a")
    experiment.persist()
    run.fields.log.append(tag="b")
    experiment.reload()
    run.fields.log.append(tag="c")

    # Index the log's `timestamp` column by tag (each tag is logged once).
    # NOTE(review): the timestamps are presumably recorded by
    # `Sequence.append` itself -- confirm against the MLtraq docs.
    log_frame = run.fields.log.df()
    by_tag = log_frame.pivot_table(index="tag", values="timestamp")
    stamps = by_tag["timestamp"]

    run.fields.n_bytes = n_bytes
    run.fields.duration_persist = stamps.b - stamps.a
    run.fields.duration_load = stamps.c - stamps.b


e = create_experiment()

# `i` only repeats each configuration 10 times, so that results can be
# averaged per `n_bytes`. The number of written bytes is 10**exponent
# for exponent in 0..9, i.e. it ranges from 1 B to 1 GB
# (10 repetitions x 10 sizes = 100 runs).
e.add_runs(
    i=range(10),
    n_bytes=[10**exponent for exponent in range(10)],
)

# Execute the experiment with n_jobs=1, running the runs in the foreground
e.execute(eval_persist, n_jobs=1)

# Overview of runs
e.runs

  1%|1         | 1/100 [00:02<04:32,  2.75s/it]

In [2]:
# "Persist" elapsed time
e.runs.df().groupby("n_bytes").duration_persist.mean()

n_bytes
1            0 days 00:00:00.002832947
10           0 days 00:00:00.008128281
100          0 days 00:00:00.009354188
1000         0 days 00:00:00.002717363
10000        0 days 00:00:00.003744972
100000       0 days 00:00:00.023576780
1000000      0 days 00:00:00.008609689
10000000     0 days 00:00:00.007717964
100000000    0 days 00:00:00.062335795
1000000000   0 days 00:00:00.770540262
Name: duration_persist, dtype: timedelta64[ns]

In [3]:
# "Reload" elapsed time
e.runs.df().groupby("n_bytes").duration_load.mean()

n_bytes
1            0 days 00:00:00.000568806
10           0 days 00:00:00.000534912
100          0 days 00:00:00.000547814
1000         0 days 00:00:00.000554009
10000        0 days 00:00:00.000543616
100000       0 days 00:00:00.000590899
1000000      0 days 00:00:00.000763033
10000000     0 days 00:00:00.001858662
100000000    0 days 00:00:00.029299404
1000000000   0 days 00:00:00.458244556
Name: duration_load, dtype: timedelta64[ns]