# WorkUnit Benchmarking

This notebook compares the time and space required to save WorkUnits with different compression schemes.

In [None]:
import numpy as np
import os
import tempfile
import timeit

from kbmod.configuration import SearchConfiguration
from kbmod.core.image_stack_py import (
    image_stack_add_fake_object,
    make_fake_image_stack,
    ImageStackPy,
)
from kbmod.image_utils import image_stack_cpp_to_py
from kbmod.wcs_utils import make_fake_wcs
from kbmod.work_unit import WorkUnit

We create a fake WorkUnit to use for benchmarking.

In [None]:
# Fixed seed so the synthetic pixel data is the same on every run.
rng = np.random.default_rng(101)

# Dimensions of the synthetic image stack.
num_times = 10
height = 1200
width = 1000
stack_shape = (num_times, height, width)
times = np.arange(num_times)

# Spread the pixel values over a wide range: science values are uniform draws
# scaled to span about -5000.0 to 5000.0, and variance values span about 1.0 to 101.0.
sci = (rng.random(stack_shape) - 0.5) * 10000.0
var = 1.0 + rng.random(stack_shape) * 100.0

# Flag roughly 1% of the pixels as masked and set them to NaN in both layers.
mask = rng.random(stack_shape) < 0.01
for layer in (sci, var):
    layer[mask] = np.nan

# Default search configuration plus a toy WCS.
config = SearchConfiguration()
pixel_scale_deg = 10.0 / 3600.0
wcs = make_fake_wcs(0.0, -15.0, height, width, deg_per_pixel=pixel_scale_deg)

# Assemble the WorkUnit that the benchmark writes and reads.
stack = ImageStackPy(times, sci, var)
wu = WorkUnit(stack, config, wcs=wcs)

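Create a temporary directory for the tests. For each compression type and quantization level, write the WorkUnit to a FITS file, then report the file size, the read and write times, and the round-trip error in the science, variance, and mask data.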

In [None]:
# Scratch directory that holds the FITS file. It is intentionally left open at
# the end of this cell; a later cell removes it with tmp_dir.cleanup().
tmp_dir = tempfile.TemporaryDirectory()
file_name = os.path.join(tmp_dir.name, "wu.fits")

# Parameter grid: every compression type is combined with every quantize level.
# Each value is passed straight through to WorkUnit.to_fits().
compression_types = ["NOCOMPRESS", "RICE_1", "GZIP_1", "GZIP_2", "HCOMPRESS_1"]
quantize_levels = [100.0, 500.0, -0.1, -0.01, -0.001]

# Number of repetitions averaged for each read and write timing. Kept in one
# place so the repeat count and the divisor cannot drift apart.
timing_reps = 10

bytes_per_mb = 1024.0 * 1024.0
table_rule = "--------------+----------+-----------+-----------+------------+---------|---------|--------"

print(table_rule)
print(" Compression  | Quantize | Size (MB) | Read Time | Write Time | Max Sci | Max Var |  Mask  ")
print(" Type         | Level    |           | (Seconds) |  (Seconds) |  Error  |  Error  | Errors ")
print(table_rule)

for compress in compression_types:
    for quantize in quantize_levels:
        # Write once outside the timing so there is a file to measure and read back.
        wu.to_fits(file_name, overwrite=True, compression_type=compress, quantize_level=quantize)
        file_size_mb = os.path.getsize(file_name) / bytes_per_mb

        wu2 = WorkUnit.from_fits(file_name, show_progress=False)
        stack2 = image_stack_cpp_to_py(wu2.im_stack)

        # Compute the maximum absolute round-trip error in science and variance
        # (NaN pixels are ignored by nanmax). Count the pixels whose NaN status
        # in the reloaded science layer disagrees with the original mask.
        max_sci = 0.0
        max_var = 0.0
        mask_err = 0
        for i in range(num_times):
            max_sci = max(max_sci, np.nanmax(np.abs(stack.sci[i] - stack2.sci[i])))
            max_var = max(max_var, np.nanmax(np.abs(stack.var[i] - stack2.var[i])))
            mask_err += np.count_nonzero(mask[i] != np.isnan(stack2.sci[i]))

        # Run the timings. Callables are used instead of source strings so the
        # measurement does not depend on looking names up through globals().
        w_time = (
            timeit.timeit(
                lambda: wu.to_fits(
                    file_name, overwrite=True, compression_type=compress, quantize_level=quantize
                ),
                number=timing_reps,
            )
            / timing_reps
        )
        r_time = (
            timeit.timeit(
                lambda: WorkUnit.from_fits(file_name, show_progress=False),
                number=timing_reps,
            )
            / timing_reps
        )

        print(
            f" {compress:12} | {quantize:8.3f} | {file_size_mb:8.2f}  "
            f"|  {r_time:8.4f} |   {w_time:8.4f} | {max_sci:7.3f} "
            f"| {max_var:7.3f} | {mask_err:5} "
        )

print(table_rule)

Clean up the temporary directory and files.

In [None]:
# Remove the temporary directory created for the benchmark, along with the
# wu.fits file that was written into it.
tmp_dir.cleanup()