# LaQuacco 🍅🍅🍅

### https://github.com/himsr-lab/LaQuacco

### User Input

In [None]:
"""
Basic: Adjust these variable values as needed.
"""

images_path = "./tests"  # absolute or relative path
images_included = "*.ome.tiff"  # extension must match
images_excluded = ""  # extension must not match

"""
Advanced: Adjust these variables to customize runtime behavior.
"""
channels_limits = {}  # define closed channel intervals with endpoints
copy_images = False  # increases performance for network drives
plot_c_bands = True  # uses group statistics to plot channel bands
recursive_search = False  # search recursively for image files at path

"""
Check:  Running this cell lists all image files found.
"""
import os
import laquacco as laq

# Resolve the search root to an absolute path, collect the matching files
# and keep them as a sorted list for all following cells.
search_root = os.path.abspath(images_path)
files = sorted(
    laq.get_files(
        path=search_root,
        pat=images_included,
        anti=images_excluded,
        recurse=recursive_search,
    )
)

# Report how many files were found, followed by the full list.
print(f"Images: {len(files)}")
print(f"{list(files)}")

print("\nCompleted.")


### Dataset Sampling I

In [None]:
import os
import time

# First sampling pass: read every image once and collect its channel
# statistics. Both dictionaries are keyed by the image file path.
imgs = {}  # image dictionary: file -> shallow copy of the laq.get_img() result
imgs_chans_stats = {}  # statistics dictionary: file -> laq.get_img_chans_stats() result

start = time.time()
update_start = ""
update_stop = ""
update_max = 0
for count, file in enumerate(files, start=1):
    # Compose the current status line first and pad it to the longest line
    # printed so far, so a shorter line fully overwrites a longer previous one.
    update_start = f"IMAGE: {file}"
    update_len = len(update_start) + len(update_stop)
    update_max = max(update_max, update_len)
    print(f"\r{update_start + update_stop:<{update_max}}", end="", flush=True)

    # get image statistics
    tmp = laq.copy_file(file) if copy_images else file  # optional file copy
    try:
        img = laq.get_img(tmp)
        try:
            imgs[file] = dict(img)
            imgs_chans_stats[file] = laq.get_img_chans_stats(
                imgs[file], chans_limits=channels_limits
            )
        finally:
            # close the image handle even if the statistics could not be computed
            img["tiff"].close()
    finally:
        # never leave a temporary copy behind, even on errors
        if tmp != file:
            os.remove(tmp)

    update_stop = f" (-{laq.get_time_left(start, count, len(files))})"
print(f"\r{' ' * update_max}\r", end="")  # clear line

print("\n-> Completed.")

### Data Summary I

In [None]:
import numpy as np


# sort data by acquisition
def _acq_sort_key(file):
    """Return the sort key of an image: its first acquisition datetime.

    Images without a datetime sort last; ties are broken by the file name.
    NOTE(review): assumes a missing datetime is stored as None - confirm
    against laq.get_img().
    """
    acq = imgs[file]["datetimes"][0]
    return (acq is None, acq, file)


# Apply the same file order to both dictionaries, so that positions in the
# per-channel arrays below correspond to the printed image positions.
files_by_acq = sorted(imgs, key=_acq_sort_key)
imgs = {file: imgs[file] for file in files_by_acq}
imgs_chans_stats = {file: imgs_chans_stats[file] for file in files_by_acq}
acqs = [values["datetimes"][0] for values in imgs.values()]

# get data summary
# channel and statistic names: sorted union over all images
chans = sorted(
    {chan for img_stats in imgs_chans_stats.values() for chan in img_stats}
)
stats = sorted(
    {
        stat
        for img_stats in imgs_chans_stats.values()
        for chan_stats in img_stats.values()
        for stat in chan_stats
    }
)
# per channel and statistic: one array entry per image, in sorted image order;
# the channel and statistic names are taken from the first image
first_img = next(iter(imgs_chans_stats))
chans_stats = {
    chan: {
        stat: np.array([imgs_chans_stats[img][chan][stat] for img in imgs_chans_stats])
        for stat in imgs_chans_stats[first_img][chan]
    }
    for chan in imgs_chans_stats[first_img]
}
# per channel and statistic: NaN-ignoring mean across all images
chans_means = {}
for chan in chans:
    chans_means[chan] = {}
    for stat in stats:
        chans_means[chan][stat] = np.nanmean(chans_stats[chan][stat])

# check data
# one flag per position of the "exposures" lists: True if all images agree
expos = [
    all(expo == expos[0] for expo in expos)
    for expos in zip(*[meta["exposures"] for meta in imgs.values()])
]
assert all(expos), "Exposure times are not identical within channels."

# print data
imgs_len = len(imgs)
for pos, img in enumerate(imgs):
    print(f"{pos:>{len(str(imgs_len))}} = {img}  # {imgs[img]['datetimes'][0]}")
print()
# NOTE(review): the exposure printed next to every channel is always entry 0
# of the first image's "exposures" list - confirm whether it should be the
# entry that belongs to the printed channel.
for chan in chans:
    print(
        f"{chan}  @ {imgs[next(iter(imgs))]['exposures'][0][0]}{imgs[next(iter(imgs))]['exposures'][0][1]}"
    )
    for stat in stats:
        print(
            f"\t{stat}\t{chans_means[chan][stat]:7.1f} (mean)\t{np.nanstd(chans_stats[chan][stat]):7.1f} (std)"
        )

print("\n-> Completed.")

### Dataset Distribution: Violin Chart

In [None]:
import matplotlib as mpl
%matplotlib widget

# prepare figure dimensions
dpi = mpl.pyplot.rcParams["figure.dpi"]
min_pixw, min_pixh = 1600, 900
min_width, min_height = min_pixw / dpi, min_pixh / dpi
mpl.pyplot.rcParams["figure.figsize"] = [min_width, min_height]

# prepare variables for plotting
xy_vals = list(zip(*[(chan, stats["mean"]) for chan, stats in chans_stats.items()]))

# create violin plot
fig, ax = mpl.pyplot.subplots()
ax.set_xticks(range(1, len(xy_vals[0]) + 1), labels=chans, rotation=90)
vp = ax.violinplot(xy_vals[1], showmeans=True, showextrema=False)

# adjust colors
for p in vp["bodies"]:
    p.set_facecolor("black")
    p.set_edgecolor("black")
for m in ["cmeans"]:
    vp[m].set_edgecolor("red")

# add legend
legend = mpl.pyplot.legend(
    [vp["bodies"][0]],
    ["means"],
)

# add jittered dots
jit_means = np.concatenate(xy_vals[1])
jit = np.random.normal(0, 0.05, size=jit_means.size)
positions = np.repeat(np.arange(1, len(xy_vals[0]) + 1), len(xy_vals[1][0])) + jit
ax.scatter(positions, jit_means, color="black", s=5)

# adjust axes
mpl.pyplot.minorticks_on()

# add Y-axis label
mpl.pyplot.ylabel("Raw values [a.u.]")

# add grid
mpl.pyplot.grid(color="gray", linewidth=0.25)

# show plot
mpl.pyplot.tight_layout()
mpl.pyplot.show()

print("-> Completed.")

### Dataset Consistency: Levey-Jennings Charts

In [None]:
# One chart per channel: the per-image "mean" values in image order, drawn
# over shaded +/-1 and +/-2 standard deviation ranges around their mean.
plt = mpl.pyplot  # shorthand for the plotting interface

for c, chan in enumerate(chans):
    print(f"\n{chan}:")

    # create new plot
    plt.figure()

    # prepare variables for plotting
    chan_vals = chans_stats[chan]["mean"]
    chan_xy_vals = (range(len(chan_vals)), chan_vals)
    chan_mean = np.nanmean(chan_vals)
    chan_std = np.nanstd(chan_vals)

    # add channel limits
    chan_stds_keys = [("+2 std", "-2 std"), ("+1 std", "-1 std")]
    chan_stds = {
        chan: {
            "+2 std": chan_mean + 2.0 * chan_std,
            "+1 std": chan_mean + 1.0 * chan_std,
            "-1 std": chan_mean - 1.0 * chan_std,
            "-2 std": chan_mean - 2.0 * chan_std,
        }
    }
    chan_levels = chan_stds[chan]
    for upper, lower in chan_stds_keys:
        # shade the range between the paired limits
        plt.fill_between(
            chan_xy_vals[0],
            chan_levels[upper],
            chan_levels[lower],
            color="black",
            alpha=0.1,
        )
        # name both limits left of the first data point
        for level in (upper, lower):
            plt.annotate(
                level,
                xy=(-0.1, chan_levels[level]),
                ha="center",
                va="center",
                color="dimgray",
            )

    # add channel mean
    x_first = chan_xy_vals[0][0]
    x_last = chan_xy_vals[0][-1]
    plt.plot(
        [x_first, x_last],
        [chan_mean, chan_mean],
        "red",
        label="_",
        linewidth=1,
    )
    plt.annotate("mean", xy=(-0.1, chan_mean), ha="center", va="center", color="red")

    # plot the data points (no error values are passed)
    plt.errorbar(
        chan_xy_vals[0],
        chan_xy_vals[1],
        fmt="o-",
        linewidth=1,
        markersize=3,
        color="black",
        label=chan,
    )

    # legend, minor ticks, Y-axis label and grid
    plt.legend()
    plt.minorticks_on()
    plt.ylabel("Raw values [a.u.]")
    plt.grid(color="gray", linewidth=0.25)

    # show plot
    plt.tight_layout()
    plt.show()

print("-> Completed.")

### Dataset Sampling II

In [None]:
# Second sampling pass (only when channel bands are requested): read every
# image again and merge the statistics computed with the group means
# (`chans_means`) into the existing per-image channel statistics.
if plot_c_bands:
    start = time.time()
    update_start = ""
    update_stop = ""
    update_max = 0
    for count, file in enumerate(files, start=1):
        # Compose the current status line first and pad it to the longest line
        # printed so far, so a shorter line fully overwrites a longer one.
        update_start = f"IMAGE: {file}"
        update_len = len(update_start) + len(update_stop)
        update_max = max(update_max, update_len)
        print(f"\r{update_start + update_stop:<{update_max}}", end="", flush=True)

        # update image statistics
        tmp = laq.copy_file(file) if copy_images else file  # optional file copy
        try:
            img = laq.get_img(tmp)
            try:
                # Compute the statistics once per image instead of once per
                # channel: the call arguments do not change between channels.
                # NOTE(review): assumes laq.get_img_chans_stats() returns the
                # same result for repeated calls with equal arguments.
                img_chans_stats = laq.get_img_chans_stats(
                    img, chans_limits=channels_limits, chans_means=chans_means
                )
                for chan in chans:
                    imgs_chans_stats[file][chan].update(img_chans_stats[chan])
            finally:
                # close the image handle even if the statistics failed
                img["tiff"].close()
        finally:
            # never leave a temporary copy behind, even on errors
            if tmp != file:
                os.remove(tmp)

        update_stop = f" (-{laq.get_time_left(start, count, len(files))})"
    print(f"\r{' ' * update_max}\r", end="")  # clear line

    print("\n-> Completed.")

### Data Summary II

In [None]:
# get data summary
# display order of the statistics; a statistic name that is not listed here
# makes the sort below fail with a ValueError
order = ["max", "band_3", "band_2", "mean", "band_1", "band_0", "min"]
stats = sorted(
    {
        stat
        for img_stats in imgs_chans_stats.values()
        for chan_stats in img_stats.values()
        for stat in chan_stats
    },
    key=order.index,
)

# per channel and statistic: one array entry per image; the channel and
# statistic names are taken from the first image
first_file = next(iter(imgs_chans_stats))
chans_stats = {
    chan: {
        stat: np.array([img_stats[chan][stat] for img_stats in imgs_chans_stats.values()])
        for stat in imgs_chans_stats[first_file][chan]
    }
    for chan in imgs_chans_stats[first_file]
}

# per channel and statistic: NaN-ignoring mean across all images
chans_means = {
    chan: {stat: np.nanmean(chans_stats[chan][stat]) for stat in stats}
    for chan in chans
}

# print data
# NOTE(review): the exposure printed next to every channel is always entry 0
# of the first image's "exposures" list - confirm whether it should be the
# entry that belongs to the printed channel.
for chan in chans:
    first_img = imgs[next(iter(imgs))]
    expo_0 = first_img["exposures"][0][0]
    expo_1 = first_img["exposures"][0][1]
    print(f"{chan}  @ {expo_0}{expo_1}")
    for stat in stats:
        stat_mean = chans_means[chan][stat]
        stat_std = np.nanstd(chans_stats[chan][stat])
        print(f"\t{stat}\t{stat_mean:7.1f} (mean)\t{stat_std:7.1f} (std)")
    print()

print("\n-> Completed.")

### Dataset Dynamics: C-Band Charts

In [None]:
# One chart per channel: the channel mean as a solid red line plus, for every
# "band_*" statistic, its per-image values over shaded +/-1 and +/-2 standard
# deviation ranges around the band mean (dashed red line).
bands = [stat for stat in stats if stat.startswith("band_")][::-1]
for chan in chans:
    print(f"\n{chan}:")

    # create new plot
    mpl.pyplot.figure()

    # prepare variables for plotting
    # Defined here so that this cell does not depend on a variable left over
    # from the Levey-Jennings cell.
    chan_xy_vals = range(len(chans_stats[chan]["mean"])), chans_stats[chan]["mean"]
    chan_mean = np.nanmean(chan_xy_vals[1])

    # add channel mean
    mpl.pyplot.plot(
        [chan_xy_vals[0][0], chan_xy_vals[0][-1]],
        [chan_mean, chan_mean],
        "red",
        label="_",
        linewidth=1,
    )
    mpl.pyplot.annotate(
        "mean", xy=(-0.1, chan_mean), ha="center", va="center", color="red"
    )

    for band in bands:
        # prepare variables for plotting
        band_xy_vals = range(len(chans_stats[chan][band])), chans_stats[chan][band]
        band_mean = np.nanmean(chans_stats[chan][band])
        band_std = np.nanstd(chans_stats[chan][band])

        # add band limits
        band_stds = {}
        band_stds_keys = [("+2 std", "-2 std"), ("+1 std", "-1 std")]
        band_stds[chan] = {
            band_stds_keys[0][0]: band_mean + 2.0 * band_std,
            band_stds_keys[1][0]: band_mean + 1.0 * band_std,
            band_stds_keys[1][1]: band_mean - 1.0 * band_std,
            band_stds_keys[0][1]: band_mean - 2.0 * band_std,
        }
        for upper, lower in band_stds_keys:
            # shade the range between the paired limits
            mpl.pyplot.fill_between(
                band_xy_vals[0],
                band_stds[chan][upper],
                band_stds[chan][lower],
                color="black",
                alpha=0.1,
            )
            mpl.pyplot.annotate(
                upper,
                xy=(-0.1, band_stds[chan][upper]),
                ha="center",
                va="center",
                color="dimgray",
            )
            mpl.pyplot.annotate(
                lower,
                xy=(-0.1, band_stds[chan][lower]),
                ha="center",
                va="center",
                color="dimgray",
            )

        # add band mean
        mpl.pyplot.plot(
            [band_xy_vals[0][0], band_xy_vals[0][-1]],
            [band_mean, band_mean],
            "r--",
            label="_",
            linewidth=1,
        )
        mpl.pyplot.annotate(
            "mean", xy=(-0.1, band_mean), ha="center", va="center", color="red"
        )

        # plot the data points (no error values are passed)
        mpl.pyplot.errorbar(
            band_xy_vals[0],
            band_xy_vals[1],
            fmt="o-",
            linewidth=1,
            markersize=3,
            color="black",
            label=chan,
        )

    # add legend
    # Every band carries the same channel label, so only the first entry is
    # shown. Without band statistics there is no labeled artist at all, in
    # which case the legend is skipped instead of failing on `handles[0]`.
    handles, labels = mpl.pyplot.gca().get_legend_handles_labels()
    if handles:
        mpl.pyplot.legend([handles[0]], [labels[0]])

    # adjust axes
    mpl.pyplot.minorticks_on()

    # add Y-axis label
    mpl.pyplot.ylabel("Raw values [a.u.]")

    # add grid
    mpl.pyplot.grid(color="gray", linewidth=0.25)

    # show plot
    mpl.pyplot.tight_layout()
    mpl.pyplot.show()

print("-> Completed.")