# LaQuacco 🍅🍅🍅

### Laboratory Quality Control v2.0 (2024-10-01)

#### https://github.com/himsr-lab/LaQuacco

### User Input

In [None]:
# 6) adjust runtime behavior
# The variables defined here are notebook globals read by the cells below.
"""
Basic: Adjust these variable values as needed.
"""

# resolved with `os.path.abspath` and handed to `laq.get_files` as `path`
images_path = r"./tests"  # absolute or relative path
# handed to `laq.get_files` as `pat`
images_included = "*.ome.tiff"  # extension must match
# handed to `laq.get_files` as `anti`
images_excluded = ""  # extension must not match

"""
Advanced: Adjust these variables to customize runtime behavior.
"""
# define closed channel intervals with endpoints
# (handed to `laq.get_img_chans_stats` as `chans_limits`; presumably `None`
# disables a bound and "*" matches every channel - confirm in `laquacco`)
channel_limits = {"*": {"lower": None, "upper": None}}  # all channels
# when True, each image is copied with `laq.copy_file` before it is read and
# the directory holding the copy is removed afterwards
copy_images = False  # increases performance for network drives
# divided by the figure dpi to set matplotlib's default figure size
figure_dimensions = (1460, 822)  # width and height in pixels
# handed to `laq.get_files` as `recurse`
recursive_search = False  # search recursively for image files at path

"""
Check:  Running this cell lists all image files found.
"""
import os
from collections import defaultdict

import laquacco as laq

# collect all image files that match the user-defined patterns
files = laq.get_files(
    path=os.path.abspath(images_path),
    pat=images_included,
    anti=images_excluded,
    recurse=recursive_search,
)
print(f"Images: {len(files)}")

# group the file names by their parent directory in a single pass
# (avoids rescanning all files per directory and shadowing builtin `dir`)
dirs_files = defaultdict(list)
for file in files:
    dirs_files[os.path.dirname(file)].append(os.path.basename(file))
dirs_files = dict(sorted(dirs_files.items()))

# print one sorted file list per directory
for folder, folder_files in dirs_files.items():
    print(f"=> {folder}")
    print(f"{sorted(folder_files)}")

print("\nCompleted.")

### Sampling I

In [None]:
# 8) retrieve image data (individual statistics)
import os
import shutil
import time

imgs = {}  # image dictionary
imgs_chans_stats = {}  # statistics dictionary

start = time.time()
update_start = ""  # progress text for the current image
update_stop = ""  # remaining time estimate, available after the first image
update_max = 0  # length of the longest progress line printed so far
files_len = len(files)
for count, file in enumerate(files, start=1):
    # pad the current line to the longest line printed so far,
    # so that a shorter line fully overwrites its predecessor
    update_start = f"IMAGE: {file}\t[{count}/{files_len}]"
    update_line = f"{update_start}{update_stop}"
    update_max = max(update_max, len(update_line))
    print(f"\r{update_line:<{update_max}}", end="")

    # get individual statistics
    tmp = laq.copy_file(file) if copy_images else file  # optional file copy
    try:
        img = laq.get_img(tmp)
        try:
            imgs[file] = dict(img.items())
            imgs_chans_stats[file] = laq.get_img_chans_stats(
                imgs[file], chans_limits=channel_limits
            )
        finally:
            # close the image file, even if the statistics could not be read
            img["tiff"].close()
    finally:
        # remove the temporary copy, even if the image could not be opened
        if copy_images and tmp != file:
            shutil.rmtree(os.path.dirname(tmp))

    update_stop = f" (-{laq.get_time_left(start, count, files_len)})"
print(f"\r{' ' * update_max}\r", end="")  # clear line

print("\n-> Completed.")

### Summary I

In [None]:
# 10) print image data (individual statistics)
import warnings
import numpy as np

# sort data by acquisition (first entry of each image's "datetimes")
sorted_acqs = sorted(imgs, key=lambda k: imgs[k]["datetimes"][0])
imgs = {k: imgs[k] for k in sorted_acqs}
imgs_chans_stats = {k: imgs_chans_stats[k] for k in sorted_acqs}

# get data summary
# all channel names that occur in at least one image
chans = sorted(
    {chan for img_stats in imgs_chans_stats.values() for chan in img_stats}
)
# all statistic names that occur for at least one channel of any image
stats = sorted(
    {
        stat
        for img_stats in imgs_chans_stats.values()
        for chan_stats in img_stats.values()
        for stat in chan_stats
    }
)
# per channel and statistic: one value per image in acquisition order;
# a missing channel or statistic yields `None`, which becomes `np.nan`
chans_stats = {
    chan: {
        stat: np.array(
            [
                img_stats.get(chan, {}).get(stat)
                for img_stats in imgs_chans_stats.values()
            ],
            dtype=np.float64,
        )
        for stat in stats
    }
    for chan in chans
}
# per channel and statistic: mean across all images, ignoring `np.nan`
chans_means = {
    chan: {stat: np.nanmean(values) for stat, values in chans_stats[chan].items()}
    for chan in chans
}

# check data (appears after prints)
def min_warning(message, category, filename, lineno, line=None):
    """Format a warning as its category name and message only."""
    return f"{category.__name__}: {message}\n"


warnings.formatwarning = min_warning  # category and message
warnings.simplefilter("always", UserWarning)  # do repeat

# exposures per channel, collected from the metadata of all images
expos_found = {ch: [] for meta in imgs.values() for ch in meta["channels"]}
for meta in imgs.values():
    for ch, expo in zip(meta["channels"], meta["exposures"]):
        expos_found[ch].append(expo)

# per channel: the statistic name, once for every missing (`np.nan`) value
stats_missing = {
    chan: [
        stat
        for stat, values in chan_stats.items()
        for value in values
        if np.isnan(value)
    ]
    for chan, chan_stats in chans_stats.items()
}

for chan in chans:
    distinct_expos = sorted(set(expos_found[chan]))
    if len(distinct_expos) > 1:  # exposure times differ
        warnings.warn(
            f"{chan}: Exposure times are not identical!\n{distinct_expos}",
            UserWarning,
        )
    distinct_missing = sorted(set(stats_missing[chan]))
    if stats_missing[chan]:  # statistics incomplete
        warnings.warn(
            f"{chan}: Statistics are missing for some images!\n{distinct_missing}",
            UserWarning,
        )

# print data (appears before warnings)
# list the images with their position in acquisition order
imgs_str_len = len(str(len(imgs)))
for pos, img in enumerate(imgs):
    # inner quotes differ from the outer ones to stay valid before Python 3.12
    print(f'{pos:>{imgs_str_len}} => "{img}"  # {imgs[img]["datetimes"][0]}')
print()

# list mean and standard deviation across images per channel and statistic
for chan in chans:
    expos = ", ".join(
        f"{value}{unit}" for value, unit in sorted(set(expos_found[chan]))
    )
    print(f"{chan} @ {expos}")
    for stat in stats:
        values = chans_stats[chan][stat]
        # the "(mean)" column reports the mean, not the standard deviation
        print(
            f"\t{stat}\t{np.nanmean(values):7.1f} (mean)\t{np.nanstd(values):7.1f} (std)"
        )

print("\n-> Completed.")

### Distribution - Violin Chart

In [None]:
# 12) plot image data (individual distributions)

import matplotlib as mpl
%matplotlib widget

# silence the cryptic warning caused by missing (`np.nan`) data
warnings.filterwarnings("ignore", message="invalid value encountered in det")

# default figure size: requested pixels converted to inches
fig_dpi = mpl.pyplot.rcParams["figure.dpi"]
fig_pixw, fig_pixh = figure_dimensions
mpl.pyplot.rcParams["figure.figsize"] = [fig_pixw / fig_dpi, fig_pixh / fig_dpi]

# channel names and their per-image means, as two parallel tuples
chans_vals = list(
    zip(*((chan, vals["mean"]) for chan, vals in chans_stats.items()))
)

# one violin per channel
fig, ax = mpl.pyplot.subplots()
ticks = range(1, len(chans_vals[0]) + 1)
ax.set_xticks(ticks, labels=chans, rotation=90)
violins = ax.violinplot(chans_vals[1], showmeans=True, showextrema=False)

# black violins with a red mean marker
for body in violins["bodies"]:
    body.set_facecolor("black")
    body.set_edgecolor("black")
violins["cmeans"].set_edgecolor("red")

# legend entry for the violins
legend = mpl.pyplot.legend([violins["bodies"][0]], ["means"])

# individual values as dots with a small horizontal jitter
means_flat = np.concatenate(chans_vals[1])
jitter = np.random.normal(0, 0.01, size=means_flat.size)
vals_per_chan = len(chans_vals[1][0])
x_positions = (
    np.repeat(np.arange(1, len(chans_vals[0]) + 1), vals_per_chan) + jitter
)
ax.scatter(x_positions, means_flat, color="black", s=5)

# axes, label and grid
mpl.pyplot.minorticks_on()
mpl.pyplot.ylabel("Raw values [a.u.]")
mpl.pyplot.grid(color="gray", linewidth=0.25)

# render the figure
mpl.pyplot.tight_layout()
mpl.pyplot.show()

print("-> Completed.")

### Stability: Levey-Jennings Charts

In [None]:
# 14) plot image data (individual stability)
# labels of the shaded intervals and their width in standard deviations
std_levels = (("+2 std", "-2 std", 2.0), ("+1 std", "-1 std", 1.0))
for chan in chans:
    print(f"\n{chan}:")

    # one figure per channel
    mpl.pyplot.figure()

    # image index versus per-image mean, plus mean and spread across images
    chan_vals = chans_stats[chan]["mean"]
    chan_xy_vals = range(len(chan_vals)), chan_vals
    chan_mean = np.nanmean(chan_vals)
    chan_std = np.nanstd(chan_vals)

    # shade and label the intervals around the channel mean
    for upper_label, lower_label, factor in std_levels:
        upper_val = chan_mean + factor * chan_std
        lower_val = chan_mean - factor * chan_std
        mpl.pyplot.fill_between(
            chan_xy_vals[0], upper_val, lower_val, color="black", alpha=0.1
        )
        for label, level in ((upper_label, upper_val), (lower_label, lower_val)):
            mpl.pyplot.annotate(
                label, xy=(-0.1, level), ha="center", va="center", color="dimgray"
            )

    # horizontal line at the channel mean
    mpl.pyplot.plot(
        [chan_xy_vals[0][0], chan_xy_vals[0][-1]],
        [chan_mean, chan_mean],
        "red",
        label="_",
        linewidth=1,
    )
    mpl.pyplot.annotate(
        "mean", xy=(-0.1, chan_mean), ha="center", va="center", color="red"
    )

    # per-image means as connected dots
    mpl.pyplot.errorbar(
        chan_xy_vals[0],
        chan_xy_vals[1],
        fmt="o-",
        linewidth=1,
        markersize=3,
        color="black",
        label=chan,
    )

    # legend, axes, label and grid
    mpl.pyplot.legend()
    mpl.pyplot.minorticks_on()
    mpl.pyplot.ylabel("Raw values [a.u.]")
    mpl.pyplot.grid(color="gray", linewidth=0.25)

    # render the figure
    mpl.pyplot.tight_layout()
    mpl.pyplot.show()

print("-> Completed.")

### Sampling II

In [None]:
# 16) retrieve image data (group statistics)
files_len = len(files)
start = time.time()
update_start = ""  # progress text for the current image
update_stop = ""  # remaining time estimate, available after the first image
update_max = 0  # length of the longest progress line printed so far
for count, file in enumerate(files, start=1):
    # pad the current line to the longest line printed so far,
    # so that a shorter line fully overwrites its predecessor
    update_start = f"IMAGE: {file}\t[{count}/{files_len}]"
    update_line = f"{update_start}{update_stop}"
    update_max = max(update_max, len(update_line))
    print(f"\r{update_line:<{update_max}}", end="")

    # get group image statistics (replaces the individual statistics)
    tmp = laq.copy_file(file) if copy_images else file  # optional file copy
    try:
        img = laq.get_img(tmp)
        try:
            imgs_chans_stats[file] = laq.get_img_chans_stats(
                img, chans_limits=channel_limits, chans_means=chans_means
            )
        finally:
            # close the image file, even if the statistics could not be read
            img["tiff"].close()
    finally:
        # remove the temporary copy, even if the image could not be opened
        if copy_images and tmp != file:
            shutil.rmtree(os.path.dirname(tmp))

    update_stop = f" (-{laq.get_time_left(start, count, files_len)})"
print(f"\r{' ' * update_max}\r", end="")  # clear line

print("\n-> Completed.")

### Summary II

In [None]:
# 18) print image data (group statistics)
# statistic names in display order
order = ["band_3", "band_2", "band_1", "band_0"]
stats = sorted(
    {
        stat
        for img_stats in imgs_chans_stats.values()
        for chan_stats in img_stats.values()
        for stat in chan_stats
    },
    key=order.index,
)
# per channel and statistic: one value per image;
# a missing channel or statistic yields `None`, which becomes `np.nan`
chans_stats = {
    chan: {
        stat: np.array(
            [
                img_stats.get(chan, {}).get(stat)
                for img_stats in imgs_chans_stats.values()
            ],
            dtype=np.float64,
        )
        for stat in stats
    }
    for chan in chans
}

# print data
for chan in chans:
    expos = ", ".join(
        f"{value}{unit}" for value, unit in sorted(set(expos_found[chan]))
    )
    print(f"{chan} @ {expos}")
    for stat in stats:
        values = chans_stats[chan][stat]
        # the "(mean)" column reports the mean, not the standard deviation
        print(
            f"\t{stat}\t{np.nanmean(values):7.1f} (mean)\t{np.nanstd(values):7.1f} (std)"
        )

print("\n-> Completed.")

### Dynamics: C-Band Charts

In [None]:
# 20) plot image data (group statistics)
# band statistics in reversed display order
bands = [stat for stat in stats if stat.startswith("band_")][::-1]
# labels of the shaded intervals and their width in standard deviations
band_stds_keys = [("+2 std", "-2 std", 2.0), ("+1 std", "-1 std", 1.0)]
for chan in chans:
    print(f"\n{chan}:")

    # create new plot
    mpl.pyplot.figure()

    # prepare variables for plotting
    chan_mean = np.nanmean(chans_means[chan]["mean"])
    # one x position per image; derived here instead of relying on a
    # variable left over from the Levey-Jennings cell
    imgs_xs = range(len(imgs_chans_stats))

    # add channel mean
    mpl.pyplot.plot(
        [imgs_xs[0], imgs_xs[-1]],
        [chan_mean, chan_mean],
        "red",
        label="_",
        linewidth=1,
    )
    mpl.pyplot.annotate(
        "mean", xy=(-0.1, chan_mean), ha="center", va="center", color="red"
    )

    for band in bands:
        # prepare variables for plotting
        band_vals = chans_stats[chan][band]
        band_xy_vals = range(len(band_vals)), band_vals
        band_mean = np.nanmean(band_vals)
        band_std = np.nanstd(band_vals)

        # add band limits
        for upper, lower, factor in band_stds_keys:
            upper_val = band_mean + factor * band_std
            lower_val = band_mean - factor * band_std
            mpl.pyplot.fill_between(
                band_xy_vals[0],
                upper_val,
                lower_val,
                color="black",
                alpha=0.1,
            )
            mpl.pyplot.annotate(
                upper,
                xy=(-0.1, upper_val),
                ha="center",
                va="center",
                color="dimgray",
            )
            mpl.pyplot.annotate(
                lower,
                xy=(-0.1, lower_val),
                ha="center",
                va="center",
                color="dimgray",
            )

        # add band mean
        mpl.pyplot.plot(
            [band_xy_vals[0][0], band_xy_vals[0][-1]],
            [band_mean, band_mean],
            "r--",
            label="_",
            linewidth=1,
        )
        mpl.pyplot.annotate(
            "mean", xy=(-0.1, band_mean), ha="center", va="center", color="red"
        )

        # plot per-image band values as connected dots
        mpl.pyplot.errorbar(
            band_xy_vals[0],
            band_xy_vals[1],
            fmt="o-",
            linewidth=1,
            markersize=3,
            color="black",
            label=chan,
        )

    # add legend (every band carries the channel label, show it only once;
    # skipped when no band was plotted and there is nothing to label)
    handles, labels = mpl.pyplot.gca().get_legend_handles_labels()
    if handles:
        mpl.pyplot.legend([handles[0]], [labels[0]])

    # adjust axes
    mpl.pyplot.minorticks_on()

    # add Y-axis label
    mpl.pyplot.ylabel("Raw values [a.u.]")

    # add grid
    mpl.pyplot.grid(color="gray", linewidth=0.25)

    # show plot
    mpl.pyplot.tight_layout()
    mpl.pyplot.show()

print("-> Completed.")