# LaQuacco 🍅🍅🍅

## Laboratory Quality Control

### Module Imports

In [None]:
import os
import platform
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import laquacco as laq  # required by Jupyter

### User Input

In [None]:
# --- sampling ---
# relative sample size (passed as `perc` when drawing the samples that are
# used for normalization)
sample_perc = 20

# --- file search ---
# root directory of the image data; keep the raw-string prefix (r"")
data_dir = r"./tests/Polaris"
# pattern of the files to include
data_ext = "*.tif"
# pattern of the files to exclude
anti_ext = ""
# also look for files in subdirectories
recurse = True

# --- output ---
# render the channel images of the reported outliers
show_img = False

### Check Files

In [None]:
# get a list of all image files, sorted case-insensitively
files = sorted(
    laq.get_files(
        path=data_dir,
        pat=data_ext,
        anti=anti_ext,
        recurse=recurse,
    ),
    key=str.lower,
)

print(f"Found {len(files)} image files in {os.path.abspath(data_dir)}:")
for file in files:
    # Show each path relative to the data directory.  os.path.relpath only
    # strips the leading directory, whereas str.replace() would substitute
    # every occurrence of the directory string anywhere in the path and would
    # silently do nothing if the path is spelled differently from data_dir.
    print(os.path.join(".", os.path.relpath(file, data_dir)))

### Main Program

In [None]:
# sample experimental image data
try:
    samples = sorted(laq.get_samples(population=files, perc=sample_perc), key=str.lower)
except ValueError:
    print("Could not draw samples from experimental population.")
    sys.exit(1)

# get image statistics of the samples
samples_img_data = {
    sample: laq.stats_img_data(laq.get_tiff(sample)) for sample in samples
}
print()

# prepare channels: every key of the sample statistics except "metadata"
chans_set = set()  # avoid duplicate entries
for img_data in samples_img_data.values():
    chans_set.update(chan for chan in img_data if chan != "metadata")
chans = sorted(chans_set, key=str.lower)

# prepare colormap (one entry per channel)
color_map = laq.get_colormap(len(chans))

# prepare bottom and top percentiles for analysis
chan_mins = {}
chan_maxs = {}
chan_minmax = {}
for chan in chans:
    print(f"{chan}:", flush=True)
    bottop = laq.get_chan_data(samples_img_data, chan, "bottop")
    bottoms, tops = bottop[:, 0], bottop[:, 1]  # column 0 / column 1
    # the median over all samples is used as the channel's limit
    chan_mins[chan] = np.median(bottoms)
    chan_maxs[chan] = np.median(tops)
    chan_minmax[chan] = (chan_mins[chan], chan_maxs[chan])
    # one string, so print() does not insert its separator space in front of
    # the second line
    print(
        f"\t90%: {np.mean(tops)} (mean), {np.std(tops)} (std)\n"
        f"\t10%: {np.mean(bottoms)} (mean), {np.std(bottoms)} (std)"
    )
print()

# get image statistics of all files, using the channel minima from the samples
images_img_data = {
    file: laq.stats_img_data(laq.get_tiff(file), chan_mins) for file in files
}
print()

# sort experimental image data by time stamp
images_img_data = dict(
    sorted(images_img_data.items(), key=lambda v: v[1]["metadata"]["date_time"])
)

# update file order list to match time stamp
files = list(images_img_data)

# manual override of the scoring range (left disabled):
# chan_minmax = {chan: (0.0, 12.0) for chan in chans}

# get image scores, in time stamp order
scores_img_data = {
    file: laq.score_img_data(laq.get_tiff(file), chan_minmax) for file in files
}
print()

### Data Plots I - Distribution Chart

In [None]:
print("Plotting distribution chart...\n", flush=True)

# set the default figure size in inches from a pixel size (global setting)
min_pixw, min_pixh = 1600, 1200
dpi = plt.rcParams["figure.dpi"]
min_width = min_pixw / dpi
min_height = min_pixh / dpi
plt.rcParams["figure.figsize"] = [min_width, min_height]

# collect the "mean" statistic of every channel, one entry per channel
fig, ax = plt.subplots()
means = [laq.get_chan_data(images_img_data, chan, "mean") for chan in chans]

# draw one violin per channel: black bodies, gray marker for the mean
vp = ax.violinplot(means, showmeans=True, showextrema=False)
for body in vp["bodies"]:
    body.set_facecolor("black")
    body.set_edgecolor("black")
vp["cmeans"].set_edgecolor("dimgray")

# label the violins with the channel names and start the y axis at zero
ax.set_xticks(list(range(1, len(chans) + 1)), labels=chans, rotation=90)
ax.set_ylim(bottom=0.0)

# place the legend to the right of the axes
legend = ax.legend(
    [vp["bodies"][0]],
    ["means"],
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    fontsize="small",
)

# show plot
plt.show()

### Data Plots II - Levey-Jennings Charts

In [None]:
print("Plotting Levey-Jennings charts...\n", flush=True)

# positions in plot (1-based, matching the x axis of the charts)
for position, file in enumerate(files, 1):
    print(f"{position} = {os.path.basename(file)}")

# prepare variables
file_len = len(files)
slice_ext = file_len - 1  # extend slice to either sides
fit_trend = False  # fit a linear regression model of the mean
slice_size = min(file_len, 2 * slice_ext + 1)
assert (
    slice_size > 3
), "Zero degrees of freedom to estimate the standard deviation from the trend line."
chan_means = {}
chan_extras = {}
# 1-based plot positions; an array (not a range) so that the trend line can be
# computed with plain element-wise arithmetic
x_vals = np.arange(1, file_len + 1)
np_nan = np.full(file_len, np.nan)

# keys of the (upper, lower) limits around the trend line, widest band first;
# identical for every channel, so defined once outside the loop
chan_extras_keys = [("p2stdev", "m2stdev"), ("p1stdev", "m1stdev")]

# create error bar plots, one figure per channel
for c, chan in enumerate(chans):
    # per-position arrays; entries stay NaN until a value is computed
    run = {stat: np_nan.copy() for stat in ["slice", "means", "stdevs"]}
    trend = {stat: np_nan.copy() for stat in ["slice", "vals", "stdevs"]}

    # get image statistics
    means = laq.get_chan_data(images_img_data, chan, "mean")
    stdevs = laq.get_chan_data(images_img_data, chan, "stdev")
    stderrs = laq.get_chan_data(images_img_data, chan, "stderr")
    chan_means[chan] = means  # kept for the extreme-value listing

    # get trend statistics: fitted line, or a constant line at the mean
    if fit_trend:
        slope, inter = np.polyfit(x_vals, means, deg=1)
        trend["vals"] = slope * x_vals + inter
    else:
        trend["vals"].fill(laq.get_mean(means))

    # get running statistics
    for i, _ in enumerate(means):
        run["slice"] = laq.get_run_slice(means, i, slice_ext)
        # positions whose slice does not have the expected size stay NaN
        if run["slice"].size == slice_size:
            run["means"][i] = laq.get_mean(run["slice"])
            run["stdevs"][i] = laq.get_mean(laq.get_run_slice(stdevs, i, slice_ext))
            trend["slice"] = laq.get_run_slice(trend["vals"], i, slice_ext)
            trend["stdevs"][i] = laq.get_stdev(
                run["slice"],
                mean=laq.get_mean(trend["slice"]),
                ddof=3,  # estimated: slope, intercept, and mean
            )

    # get extrema from trend line (+/- 1 and 2 standard deviations)
    chan_extras[chan] = {
        chan_extras_keys[0][0]: trend["vals"] + 2.0 * trend["stdevs"],
        chan_extras_keys[1][0]: trend["vals"] + 1.0 * trend["stdevs"],
        chan_extras_keys[1][1]: trend["vals"] - 1.0 * trend["stdevs"],
        chan_extras_keys[0][1]: trend["vals"] - 2.0 * trend["stdevs"],
    }

    # plot statistics: dotted lines at the running mean +/- 1 and 2 running
    # standard deviations; the 2-stdev lines are dotted more sparsely
    for dist in [2.0, 1.0, -1.0, -2.0]:
        linestyle = (0, (1, 4)) if abs(dist) == 2.0 else (0, (1, 2))
        plt.plot(
            x_vals,
            run["means"] + dist * run["stdevs"],
            color="black",
            linewidth=1,
            linestyle=linestyle,
        )
    # shaded bands between the upper and lower limits around the trend line
    for upper, lower in chan_extras_keys:
        plt.fill_between(
            x_vals,
            chan_extras[chan][upper],
            chan_extras[chan][lower],
            color="black",
            alpha=0.2,
        )
    plt.plot(x_vals, trend["vals"], color="black", linewidth=1, linestyle="solid")
    plt.plot(x_vals, run["means"], color="black", linewidth=1, linestyle="dashed")

    # plot y values with errors
    plt.errorbar(
        x_vals,
        means,
        yerr=stderrs,
        fmt="o-",
        linewidth=1,
        markersize=2,
        color=color_map[c],
        label=chan,
    )

    # adjust axes (y axis starts at zero)
    plt.ylim(0.0)

    # add legend
    legend = plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")

    # show plot
    plt.show()

### Data Plots II (continued) - Extreme Values

In [None]:
# list all images with extreme values per channel
for chan in chans:
    print(f"\n{chan}:", flush=True)
    limits = chan_extras[chan]
    outliers = []
    for m, mean in enumerate(chan_means[chan]):
        # classify against the limits, widest band first in each direction
        if mean > limits["p2stdev"][m]:
            indicator = "▲▲ "
        elif mean > limits["p1stdev"][m]:
            indicator = "▲  "
        elif mean < limits["m2stdev"][m]:
            indicator = "▼▼ "
        elif mean < limits["m1stdev"][m]:
            indicator = "▼  "
        else:
            continue
        # report the 1-based position, the same numbering that the position
        # listings and the x axes of the charts use
        outliers.append((indicator, m + 1, files[m], chan, mean))
    # show color bar at top; its range is the channel's bottom/top limit,
    # the same range that is used for the optional channel images below
    cmap = mpl.cm.nipy_spectral
    norm = mpl.colors.Normalize(vmin=chan_mins[chan], vmax=chan_maxs[chan])
    scalarmappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
    scalarmappable.set_array([])
    fig = plt.figure(figsize=(min_width, 1))
    ax = fig.add_axes([0.0, 0.0, 1, 0.5])
    cbar = fig.colorbar(scalarmappable, cax=ax, orientation="horizontal")
    plt.show()
    if outliers:
        # print list of outliers with optional channel images
        for indicator, position, file, channel, mean in outliers:
            print(
                f"\n\t{indicator} {position} = {os.path.basename(file)}"
                # f"  ({mean}) [{chan_minmax[chan][0]}-{chan_minmax[chan][1]}]"
            )
            if show_img:
                plt.imshow(
                    laq.get_chan_img(file, channel),
                    cmap="nipy_spectral",
                    vmin=chan_mins[chan],
                    vmax=chan_maxs[chan],
                    resample=False,
                )
                plt.show()
    else:
        print("\t►  (none)")

### Data Plots III - C-Score Charts

In [None]:
print("Plotting C-Score charts...\n", flush=True)

# list the plot position of every image file
for position, file in enumerate(files, 1):
    print(f"{position} = {os.path.basename(file)}")

# legend label -> statistic key, in stacking order (bottom to top)
score_keys = (("Score I", "score_1"), ("Score II", "score_2"), ("Score III", "score_3"))
# y position and color of the dashed horizontal reference lines
score_lines = ((1, "tab:blue"), (10, "tab:orange"), (100, "tab:green"))

# one stacked bar chart per channel
for chan in chans:
    print(f"\n{chan}:", flush=True)
    score_values = {
        label: laq.get_chan_data(scores_img_data, chan, key)
        for label, key in score_keys
    }

    # stack the bars: each score starts where the previous one ended
    fig, ax = plt.subplots()
    score_labels = range(1, file_len + 1)
    bottom = np.zeros(file_len)
    for score, values in score_values.items():
        ax.bar(score_labels, values, width=0.5, label=score, bottom=bottom)
        bottom += values

    # logarithmic y axis with dashed reference lines
    ax.set_yscale("log")
    ax.set_ylim(1e-1, 1.025e2)
    for level, color in score_lines:
        ax.axhline(y=level, color=color, linestyle="dashed")

    # legend in inverse order, so it reads top-down like the stacked bars
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        list(reversed(handles)),
        list(reversed(labels)),
        title="C-Score (total)",
        loc="center left",
        bbox_to_anchor=(1, 0.5),
    )
    plt.show()