# LaQuacco 🍅

## Laboratory Quality Control

### Module Imports

In [None]:
import os
import platform
import sys
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import laquacco as laq  # required by Jupyter

### User Input

In [None]:
# relative sample size used for normalization (handed on as `perc`)
sample_perc = 20

# file search settings
data_dir = r"./tests/Polaris"  # raw string (r"") keeps backslashes literal
data_ext = "*.tif"  # pattern of files to include
anti_ext = ""  # pattern of files to exclude
recurse = True  # also look for files in subdirectories

# whether channel images of outliers are rendered
show_img = False

### Check Files

In [None]:
# collect all matching image files, ordered case-insensitively
found_files = laq.get_files(
    path=data_dir,
    pat=data_ext,
    anti=anti_ext,
    recurse=recurse,
)
files = sorted(found_files, key=str.lower)

# report what was found, abbreviating the search directory to "."
print(f"Found {len(files)} image files in {os.path.abspath(data_dir)}:")
for file in files:
    print(file.replace(data_dir, "."))

### Main Program

In [None]:
# draw samples from the experimental image population
try:
    drawn = laq.get_samples(population=files, perc=sample_perc)
    samples = sorted(drawn, key=str.lower)
except ValueError:
    print("Could not draw samples from experimental population.")
    sys.exit(1)

# gather image statistics for every sample
samples_img_data = {
    sample: laq.stats_img_data(laq.get_tiff(sample)) for sample in samples
}
print()

# collect the channel names seen in any sample (a set drops duplicates);
# the "metadata" entry is not a channel
chans_set = set()
for img_data in samples_img_data.values():
    chans_set.update(img_data)
chans_set.discard("metadata")
chans = sorted(chans_set, key=str.lower)

# one color per channel
color_map = laq.get_colormap(len(chans))

# per channel, average the 10% (bottom) and 90% (top) percentiles
# over all samples
chans_bottop = {"bot": {}, "top": {}}
for chan in chans:
    bottop = laq.get_chan_data(samples_img_data, chan, "bottop")
    bot_mean = laq.get_mean(bottop[:, 0])
    top_mean = laq.get_mean(bottop[:, 1])
    chans_bottop["bot"][chan] = bot_mean
    chans_bottop["top"][chan] = top_mean
    print(f"{chan}:", flush=True)
    print(f"\t90%: {top_mean},\n\t10%: {bot_mean}")
print()

# gather image statistics for the whole population, handing over the
# averaged bottom percentiles
bottoms = chans_bottop["bot"]
images_img_data = {
    file: laq.stats_img_data(laq.get_tiff(file), bottoms) for file in files
}
print()

# reorder the experimental image data by time stamp
by_time = sorted(
    images_img_data,
    key=lambda name: images_img_data[name]["metadata"]["date_time"],
)
images_img_data = {name: images_img_data[name] for name in by_time}

### Data Plots I - Distribution Chart

In [None]:
print("Plotting distribution chart...", flush=True)

# set the default figure size (global) to 1600 x 1200 pixels
dpi = plt.rcParams["figure.dpi"]
min_pixw, min_pixh = 1600, 1200
min_width = min_pixw / dpi
min_height = min_pixh / dpi
plt.rcParams["figure.figsize"] = [min_width, min_height]

# fetch the "mean" data of every channel for the plot
fig, ax = plt.subplots()
means = [laq.get_chan_data(images_img_data, chan, "mean") for chan in chans]

# draw one violin per channel, black bodies with a gray mean marker
vp = ax.violinplot(means, showmeans=True, showextrema=False)
for body in vp["bodies"]:
    body.set_facecolor("black")
    body.set_edgecolor("black")
vp["cmeans"].set_edgecolor("dimgray")

# put the channel names on the x axis and start the y axis at zero
tick_positions = list(range(1, len(chans) + 1))
ax.set_xticks(
    tick_positions,
    labels=chans,
    rotation=90,
    fontsize="small",
)
plt.ylim(bottom=0)

# place the legend to the right of the axes
legend_handles = [vp["bodies"][0]]
legend_labels = ["means"]
legend = plt.legend(
    legend_handles,
    legend_labels,
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    fontsize="small",
)

# render the figure
plt.show()

# sys.exit() raises SystemExit, which ends a script-style run right here
sys.exit(0)

### Data Plots II - Levey-Jennings Charts

In [None]:
# Draw one Levey-Jennings chart per channel: the per-image signal means with
# error bars, a trend line with +/-1 and +/-2 standard deviation bands, and the
# running mean with its own +/-1 and +/-2 standard deviation lines.
# Also fills `signals_lj` and `extrema_lj` (both keyed by channel).
print("Plotting Levey-Jennings charts...", flush=True)

# With this margin, 2 * slice_margin + 1 is never smaller than the number of
# files (for at least one file), so slice_size below equals file_len.
slice_margin = len(files) - 1  # extend slice to either sides
fit_trend = False  # fit a linear regression model of the mean
file_len = len(files)
slice_size = min(file_len, 2 * slice_margin + 1)
# NOTE(review): assert statements are skipped when Python runs with -O.
assert (
    slice_size > 3
), "Zero degrees of freedom to estimate the standard deviation from the trend line."
xs = range(0, file_len)
np_nan = np.full(file_len, np.nan)  # NaN template, copied per statistic
signals_lj = {}  # channel -> signal means
extrema_lj = {}  # channel -> band limits around the trend line
for c, chan in enumerate(chans):
    # prepare variables
    run_stats = {stat: np_nan.copy() for stat in ["slice", "means", "stdevs"]}
    trend_stats = {stat: np_nan.copy() for stat in ["slice", "vals", "stdevs"]}
    # get image statistics
    signal_means = laq.get_chan_data(images_img_data, chan, "sign_mean")
    signal_stdevs = laq.get_chan_data(images_img_data, chan, "sign_stdev")
    signal_stderrs = laq.get_chan_data(images_img_data, chan, "sign_stderr")
    signals_lj[chan] = signal_means
    # get trend statistics
    # (either a first-degree polynomial fit or a constant line at the mean)
    if fit_trend:
        slope, inter = np.polyfit(xs, signal_means, deg=1)
        trend_stats["vals"] = slope * xs + inter
    else:
        trend_stats["vals"].fill(laq.get_mean(signal_means))
    # get running statistics
    # (positions whose slice does not have exactly slice_size elements are
    # skipped and keep their initial NaN; the loop value `mean` is unused)
    for i, mean in enumerate(signal_means):
        run_stats["slice"] = laq.get_run_slice(signal_means, i, slice_margin)
        if run_stats["slice"].size == slice_size:
            run_stats["means"][i] = laq.get_mean(run_stats["slice"])
            run_stats["stdevs"][i] = laq.get_mean(
                laq.get_run_slice(signal_stdevs, i, slice_margin)
            )
            trend_stats["slice"] = laq.get_run_slice(
                trend_stats["vals"], i, slice_margin
            )
            # deviation of the signal slice around the mean of the trend slice
            trend_stats["stdevs"][i] = laq.get_stdev(
                run_stats["slice"],
                mean=laq.get_mean(trend_stats["slice"]),
                ddof=3,  # estimated: slope, intercept, and mean
            )
    # get extrema from trend line
    # (key pairs are (upper, lower) for the 2-stdev and the 1-stdev band)
    extrema_lj_keys = [("p2stdev", "m2stdev"), ("p1stdev", "m1stdev")]
    extrema_lj[chan] = {
        extrema_lj_keys[0][0]: trend_stats["vals"] + 2.0 * trend_stats["stdevs"],
        extrema_lj_keys[1][0]: trend_stats["vals"] + 1.0 * trend_stats["stdevs"],
        extrema_lj_keys[1][1]: trend_stats["vals"] - 1.0 * trend_stats["stdevs"],
        extrema_lj_keys[0][1]: trend_stats["vals"] - 2.0 * trend_stats["stdevs"],
    }
    # plot statistics
    # (only the chart of the last channel is labelled with file names; all
    # other charts use the plain image index)
    if chan == chans[-1]:
        signal_labels = [os.path.basename(image) for image in images_img_data.keys()]
    else:
        signal_labels = range(0, len(images_img_data))
    plt.xticks(rotation=90, fontsize="small")
    # running mean +/- 1 and +/- 2 running standard deviations; the +/- 2
    # lines use the sparser dash pattern
    for dist in [2.0, 1.0, -1.0, -2.0]:
        linestyle = (0, (1, 2))
        if abs(dist) == 2.0:
            # NOTE(review): the doubled assignment target is redundant
            linestyle = linestyle = (0, (1, 4))
        plt.plot(
            run_stats["means"] + dist * run_stats["stdevs"],
            color="black",
            linewidth=1,
            linestyle=linestyle,
        )
    # shade the 2-stdev and the 1-stdev band around the trend line; the bands
    # overlap, so the inner band is drawn darker
    for upper, lower in extrema_lj_keys:
        plt.fill_between(
            xs,
            extrema_lj[chan][upper],
            extrema_lj[chan][lower],
            color="black",
            alpha=0.2,
        )
    # trend line (solid) and running mean (dashed)
    plt.plot(trend_stats["vals"], color="black", linewidth=1, linestyle="solid")
    plt.plot(run_stats["means"], color="black", linewidth=1, linestyle="dashed")
    # signal means with their standard errors, in the channel's color
    plt.errorbar(
        signal_labels,
        signal_means,
        yerr=signal_stderrs,
        fmt="o-",
        linewidth=1,
        markersize=2,
        color=color_map[c],
        label=chan + " [SIG]",
    )
    legend = plt.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
    plt.ylim(bottom=0.0)
    plt.show()

### Data Plots II - Extreme Values

In [None]:
# list all images with extreme values per channel
#
# For every channel this prints the images whose signal lies outside the
# 1-stdev or 2-stdev band around the trend line (marked with one or two
# arrows), shows a color bar for the channel's display range, and optionally
# renders the channel image of each outlier.

# The signals were derived from images_img_data, which is sorted by time
# stamp, whereas `files` is sorted by name. Look the image up in the former so
# that position `s` names the right file (assumes laq.get_chan_data keeps the
# dict order; the Levey-Jennings labels rely on the same assumption).
image_files = list(images_img_data)
cmap = mpl.cm.nipy_spectral
for chan in chans:
    print(f"\n{chan}:", flush=True)
    # NOTE(review): the name `chans_minmax` used here before is not defined
    # anywhere in this notebook; the averaged bottom/top percentiles are used
    # as the display range instead. Confirm this is the intended range.
    chan_min = chans_bottop["bot"][chan]
    chan_max = chans_bottop["top"][chan]
    extrema = extrema_lj[chan]
    outliers_lj = []
    for s, signal_lj in enumerate(signals_lj[chan]):
        # check the outer band first so that it takes precedence
        if signal_lj > extrema["p2stdev"][s]:
            indicator = "▲▲ "
        elif signal_lj > extrema["p1stdev"][s]:
            indicator = "▲  "
        elif signal_lj < extrema["m2stdev"][s]:
            indicator = "▼▼ "
        elif signal_lj < extrema["m1stdev"][s]:
            indicator = "▼  "
        else:
            continue  # within the inner band (or the band limit is NaN)
        outliers_lj.append((indicator, s, image_files[s], chan, signal_lj))
    # show color bar at top
    norm = mpl.colors.Normalize(vmin=chan_min, vmax=chan_max)
    scalarmappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
    scalarmappable.set_array([])
    fig = plt.figure(figsize=(min_width, 1))
    ax = fig.add_axes([0.0, 0.0, 1, 0.5])
    fig.colorbar(scalarmappable, cax=ax, orientation="horizontal")
    plt.show()
    if not outliers_lj:
        print("\t►  (none)")
        continue
    # print list of outliers with optional channel images
    for indicator, position, file, channel, mean in outliers_lj:
        print(
            f"\n\t{indicator} {position} = {os.path.basename(file)}"
            f"  ({mean}) [{chan_min}-{chan_max}]"
        )
        if show_img:
            plt.imshow(
                laq.get_chan_img(file, channel),
                cmap="nipy_spectral",
                vmin=chan_min,
                vmax=chan_max,
                resample=False,
            )
            plt.show()


### Data Plots III - C-Score Charts

In [None]:
# Score every image and draw, per channel, a stacked bar chart of the three
# scores on a logarithmic axis with reference lines at 1, 10 and 100.

# NOTE(review): `chans_minmax` is not defined anywhere in this notebook, so
# this call raises NameError as written. It is left untouched because the
# argument expected by laq.score_img_data cannot be seen from here.
score_results = [laq.score_img_data(file, chans_minmax) for file in files]
# each result is a pair; its first item becomes the key (and later the bar
# label), its second item the value
scores_img_data = {key: img_data for key, img_data in score_results}

# plot all channel scores
for chan in chans:
    print(f"\n{chan}:", flush=True)
    score_values = {
        "Score I": laq.get_chan_data(scores_img_data, chan, "score_1"),
        "Score II": laq.get_chan_data(scores_img_data, chan, "score_2"),
        "Score III": laq.get_chan_data(scores_img_data, chan, "score_3"),
    }
    # only the chart of the last channel is labelled with file names; the
    # labels are the same for all three scores, so build them once
    if chan == chans[-1]:
        score_labels = [os.path.basename(image) for image in scores_img_data]
    else:
        score_labels = range(0, len(scores_img_data))
    # prepare stacked bar plot
    fig, ax = plt.subplots()
    bottom = np.zeros(len(files))
    for score, values in score_values.items():
        # label each series with its score name so the legend can tell the
        # three stacked segments apart
        ax.bar(score_labels, values, width=0.5, label=score, bottom=bottom)
        bottom += values
    ax.set_yscale("log")
    ax.set_ylim(1e-1, 1.025e2)
    for level, color in ((1, "tab:blue"), (10, "tab:orange"), (100, "tab:green")):
        ax.axhline(y=level, color=color, linestyle="dashed")
    # reverse the entries so the legend reads top-down like the stack
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        handles[::-1],
        labels[::-1],
        title="C-Score (total)",
        loc="center left",
        bbox_to_anchor=(1, 0.5),
        fontsize="small",
    )
    plt.show()