# LaQuacco 🍅🍅🍅

## Laboratory Quality Control v1.0 (2024-06-14)

#### https://github.com/christianrickert/LaQuacco


### Module Imports

In [None]:
import csv
import os
import shutil
import time

import matplotlib.pyplot as plt
import numpy as np

import laquacco as laq  # custom functions

### User Input

In [None]:
# Positive controls: search path and patterns. The statistics gathered
# from these files are used for the later quality control of the samples.
control_dir = r"./tests/Polaris"  # directory string
control_ext = "*.tif"  # positive search pattern
control_exc = ""  # negative search pattern
control_sub = True  # recursive search

# Dataset samples: search path and patterns (same meaning as the control
# settings above). Quality control is run for each channel of these files.
sample_dir = r"./tests/Polaris"
sample_ext = "*.tif"
sample_exc = ""
sample_sub = True

# If True, each file is copied first and read from the copy; the directory
# holding the copy is removed again once the file has been processed.
# Intended for (remote) files that are slow to read in place.
copy_tiff = True

### Check Dataset

In [None]:
# List the positive controls: every file returned by the search below
# `control_dir`, ordered case-insensitively by path.
controls = sorted(
    laq.get_files(
        path=control_dir,
        pat=control_ext,
        anti=control_exc,
        recurse=control_sub,
    ),
    key=str.lower,
)
controls_len = len(controls)
print(f"Controls: {controls_len}\n\t{os.path.abspath(control_dir)}")
for control in controls:
    # Show the path relative to the search root. `os.path.relpath` only
    # strips the leading root, whereas `str.replace` would also rewrite any
    # later occurrence of the directory string inside the path and would
    # mangle the output if `control_dir` ended with a path separator.
    print(f"\t {os.path.join('.', os.path.relpath(control, control_dir))}")
print()

# List the dataset samples: every file returned by the search below
# `sample_dir`, ordered case-insensitively by path.
samples = sorted(
    laq.get_files(
        path=sample_dir,
        pat=sample_ext,
        anti=sample_exc,
        recurse=sample_sub,
    ),
    key=str.lower,
)
samples_len = len(samples)
print(f"Samples: {samples_len}\n\t{os.path.abspath(sample_dir)}")
for sample in samples:
    # Show the path relative to the search root. `os.path.relpath` only
    # strips the leading root, whereas `str.replace` would also rewrite any
    # later occurrence of the directory string inside the path and would
    # mangle the output if `sample_dir` ended with a path separator.
    print(f"\t {os.path.join('.', os.path.relpath(sample, sample_dir))}")

### Sample Dataset

In [None]:
# Collect the image statistics of every positive control, keyed by path.
controls_img_data = {}
# Width of the longest file name, used to align the progress output.
# `default=0` keeps an empty control list from raising ValueError here.
max_controls_length = max(
    (len(os.path.basename(control)) for control in controls), default=0
)
start = time.time()
for count, control in enumerate(controls, start=1):
    # Progress prefix without newline; the remaining time is appended below.
    print(
        f"CONTROL: {os.path.basename(control):{max_controls_length}} [{count}/{controls_len}] ",
        end="",
        flush=True,
    )
    if copy_tiff:
        temp_control = laq.copy_file(control)
        try:
            controls_img_data[control] = laq.stats_img_data(
                laq.get_tiff(temp_control)
            )  # signal above zero
        finally:
            # Always remove the directory holding the copy, even when
            # reading or analysing the image fails, so that no temporary
            # data is left behind.
            shutil.rmtree(os.path.dirname(temp_control))
    else:
        controls_img_data[control] = laq.stats_img_data(laq.get_tiff(control))
    print(f"(-{laq.get_time_left(start, count, controls_len)})")
print()

# List the channel names found in the control statistics (sorted by name).
# `dict.fromkeys` removes duplicates while keeping first-seen order, so
# names that differ only in case keep a stable relative order in the
# case-insensitive sort below.
chans = dict.fromkeys(
    chan for img_data in controls_img_data.values() for chan in img_data
)
# "metadata" is stored next to the channels but is not a channel itself.
# Filtering (instead of `list.remove`) does not raise if the key is absent,
# e.g. when no controls were found.
chans = sorted((chan for chan in chans if chan != "metadata"), key=str.lower)
# Length of the longest channel name; 0 when there are no channels.
max_chans_length = max((len(chan) for chan in chans), default=0)

# Aggregate the control statistics per channel.
# Each entry is a tuple (mean of means, mean of minima, mean of maxima)
# across all controls; the NaN placeholders use the same three-field layout.
control_stats = {chan: (np.nan, np.nan, np.nan) for chan in chans}
for chan in chans:
    print(f"{chan}:")
    control_mean = laq.get_chan_data(controls_img_data, chan, "mean")
    # Two values per control: column 0 is reported as the minimum and
    # column 1 as the maximum below.
    control_minmax = laq.get_chan_data(controls_img_data, chan, "minmax", 2)
    control_stats[chan] = (
        np.nanmean(control_mean),
        np.nanmean(control_minmax[:, 0]),
        np.nanmean(control_minmax[:, 1]),
    )
    # Adjacent f-strings are concatenated into ONE argument. Passing them as
    # separate arguments would make `print` insert its default separator
    # (a space) in front of the "Mean" and "Minimum" lines.
    print(
        f"\tMaximum: {control_stats[chan][2]:7.1f} (mean)"
        f"{np.nanstd(control_minmax[:, 1]):7.1f} (std)\n"
        f"\tMean:    {control_stats[chan][0]:7.1f} (mean)"
        f"{np.nanstd(control_mean):7.1f} (std)\n"
        f"\tMinimum: {control_stats[chan][1]:7.1f} (mean)"
        f"{np.nanstd(control_minmax[:, 0]):7.1f} (std)\n"
    )

# Collect the image statistics of every dataset sample, keyed by path.
# The per-channel control statistics are passed along to the analysis.
samples_img_data = {}
# Width of the longest file name, used to align the progress output.
# `default=0` keeps an empty sample list from raising ValueError here.
max_samples_length = max(
    (len(os.path.basename(sample)) for sample in samples), default=0
)
start = time.time()
for count, sample in enumerate(samples, start=1):
    # Progress prefix without newline; the remaining time is appended below.
    print(
        f"SAMPLE: {os.path.basename(sample):{max_samples_length}} [{count}/{samples_len}] ",
        end="",
        flush=True,
    )
    if copy_tiff:
        temp_sample = laq.copy_file(sample)
        try:
            samples_img_data[sample] = laq.stats_img_data(
                laq.get_tiff(temp_sample), control_stats
            )
        finally:
            # Always remove the directory holding the copy, even when
            # reading or analysing the image fails, so that no temporary
            # data is left behind.
            shutil.rmtree(os.path.dirname(temp_sample))
    else:
        samples_img_data[sample] = laq.stats_img_data(
            laq.get_tiff(sample), control_stats
        )
    print(f"(-{laq.get_time_left(start, count, samples_len)})")
print()

# Re-order the sample statistics by the time stamp stored in each sample's
# metadata, then refresh the sample list so it follows the same order.
samples_img_data = {
    path: stats
    for path, stats in sorted(
        samples_img_data.items(),
        key=lambda entry: entry[1]["metadata"]["date_time"],
    )
}
samples = list(samples_img_data)

### Write Dataset

In [None]:
# Collect the per-sample mean values of every channel, keyed by channel name.
sample_means = {
    chan: laq.get_chan_data(samples_img_data, chan, "mean") for chan in chans
}

# Write one header row ("Sample" + channel names) followed by one row per
# sample. `csv.writer` quotes fields that contain commas, quotes or line
# breaks (e.g. in file paths or channel names), which a plain ",".join would
# silently turn into extra columns.
# `newline=""` hands line-ending control to the csv module; using
# `os.linesep` keeps the platform's native line ending, as a text-mode
# write of "\n" would produce.
# NOTE(review): rows rely on `get_chan_data` returning values in the order
# of `samples_img_data`, which `samples` was derived from — confirm.
with open("channel_values.csv", "w", encoding="utf-8", newline="") as csv_file:
    writer = csv.writer(csv_file, lineterminator=os.linesep)
    writer.writerow(["Sample", *sample_means])
    for index, sample in enumerate(samples):
        writer.writerow(
            [sample, *(str(mean[index]) for mean in sample_means.values())]
        )