# Re-post-processing
---

**Author**: Ryan Lane \
**Date**: 13 February 2023

#### Overview
Reapply the FAST-EM post-processing corrections, but estimate the background only from the subset of raw images that pass artefact detection. The correction is effectively just a background subtraction, where the background is estimated as `mean(fields, axis=2)`, i.e. the pixel-wise mean over the stack of fields.

In [2]:
def process(folder, output_subdir):
    """Apply the fixed-pattern (background) correction to all fields in a folder.

    Changes the working directory to ``folder``, averages every ``*.tiff``
    file found there and writes the following into ``output_subdir``:

    * ``sum_of_files.tiff`` -- the mean of all fields,
    * ``gaincorrection.txt`` -- one entry of the gain deviation list per line,
    * a corrected copy of every input field (field minus mean, shifted back
      up by ``restore_mean_level``).

    Parameters
    ----------
    folder : str
        Directory containing the ``*.tiff`` fields. It becomes the current
        working directory as a side effect.
    output_subdir : str
        Directory (relative to ``folder``) that receives the results. It is
        created when missing; an existing directory is reused.

    Notes
    -----
    When ``folder`` contains no ``*.tiff`` files a warning is logged and
    nothing else is written.
    """
    restore_mean_level = 32768
    os.chdir(folder)
    correction_files = glob.glob('*.tiff')
    logging.info("Calculate the gain correction of current directory *.tiff files. Results are places in '/corrected/..'.")

    # Reuse an existing output directory; any other failure (permissions,
    # path is a regular file, ...) is raised instead of silently swallowed.
    os.makedirs(output_subdir, exist_ok=True)

    if not correction_files:
        # Without fields there is no mean to compute (would divide by zero).
        logging.warning("No *.tiff files found in '%s'; nothing to correct.", folder)
        return

    logging.info('First pass (takes a minute): Calculate gain correction parameters and cell statistics from .tiff files in current folder...')
    # NOTE(review): the accumulation uses whatever dtype ScanFieldImage.load
    # returns -- confirm it is wide enough not to overflow when summing.
    sum_of_files = None
    for f in correction_files:
        im = ScanFieldImage.load(f)
        sum_of_files = im if sum_of_files is None else np.add(sum_of_files, im)

    # Turn the sum into the mean field (name kept from the original code)
    sum_of_files = sum_of_files / len(correction_files)
    sum_of_files.save(os.path.join(output_subdir, 'sum_of_files.tiff'))

    gainreff = sum_of_files.slice()
    gainlist = ScanFieldImage.gaindeviation(gainreff)
    with open(os.path.join(output_subdir, 'gaincorrection.txt'), 'w') as cf:
        for g in gainlist:
            cf.write(str(g) + '\r\n')

    logging.info('Second pass (takes some more time): Perform field fixed pattern correction...')
    for f in correction_files:
        field = ScanFieldImage.load(f)
        # Subtract the mean field, then shift back to the 16-bit mid level
        fieldimage = np.subtract(field, sum_of_files)
        fieldimage = fieldimage + restore_mean_level
        fieldimage.save(os.path.join(output_subdir, f), 4)

In [3]:
# Get autocomplete to work: turn off the Jedi-based completer
# for this IPython session
%config Completer.use_jedi = False

In [4]:
from pathlib import Path
import re
from bs4 import BeautifulSoup as Soup

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage import exposure
from tifffile import TiffFile

### Gather image data

In [5]:
# Read the tab-separated acquisition table and index it by the Z column
df_fps = (
    pd.read_csv('./acquisitions.txt', sep='\t')
    .set_index('Z')
)
# Show five random rows as a sanity check
df_fps.sample(5)

FileNotFoundError: [Errno 2] No such file or directory: './acquisitions.txt'

### Reprocessing functions

In [6]:
from outlier_detection import get_med, get_mad, has_artefact

ModuleNotFoundError: No module named 'outlier_detection'

### Inspect a section

In [None]:
# Section (Z index) to inspect
z = 33
# Acquisition record of that section as a plain dict
d = df_fps.loc[z].to_dict()
# Raw fields live under <storage root>/<Date>/<Project>/<ROA>
dir_data_raw = Path("/long_term_storage/asm_storage/asm_service/").joinpath(
    d['Date'], d['Project'], d['ROA']
)
# Sorted filepaths of all fields matching `*_0.tiff`
fps_data_raw = sorted(dir_data_raw.glob('*_0.tiff'))
fps_data_raw[:5]

In [None]:
# Calculate median and median absolute deviation
pct = 1
med = get_med(fps_data_raw, pct=pct)
mad = get_mad(fps_data_raw, med=med, pct=pct)

# Create figure
# Grid size comes from the `row_col_*` stem of the last sorted filepath
# (assumes that file carries the highest row and column index)
nrows, ncols, _ = [int(i) + 1 for i in fps_data_raw[-1].stem.split('_')]
# squeeze=False keeps `axes` 2D even for a single row or column of fields,
# so that `axes[row, col]` below always works
fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
                         figsize=(4*ncols, 4*nrows), squeeze=False)

# Semi-transparent RGBA overlays: green = clean, red = artefact
tint_green = np.array([0, 1, 0, 0.1])
tint_red = np.array([1, 0, 0, 0.1])

# Loop through raw tiffs, keeping track of artefact-less fields
fps_clean = []
for fp in tqdm(fps_data_raw):

    # Determine row, col
    row, col, _ = [int(i) for i in fp.stem.split('_')]
    # Read tiff and extract the last page of the pyramid; the context
    # manager closes the file handle again after reading
    with TiffFile(fp.as_posix()) as tiff:
        image = tiff.pages[-1].asarray()
    dy, dx = image.shape

    # Detect artefacts
    corrupted = has_artefact(image, med=med, mad=mad, pct=pct, a=3)

    # Colorize
    if corrupted:
        mask = np.ones((dy, dx, 4)) * tint_red
    else:
        mask = np.ones((dy, dx, 4)) * tint_green
        fps_clean.append(fp)

    # Plot image + mask
    ax = axes[row, col]
    ax.imshow(image, cmap='Greys_r',
              vmin=52800, vmax=60200)
    ax.imshow(mask)

    # Give label
    title = f"{row:03d} x {col:03d}"
    ax.text(0.5, 0.95, title, ha='center', va='top',
            transform=ax.transAxes,
            fontsize=12, bbox=dict(facecolor='white', alpha=0.5))
    # Remove axis ticks
    ax.axis('off')

# Aesthetics
plt.subplots_adjust(hspace=0.02, wspace=-0.02)

### Re-post-process

In [None]:
from PIL import Image
from skimage.transform import pyramid_gaussian

def save_pyramidal_tiff(filepath, image, metadata, n_layers=5, options=None):
    """Save image as multi-page, pyramidal tiff

    The image is turned into a Gaussian pyramid (downscale factor 2 per
    level); every level is converted to uint16 and written as one page of a
    single tiff file, with the full-resolution level as first page.

    Parameters
    ----------
    filepath : `pathlib.Path`
        Filepath to save tiff
    image : array-like
        Input image, becomes the base-level of the pyramid
    metadata : dict
        Tiff metadata, handed to PIL as `tiffinfo`. `None` is treated as
        an empty dict
    n_layers : int (optional)
        Passed as `max_layer` to `pyramid_gaussian`, i.e. the index of the
        last downsampled layer that is generated on top of the base level
    options : dict (optional)
        Extra optional metadata.
        NOTE(review): accepted for interface compatibility but currently
        not used anywhere in the function body

    References
    ----------
    [1] https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#saving-tiff-images
    """
    # Generate image pyramid
    pyramid = pyramid_gaussian(image,
                               downscale=2,
                               order=3,
                               max_layer=n_layers,
                               preserve_range=True)

    # Extract layers from pyramid and force uint16.
    # Values are clipped to the uint16 range first, because a plain cast
    # would wrap out-of-range values around (e.g. -1 -> 65535)
    uint16_range = np.iinfo(np.uint16)
    layers = [
        Image.fromarray(
            np.clip(layer, uint16_range.min, uint16_range.max).astype(np.uint16)
        )
        for layer in pyramid
    ]

    # Handle metadata
    if metadata is None:
        metadata = {}

    # Use funky PIL thing because it works:
    # the first layer is saved and all others are appended as extra pages
    im = layers[0]
    im.save(filepath.as_posix(), append_images=layers[1:],
            tiffinfo=metadata, save_all=True)

In [None]:
def reprocess(dir_data_raw, fps_clean):
    """Reapply post-processing corrections

    The background is estimated as the pixel-wise mean of the first tiff
    page of all artefact-free fields. It is then subtracted from every
    `*_0.tiff` field in `dir_data_raw`, and each result is shifted up by
    `restore_mean_level` and written as a pyramidal tiff to
    `dir_data_raw / 'corrected'` under the original filename. The
    background itself is stored there as `sum_of_files.tiff`.

    Parameters
    ----------
    dir_data_raw : `pathlib.Path`
        Filepath to directory containing the raw fields to reprocess
    fps_clean : list-like
        List of filepaths of artefact-free fields

    Raises
    ------
    ValueError
        If `fps_clean` is empty, since no background can be estimated
    """
    if len(fps_clean) == 0:
        raise ValueError("fps_clean is empty: cannot estimate a background "
                         "without any artefact-free fields")

    # Borrow some technolution naming conventions
    restore_mean_level = 32768
    sum_of_files = 0.0  # set to float to avoid integer overflow
    uint16_range = np.iinfo(np.uint16)
    # Set target output directory and make sure it exists before writing
    dir_data_corrected = dir_data_raw / 'corrected'
    dir_data_corrected.mkdir(parents=True, exist_ok=True)

    # Estimate background by averaging over clean images
    for fp in tqdm(fps_clean, desc="Estimating background"):
        # Load full-resolution page; the context manager closes the file
        with TiffFile(fp.as_posix()) as tiff:
            image = tiff.pages[0].asarray()
        # Sum all the clean images together
        sum_of_files += image

    # Make the sum a mean
    background = sum_of_files / len(fps_clean)

    # Collect filepaths to raw fields
    fps_raw = sorted(dir_data_raw.glob('*_0.tiff'))
    # Loop through raw fields
    for fp in tqdm(fps_raw, desc="Background subtraction / re-exporting"):
        # Load full-resolution page
        with TiffFile(fp.as_posix()) as tiff:
            image = tiff.pages[0].asarray()
        # Subtract background from each raw field and restore to 16bit
        # mean level. Clip before casting so that out-of-range values
        # saturate instead of wrapping around
        corrected = np.clip(image - background + restore_mean_level,
                            uint16_range.min,
                            uint16_range.max).astype(np.uint16)

        # Save corrected field as pyramidal tiff
        fp_tgt = dir_data_corrected / fp.name
        save_pyramidal_tiff(fp_tgt, corrected, None)

    # Save background (the name does not match the `*_0.tiff` field pattern)
    save_pyramidal_tiff(dir_data_corrected / 'sum_of_files.tiff',
                        background, None)

In [None]:
# Reprocess: estimate the background from the artefact-free fields and
# re-export the raw fields of the selected section with it subtracted
reprocess(dir_data_raw, fps_clean)

### Inspect re-post-processed

In [None]:
# Collect reprocessed filepaths
fps_data_reprocessed = sorted((dir_data_raw / 'corrected').glob('*_0.tiff'))

# Create figure
# Grid size comes from the `row_col_*` stem of the last raw filepath
# (assumes that file carries the highest row and column index)
nrows, ncols, _ = [int(i) + 1 for i in fps_data_raw[-1].stem.split('_')]
# squeeze=False keeps `axes` 2D even for a single row or column of fields,
# so that `axes[row, col]` below always works
fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
                         figsize=(4*ncols, 4*nrows), squeeze=False)

# Loop through reprocessed tiffs
for fp in tqdm(fps_data_reprocessed):

    # Determine row, col
    row, col, _ = [int(i) for i in fp.stem.split('_')]
    # Read tiff and extract the last page of the pyramid; the context
    # manager closes the file handle again after reading
    with TiffFile(fp.as_posix()) as tiff:
        image = tiff.pages[-1].asarray()

    # Plot image
    ax = axes[row, col]
    ax.imshow(image, cmap='Greys_r',
              vmin=28600, vmax=35900)

    # Give label
    title = f"{row:03d} x {col:03d}"
    ax.text(0.5, 0.95, title, ha='center', va='top',
            transform=ax.transAxes,
            fontsize=12, bbox=dict(facecolor='white', alpha=0.5))
    # Remove axis ticks
    ax.axis('off')

# Aesthetics
plt.subplots_adjust(hspace=0.02, wspace=-0.02)