# FAST-EM import
---
**Author**: Ryan Lane  
**Date**: 6 April 2022

#### Overview
Quickly imports a FAST-EM project to `render-ws`.

Assumes data is stored as

Stack | Filepath
- | -
raw | `/.../asm_service/{date}/{project}/{section}/{row}_{col}_{zoom}.tiff`
corrected | `/.../asm_service/{date}/{project}/{section}/corrected/{row}_{col}_{zoom}.tiff`

and outputs mipmaps of each field's pyramidal tiff to

Stack | Filepath
- | -
raw | `/.../{project}/raw/{section}/{row}_{col}/{zoom}.tif`
corrected | `/.../{project}/corrected/{section}/{row}_{col}/{zoom}.tif`

**Warning**:
Check filepath tree carefully before executing.

In [1]:
from pathlib import Path
import re
from ruamel.yaml import YAML

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from tifffile import TiffFile

import renderapi
from renderapi.transform import AffineModel

In [97]:
# Turn off the jedi completer, which indirectly enables autocomplete
%config Completer.use_jedi = False

# Cap the rendered width of DataFrame columns
pd.options.display.max_colwidth = 20

#### Connect to `render-ws`

In [3]:
# Owner and project that identify this import on render-ws
owner = 'rlane'
project = '20220404_02_demo_Brain_10us'

# Connection settings, unpacked into `renderapi.connect` below
render_connect_params = dict(
    host='sonic.tnw.tudelft.nl',
    port=8080,
    owner=owner,
    project=project,
    client_scripts='/home/catmaid/render/render-ws-java-client/src/main/scripts',
    memGB='2G',
)
# Build the renderapi.connect.Render object used by the rest of the notebook
render = renderapi.connect(**render_connect_params)
# Last expression of the cell: displays the resolved connection kwargs
render.make_kwargs()

{'host': 'http://sonic.tnw.tudelft.nl',
 'port': 8080,
 'owner': 'rlane',
 'project': '20220404_02_demo_Brain_10us',
 'client_scripts': '/home/catmaid/render/render-ws-java-client/src/main/scripts',
 'client_script': '/home/catmaid/render/render-ws-java-client/src/main/scripts/run_ws_client.sh',
 'memGB': '2G'}

#### Import and export directories

In [5]:
# Import from: FAST-EM directory whose subdirectories are treated as sections
dir_FASTEM = Path('/long_term_storage/asm_storage/asm_service/2022-04-04/02_demo_Brain_10us')
# Export to: project directory built from the `owner` and `project` set above;
# mipmaps are written underneath it
dir_project = Path(f"/long_term_storage/{owner}/FAST-EM/projects/{project}/")

# Stack directory
!ls -l $dir_FASTEM

total 32
drwxrwxrwx 3 asmftp asmftp  8192 Apr  4 20:55 Ribbon_1_section_3_ROA-1
drwxrwxrwx 3 asmftp asmftp 16384 Apr  4 19:19 Ribbon_1_section_5_ROA-1


## 1) Create mipmaps
---

#### Filepath layout
Mipmaps of each FAST-EM field are output to
`{dir_project}/{stack}/{sectionId}/{row}_{col}/{zoom}.tif` e.g.

In [50]:
%%bash
ls -l /long_term_storage/rlane/FAST-EM/projects/20220404_02_demo_Brain_10us/raw/Ribbon_1_section_3_ROA-1/003_004/

total 106264
-rw-rw-r-- 1 rlane rlane 81920688 Apr 13 11:43 0.tif
-rw-rw-r-- 1 rlane rlane 20480688 Apr 13 11:43 1.tif
-rw-rw-r-- 1 rlane rlane  5120688 Apr 13 11:43 2.tif
-rw-rw-r-- 1 rlane rlane  1280688 Apr 13 11:43 3.tif


In [49]:
from tifffile import TiffWriter

def create_mipmaps(tiff, dir_out, metadata):
    """Write every page of a multi-page tiff to its own single-page tiff.

    Page `i` of `tiff` is written to `{dir_out}/{i}.tif`, so the page index
    becomes the filename (the `{zoom}` part of the output layout).

    Parameters
    ----------
    tiff : TiffFile
        Open tiff whose pages are iterated through `tiff.pages`.
    dir_out : Path or str
        Output directory; it is created (with parents) if it does not exist.
    metadata : dict
        Passed unchanged as the `metadata` argument for every page written.
    """
    # Accept plain strings too and do not rely on the caller to create the dir
    dir_out = Path(dir_out)
    dir_out.mkdir(parents=True, exist_ok=True)

    # Unpack pages
    for i, page in enumerate(tiff.pages):
        # Page pixels as an array (original note: grayscale uint16 image)
        image = page.asarray()
        # Write tiff
        fp = dir_out / f"{i}.tif"
        with TiffWriter(fp.as_posix()) as tif:
            # `save` is the older name of `TiffWriter.write` in tifffile;
            # prefer `write` and only fall back to `save` on old releases
            write_page = getattr(tif, 'write', None) or tif.save
            write_page(image, metadata=metadata)

#### Raw

In [34]:
# Assume every subdirectory of the FAST-EM directory is one section
dir_sections = [dir_ for dir_ in dir_FASTEM.iterdir() if dir_.is_dir()]

# Loop through section directories
for dir_section in tqdm(dir_sections):

    # Loop through base-level tiffs in each section
    # (materialised as a list so tqdm knows the total)
    for fp in tqdm(list(dir_section.glob('[0-9]*_[0-9]*_0.tiff'))):

        # Infer row, col from the first two numbers in the filename
        row, col = [int(i) for i in re.findall(r'\d+', fp.stem)[:2]]

        # Set directory to output mipmaps
        dir_mipmaps = dir_project / 'raw' / dir_section.name / f"{row:03d}_{col:03d}"
        dir_mipmaps.mkdir(parents=True, exist_ok=True)

        # Open the tiff in a context manager so the file handle is released
        # after each field instead of accumulating over the whole loop
        with TiffFile(fp) as tiff:
            # Extract metadata from the tags of the first page
            metadata = {tag.name: tag.value for tag in tiff.pages[0].tags}
            # Create mipmaps
            create_mipmaps(tiff, dir_mipmaps, metadata)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/487 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

#### Corrected

In [35]:
# Recompute the section directories here rather than relying on the global
# `dir_sections`, which another cell of this notebook reassigns
dir_sections = [dir_ for dir_ in dir_FASTEM.iterdir() if dir_.is_dir()]

# Loop through section directories
for dir_section in tqdm(dir_sections):

    # Loop through base-level tiffs in each section's `corrected` subdirectory
    # (materialised as a list so tqdm knows the total)
    for fp in tqdm(list(dir_section.glob('corrected/[0-9]*_[0-9]*_0.tiff'))):

        # Infer row, col from the first two numbers in the filename
        row, col = [int(i) for i in re.findall(r'\d+', fp.stem)[:2]]

        # Set directory to output mipmaps
        dir_mipmaps = dir_project / 'corrected' / dir_section.name / f"{row:03d}_{col:03d}"
        dir_mipmaps.mkdir(parents=True, exist_ok=True)

        # Open the tiff in a context manager so the file handle is released
        # after each field instead of accumulating over the whole loop
        with TiffFile(fp) as tiff:
            # Extract metadata from the tags of the first page
            metadata = {tag.name: tag.value for tag in tiff.pages[0].tags}
            # Create mipmaps
            create_mipmaps(tiff, dir_mipmaps, metadata)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/487 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

## 2) Create tile specifications
---

#### Filepath layout
Reminder that mipmaps of each FAST-EM field are output to
`{dir_project}/{stack}/{sectionId}/{row}_{col}/{zoom}.tif`

In [36]:
import json
from renderapi.image_pyramid import ImagePyramid, MipMapLevel

In [103]:
# Parameters
overlap = 10  # % -- guess


def _natural_key(path):
    """Sort key on `path.name` that compares embedded numbers numerically."""
    # re.split with a capturing group alternates text, digits, text, ...
    # so odd positions are always the digit runs
    return [int(tok) if i % 2 else tok
            for i, tok in enumerate(re.split(r'(\d+)', path.name))]


# Collect tile specifications
tile_dicts = []
# Stack directories ('raw', 'corrected'); skip stray files in the project dir
dir_stacks = sorted((d for d in dir_project.iterdir() if d.is_dir()),
                    key=_natural_key)
# Loop through stack directories
for dir_stack in tqdm(dir_stacks):
    # Set stack name ('raw' or 'corrected')
    stack = dir_stack.name

    # Subdirectories are section directories. `iterdir` yields entries in
    # arbitrary order, so sort by name: otherwise the same section could be
    # given a different z in each stack (or on each run).
    dir_sections = sorted((d for d in dir_stack.iterdir() if d.is_dir()),
                          key=_natural_key)
    # Loop through section directories; z is the position in the sorted list
    for z, dir_section in tqdm(enumerate(dir_sections),
                               total=len(dir_sections),
                               leave=False):
        # Set sectionId
        sectionId = dir_section.name

        # Loop through mipmap directories within each section
        for dir_mipmap in tqdm(list(dir_section.glob('[0-9]*_[0-9]*')),
                               leave=False):

            # Read base-level tiff; only its description is needed, so close
            # the file right away instead of leaking one handle per tile
            fp = dir_mipmap / '0.tif'
            with TiffFile(fp) as tiff:
                # Parse the JSON stored in the first page's description
                md = json.loads(tiff.pages[0].description)
            # Infer row, col from the `{row}_{col}` directory name
            row, col = [int(i) for i in re.findall(r'\d+', dir_mipmap.name)[:2]]
            # Set translation based on overlap guess
            x0 = col * (1 - overlap/100) * md['ImageWidth']
            y0 = row * (1 - overlap/100) * md['ImageLength']

            # Create nested MipMapLevels, one per `{level}.tif` file
            mmls = []
            for mmfp in sorted(dir_mipmap.glob('[0-9].tif')):
                level = mmfp.stem
                imageUrl = f"https://sonic.tnw.tudelft.nl{mmfp.as_posix()}"
                mml = MipMapLevel(level, imageUrl=imageUrl)
                mmls.append(mml)
            # Create ImagePyramid from MipMapLevels
            ip = ImagePyramid({m.level: m.mipmap for m in mmls})

            # Handle missing DateTime metadata in corrected tiffs
            try:
                acqtime = pd.to_datetime(md['DateTime'])
            except KeyError:
                acqtime = -1

            # Build up tile specification (key order sets the column order
            # of the DataFrame created from `tile_dicts`)
            ts = {
                'stack': stack,
                'sectionId': sectionId,
                'z': z,
                'tileId': f'{stack[:3]}-S{z:03d}-{row:03d}x{col:03d}',
                'acqtime': acqtime,
                'width': md['ImageWidth'],
                'height': md['ImageLength'],
                'imageRow': row,
                'imageCol': col,
                'imagePyramid': ip,
                # Full uint16 range as placeholder intensity limits
                'minint': 0,
                'maxint': 65535,
                # Pure translation to the tile's position in the grid
                'tforms': [AffineModel(B0=x0, B1=y0)],
            }
            tile_dicts.append(ts)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/487 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/487 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

#### Create stack DataFrames

In [104]:
# One row per tile specification collected above
df_stacks = pd.DataFrame(tile_dicts)

# Sneak peek: three random tiles from each stack
(df_stacks.groupby('stack')
          .apply(lambda grp: grp.sample(3)))

Unnamed: 0_level_0,Unnamed: 1_level_0,stack,sectionId,z,tileId,acqtime,width,height,imageRow,imageCol,imagePyramid,minint,maxint,tforms
stack,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
corrected,1059,corrected,Ribbon_1_section...,0,cor-S000-014x024,-1,6400,6400,14,24,"[0, 1, 2, 3, 4]",0,65535,"[M=[[1.000000,0...."
corrected,1205,corrected,Ribbon_1_section...,1,cor-S001-005x010,-1,6400,6400,5,10,"[0, 1, 2, 3, 4]",0,65535,"[M=[[1.000000,0...."
corrected,788,corrected,Ribbon_1_section...,0,cor-S000-005x005,-1,6400,6400,5,5,"[0, 1, 2, 3, 4]",0,65535,"[M=[[1.000000,0...."
raw,593,raw,Ribbon_1_section...,1,raw-S001-008x002,2022-04-04 16:53:07,6400,6400,8,2,"[0, 1, 2, 3]",0,65535,"[M=[[1.000000,0...."
raw,23,raw,Ribbon_1_section...,0,raw-S000-000x023,2022-04-04 15:28:10,6400,6400,0,23,"[0, 1, 2, 3]",0,65535,"[M=[[1.000000,0...."
raw,77,raw,Ribbon_1_section...,0,raw-S000-002x021,2022-04-04 15:36:06,6400,6400,2,21,"[0, 1, 2, 3]",0,65535,"[M=[[1.000000,0...."


#### Set intensity levels
Sample `n` images/section to determine reasonable min/max intensity values.

In [105]:
# Set parameters
n = 10          # sample size (per section)
pcts = (1, 99)  # % for intensity clipping
stacks = df_stacks['stack'].unique().tolist()
z_values = df_stacks['z'].unique().tolist()

# Loop through stacks
for stack, df_stack in tqdm(df_stacks.groupby('stack'),
                            total=len(stacks)):

    # Loop through sections
    for z, tileset in tqdm(df_stack.groupby('z'),
                           total=len(z_values),
                           leave=False):

        # Sample base-level image URLs; clamp the sample size so sections
        # with fewer than `n` tiles do not make `sample` raise
        n_sample = min(n, len(tileset))
        fps = tileset.sample(n_sample)['imagePyramid']\
                     .apply(lambda x: x[0]['imageUrl'])\
                     .tolist()

        # Collect min/max intensity values
        minints = []
        maxints = []
        # Loop through sample tiles
        for fp in tqdm(fps, leave=False):

            # Strip the host from the URL to recover the local filepath;
            # split once only, so a '.nl' inside the path itself is kept
            fp_tiff = fp.split('.nl', 1)[1]
            # Load tiff image, closing the file as soon as it is read
            with TiffFile(fp_tiff) as tiff:
                image = tiff.asarray()

            # Get intensity percentiles
            minint, maxint = np.percentile(image, pcts)
            minints.append(minint)
            maxints.append(maxint)

        # Set min/max intensity for every tile of this stack and section
        in_section = (df_stacks['stack'] == stack) & (df_stacks['z'] == z)
        df_stacks.loc[in_section, 'minint'] = np.mean(minints, dtype=int)
        df_stacks.loc[in_section, 'maxint'] = np.mean(maxints, dtype=int)

# Sneak peek: four random tiles from each stack
df_stacks.groupby('stack')\
         .apply(lambda x: x.sample(4))

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0_level_0,Unnamed: 1_level_0,stack,sectionId,z,tileId,acqtime,width,height,imageRow,imageCol,imagePyramid,minint,maxint,tforms
stack,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
corrected,845,corrected,Ribbon_1_section...,0,cor-S000-007x006,-1,6400,6400,7,6,"[0, 1, 2, 3, 4]",29659,35667,"[M=[[1.000000,0...."
corrected,863,corrected,Ribbon_1_section...,0,cor-S000-007x024,-1,6400,6400,7,24,"[0, 1, 2, 3, 4]",29659,35667,"[M=[[1.000000,0...."
corrected,1266,corrected,Ribbon_1_section...,1,cor-S001-010x006,-1,6400,6400,10,6,"[0, 1, 2, 3, 4]",29072,35591,"[M=[[1.000000,0...."
corrected,1252,corrected,Ribbon_1_section...,1,cor-S001-009x005,-1,6400,6400,9,5,"[0, 1, 2, 3, 4]",29072,35591,"[M=[[1.000000,0...."
raw,368,raw,Ribbon_1_section...,0,raw-S000-013x004,2022-04-04 16:18:49,6400,6400,13,4,"[0, 1, 2, 3]",53784,60419,"[M=[[1.000000,0...."
raw,582,raw,Ribbon_1_section...,1,raw-S001-007x004,2022-04-04 16:51:30,6400,6400,7,4,"[0, 1, 2, 3]",53682,60674,"[M=[[1.000000,0...."
raw,549,raw,Ribbon_1_section...,1,raw-S001-004x010,2022-04-04 16:46:39,6400,6400,4,10,"[0, 1, 2, 3]",53682,60674,"[M=[[1.000000,0...."
raw,605,raw,Ribbon_1_section...,1,raw-S001-009x001,2022-04-04 16:54:53,6400,6400,9,1,"[0, 1, 2, 3]",53682,60674,"[M=[[1.000000,0...."


## 3) Upload stack to `render-ws`

In [106]:
from icatapi import create_stack_from_DataFrame

In [107]:
# Stack resolution, the same for every stack (units are not stated here)
Rx, Ry, Rz = 4, 4, 100

# Create one stack per group of tiles, named after the stack
for stack, df_stack in tqdm(df_stacks.groupby('stack')):
    create_stack_from_DataFrame(
        df=df_stack,
        name=stack,
        stackResolutionX=Rx,
        stackResolutionY=Ry,
        stackResolutionZ=Rz,
        render=render,
    )

  0%|          | 0/2 [00:00<?, ?it/s]

Creating tile specifications for [1mcorrected[0m...
Importing tile specifications to [1mcorrected[0m...
Stack [1mcorrected[0m created successfully.
Creating tile specifications for [1mraw[0m...
Importing tile specifications to [1mraw[0m...
Stack [1mraw[0m created successfully.
