# iCAT Export
---

#### Overview
Export data in pyramidal png stacks for CATMAID.

In [1]:
from pathlib import Path
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### Settings

In [2]:
# pandas display settings
pd.set_option('display.max_rows', 20)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 15)

# Indirectly enable autocomplete
%config Completer.use_jedi = False

## Set up `render-ws` environment
---

In [3]:
import renderapi
from icatapi.render_pandas import create_stack_DataFrame, create_stacks_DataFrame

In [4]:
# `render` project parameters
# ---------------------------
# `owner` and `project` identify the render-ws project; both directories are
# derived from them.
owner = 'rlane'
project = 'pancreas'
project_dir = Path(f'/long_term_storage/{owner}/SECOM/projects/{project}')
export_dir = Path(f'/long_term_storage/{owner}/CATMAID/projects/{project}')

# Create a renderapi.connect.Render object
# ----------------------------------------
# Connection settings are kept in a dict because later cells read
# `client_scripts` from it to locate the export shell script.
render_connect_params = dict(
    host='sonic.tnw.tudelft.nl',
    port=8080,
    owner=owner,
    project=project,
    client_scripts='/home/catmaid/render/render-ws-java-client/src/main/scripts',
    memGB='2G',
)
render = renderapi.connect(**render_connect_params)
# Display the resolved connection keyword arguments as the cell output
render.make_kwargs()

{'host': 'http://sonic.tnw.tudelft.nl',
 'port': 8080,
 'owner': 'rlane',
 'project': 'pancreas',
 'client_scripts': '/home/catmaid/render/render-ws-java-client/src/main/scripts',
 'client_script': '/home/catmaid/render/render-ws-java-client/src/main/scripts/run_ws_client.sh',
 'memGB': '2G'}

In [5]:
# Infer stack and section info
# ----------------------------
# Stacks are split into EM / FM purely on whether the name contains 'EM'.
stacks = renderapi.render.get_stacks_by_owner_project(render=render)
stacks_EM = list(filter(lambda name: 'EM' in name, stacks))
stacks_FM = list(filter(lambda name: 'EM' not in name, stacks))
# Subset of stacks that will be exported to CATMAID
stacks_2_export = ['EM_himag_stitched',
                   'EM_lomag_correlated',
                   'Hoechst_correlated',
                   'AF594_correlated']

# Output
# ------
out = f"""\
project directory... {project_dir} | Exists: {project_dir.exists()}
export directory.... {export_dir} | Exists: {export_dir.exists()}
all stacks.......... {stacks}
EM stacks........... {stacks_EM}
FM stacks........... {stacks_FM}
stacks to export.... {stacks_2_export}
...
"""
print(out)

# Create project DataFrame
# ------------------------
df_project = create_stacks_DataFrame(stacks=stacks_2_export, render=render)
# Preview the first 5 tiles of every stack (cell output)
(
    df_project
    .groupby('stack')
    .apply(lambda x: x.head(5))
)

project directory... /long_term_storage/rlane/SECOM/projects/pancreas | Exists: False
export directory.... /long_term_storage/rlane/CATMAID/projects/pancreas | Exists: False
all stacks.......... ['Hoechst_correlated', 'EM_lomag_correlated', 'AF594_correlated', 'EM_minimontages', 'EM_lomag_world', 'EM_himag_world', 'EM_lomag_overlaid', 'AF594_overlaid', 'Hoechst_overlaid', 'EM_himag_stitched', 'EM_himag_montaged', 'Hoechst', 'EM_lomag', 'EM_himag', 'AF594']
EM stacks........... ['EM_lomag_correlated', 'EM_minimontages', 'EM_lomag_world', 'EM_himag_world', 'EM_lomag_overlaid', 'EM_himag_stitched', 'EM_himag_montaged', 'EM_lomag', 'EM_himag']
FM stacks........... ['Hoechst_correlated', 'AF594_correlated', 'AF594_overlaid', 'Hoechst_overlaid', 'Hoechst', 'AF594']
stacks to export.... ['EM_himag_stitched', 'EM_lomag_correlated', 'Hoechst_correlated', 'AF594_correlated']
...



Unnamed: 0_level_0,Unnamed: 1_level_0,tileId,z,width,height,minint,maxint,imageUrl,tforms,stack,sectionId,imageRow,imageCol,stageX,stageY
stack,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AF594_correlated,108,aaa_insulin...,4.0,2048.0,2048.0,441.0,882.0,https://son...,[M=[[1.0000...,AF594_corre...,S004,1,1,1025.598744,6371.521975
AF594_correlated,109,aaa_insulin...,5.0,2048.0,2048.0,439.0,878.0,https://son...,[M=[[1.0000...,AF594_corre...,S005,1,1,1622.072408,6381.0776
AF594_correlated,110,aaa_insulin...,6.0,2048.0,2048.0,445.0,890.0,https://son...,[M=[[1.0000...,AF594_corre...,S006,1,1,2227.808609,6381.567861
AF594_correlated,111,aaa_insulin...,7.0,2048.0,2048.0,439.0,878.0,https://son...,[M=[[1.0000...,AF594_corre...,S007,1,1,2832.009499,6381.316683
EM_himag_stitched,0,aaa_lil_EM-...,4.0,4096.0,4096.0,31039.0,32893.0,https://son...,[M=[[0.2309...,EM_himag_st...,S004,6,6,1078.477,6335.559
EM_himag_stitched,1,aab_lil_EM-...,4.0,4096.0,4096.0,31039.0,32893.0,https://son...,[M=[[0.2309...,EM_himag_st...,S004,6,5,1060.996,6335.559
EM_himag_stitched,2,aac_lil_EM-...,4.0,4096.0,4096.0,31039.0,32893.0,https://son...,[M=[[0.2308...,EM_himag_st...,S004,6,4,1043.523,6335.571
EM_himag_stitched,3,aad_lil_EM-...,4.0,4096.0,4096.0,31039.0,32893.0,https://son...,[M=[[0.2309...,EM_himag_st...,S004,6,3,1025.992,6335.58
EM_himag_stitched,4,aae_lil_EM-...,4.0,4096.0,4096.0,31039.0,32893.0,https://son...,[M=[[0.2309...,EM_himag_st...,S004,6,2,1008.514,6335.597
EM_lomag_correlated,100,aaa_big_EM-...,4.0,4096.0,4096.0,31678.0,34295.0,https://son...,[M=[[1.5358...,EM_lomag_co...,S004,1,1,1021.11,6371.003


## Export `render-ws` stacks to CATMAID
---
### Set up CATMAID export parameters

In [6]:
from random import sample
from renderapi.client import ArgumentParameters

In [7]:
class CatmaidBoxesParameters(ArgumentParameters):
    """Subclass of `ArgumentParameters` for facilitating CATMAID export client script

    Collects the options of the `render_catmaid_boxes` client script as
    instance attributes. Attributes are assigned in a fixed order (stack,
    rootDirectory, height, width, format, maxLevel, baseDataUrl, owner,
    project) and no other attributes are set here.

    Parameters
    ----------
    stack : str
        Name of the stack to export (`--stack`).
    root_directory : str
        Root directory for the rendered tiles (`--rootDirectory`).
    height, width : int
        Height and width of each exported box (`--height`, `--width`).
    fmt : str
        Image format of the exported boxes (`--format`).
    max_level : int
        Maximum mipmap level to generate (`--maxLevel`).
    host, port : optional
        Explicit render-ws host and port. When omitted they are taken from
        `render`.
    baseurl : str, optional
        Explicit base data URL (`--baseDataUrl`). When omitted it is built
        from the host and port.
    owner, project : str, optional
        Stack owner and project. When omitted they are taken from `render`.
    render : renderapi.connect.Render, optional
        Render connection supplying defaults for anything not given
        explicitly.
    **kwargs
        Forwarded unchanged to `ArgumentParameters.__init__`.

    Raises
    ------
    ValueError
        If neither `baseurl`, `host` nor `render` provides a server address.
    """
    def __init__(self, stack, root_directory,
                 height=1024, width=1024, fmt='png', max_level=0,
                 host=None, port=None, baseurl=None,
                 owner=None, project=None, render=None, **kwargs):

        super().__init__(**kwargs)

        self.stack = stack
        self.rootDirectory = root_directory
        self.height = height
        self.width = width
        self.format = fmt
        self.maxLevel = max_level

        # Defaults come from the render connection (if one was given) ...
        render_kwargs = render.make_kwargs() if render is not None else {}
        # ... but explicitly passed values take precedence over them
        if host is None:
            host = render_kwargs.get('host')
        if port is None:
            port = render_kwargs.get('port')
        if baseurl is None:
            if host is None:
                raise ValueError("Cannot determine base data URL: provide "
                                 "`baseurl`, `host` or `render`.")
            baseurl = renderapi.render.format_baseurl(host, port)

        self.baseDataUrl = baseurl
        self.owner = render_kwargs.get('owner') if owner is None else owner
        self.project = render_kwargs.get('project') if project is None else project

#### Logic for maximum zoom level

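Ideally `max_level` is set such that the whole section fits within a single tile at the most zoomed-out level, i.e.

\begin{equation}
\left( \frac{w_s}{w_t \,\, 2^m} \right) \leq 1
\end{equation}

where $m$ is `max_level`, $w_s$ is the width of the stack (in the code below, the larger of the stack's width and height) and $w_t$ is the width of each tile. The smallest integer $m$ that satisfies this is

\begin{equation}
m = \textrm{ceil} \left( \log{\frac{w_s}{w_t}} \times \frac{1}{\log{2}} \right)
\end{equation}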

In [8]:
# Initialize collection for export parameters
export_data = {}
# Set format
fmt = 'png'
# Width and height (in pixels) of the exported CATMAID tiles
w_tile = 1024
h_tile = 1024

# First pass: determine the `max_level` each stack needs such that the full
# section is in view when fully zoomed out
max_levels = {}
for stack, df_stack in df_project.groupby('stack'):

    stack_bounds = renderapi.stack.get_stack_bounds(stack=stack,
                                                    render=render)
    # Use the larger of the two in-plane extents
    w_stack = max(stack_bounds['maxX'] - stack_bounds['minX'],
                  stack_bounds['maxY'] - stack_bounds['minY'])
    # `np.log2` is exact for powers of two, whereas log(x) / log(2) can land
    # just above an integer and be rounded up one level too far by `ceil`
    max_level = int(np.ceil(np.log2(w_stack / w_tile)))
    max_levels[stack] = max_level

# Export each stack to highest level in the project (and never below 0).
# This must be known *before* any parameters are created, otherwise stacks
# visited early would be exported with fewer levels than later ones.
maxest_level = max(max(max_levels.values(), default=0), 0)

# Second pass: set parameters for export to CATMAID
for stack in max_levels:
    export_params = CatmaidBoxesParameters(stack=stack,
                                           root_directory=export_dir.parent.as_posix(),
                                           width=w_tile,
                                           height=h_tile,
                                           max_level=maxest_level,
                                           fmt=fmt,
                                           project=project,
                                           render=render)

    # Add CATMAID export parameters to collection
    export_data[stack] = export_params

# Preview the java arguments of one randomly chosen stack
# (`sample` requires a sequence, so the dict keys are listed first)
stack = sample(list(export_data), 1)[0]
list(export_data[stack].to_java_args())

['--stack',
 'Hoechst_correlated',
 '--rootDirectory',
 '/long_term_storage/rlane/CATMAID/projects',
 '--height',
 '1024',
 '--width',
 '1024',
 '--format',
 'png',
 '--maxLevel',
 '6',
 '--baseDataUrl',
 'http://sonic.tnw.tudelft.nl:8080/render-ws/v1',
 '--owner',
 'rlane',
 '--project',
 'pancreas']

### Call render script
`render_catmaid_boxes.sh`
```sh
Usage: java -cp <render-module>-standalone.jar
      org.janelia.render.client.BoxClient [options] Z values for layers to
      render
  Options:
  * --baseDataUrl
      Base web service URL for data (e.g. http://host[:port]/render-ws/v1)
    --binaryMask
      use binary mask (e.g. for DMG data)
      Default: false
    --createIGrid
      create an IGrid file
      Default: false
    --doFilter
      Use ad hoc filter to support alignment
      Default: false
    --filterListName
      Apply this filter list to all rendering (overrides doFilter option)
    --forceGeneration
      Regenerate boxes even if they already exist
      Default: false
    --format
      Format for rendered boxes
      Default: png
  * --height
      Height of each box
    --help
      Display this note
    --label
      Generate single color tile labels instead of actual tile images
      Default: false
    --maxLevel
      Maximum mipmap level to generate
      Default: 0
    --maxOverviewWidthAndHeight
      Max width and height of layer overview image (omit or set to zero to
      disable overview generation)
    --numberOfRenderGroups
      Total number of parallel jobs being used to render this layer (omit if
      only one job is being used)
  * --owner
      Stack owner
  * --project
      Stack project
    --renderGroup
      Index (1-n) that identifies portion of layer to render (omit if only one
      job is being used)
  * --rootDirectory
      Root directory for rendered tiles (e.g.
      /tier2/flyTEM/nobackup/rendered_boxes)
    --skipInterpolation
      skip interpolation (e.g. for DMG data)
      Default: false
  * --stack
      Stack name
  * --width
      Width of each box
```

#### Wrapper for `render_catmaid_boxes` script for multiprocessing
Multiprocessing is done across sections: each section (z value) is rendered by its own call to the client script, and these calls are distributed over a pool of worker processes.

In [9]:
def run_render_catmaid_boxes(z, client_script, java_args):
    """Wrapper for `render_catmaid_boxes` script to enable multiprocessing

    Runs `client_script` once for a single section, passing the z value
    (formatted without decimals) followed by the java arguments.

    Parameters
    ----------
    z : number
        z value of the section to render.
    client_script : str or pathlib.Path
        Path to the `render_catmaid_boxes` shell script.
    java_args : iterable of str
        Command line arguments appended after the z value.

    Returns
    -------
    int
        Exit code of the client script, so that callers (e.g. `Pool.map`)
        can detect sections that failed to export.
    """
    # Imported locally so the function does not depend on an import made in a
    # later notebook cell
    import subprocess

    command = [Path(client_script).as_posix(), f'{z:.0f}'] + list(java_args)
    completed = subprocess.run(command)
    return completed.returncode

#### \*\****COMPUTATIONALLY EXPENSIVE*** \**

##### Run `render_catmaid_boxes` on `N_cores`

`renderapi.client.WithPool` ends prematurely (after exporting one section).  
Weirdly only happens with `lil_EM_montaged` stack...

In [10]:
import subprocess
from functools import partial
from multiprocessing import Pool

In [11]:
# Path to `render_catmaid_boxes` shell script
fp_client = Path(render_connect_params['client_scripts']) / 'render_catmaid_boxes.sh'
# Set number of cores for multiprocessing (at least 1, `Pool(0)` is invalid)
N_cores = max(1, min(15, df_project['z'].unique().size))
# Get z values: sorted union over all stacks to export.
# The per-stack lists are flattened before `np.unique`, because stacks with
# different numbers of sections would otherwise form a ragged array.
z_values = np.unique([z
                      for stack in stacks_2_export
                      for z in renderapi.stack.get_z_values_for_stack(stack, render=render)])

# Iterate through stacks to export
for stack in tqdm(stacks_2_export):

    # Create java arguments from export parameters
    java_args = list(export_data[stack].to_java_args())

    # Set up `render_catmaid_boxes` client script
    # (everything but the z value is fixed per stack)
    render_catmaid_boxes_partial = partial(run_render_catmaid_boxes,
                                           client_script=fp_client,
                                           java_args=java_args)

    # Run `render_catmaid_boxes` across `N_cores`, one task per z value
    with Pool(N_cores) as pool:
        pool.map(render_catmaid_boxes_partial, z_values)

  0%|          | 0/4 [00:00<?, ?it/s]

## Set up tiles for import to CATMAID
---
### Resort CATMAID tiles
By (unchangeable) default, `render_catmaid_boxes` exports tiles as

`root directory` / `project` / `stack` / `width x height` / `zoomlevel` / `z` / `row` / `col.fmt`

This is OK, but the preferred format for importing to CATMAID is [tile source convention 1](https://catmaid.readthedocs.io/en/stable/tile_sources.html#tile-source-types) --- "[File-based image stack](https://catmaid.readthedocs.io/en/stable/tile_sources.html#file-based-image-stack)":

`root directory` / `project` / `stack` / `z` / `row_col_zoomlevel.fmt`

One other detail: CATMAID assumes that sections are 0-indexed, so $z_{min}$ (the smallest z value across the exported stacks) is subtracted from each section's z value when the tiles are renamed.

#### \*\****CHANGES LOTS & LOTS OF FILEPATHS ON DISK*** \**

In [12]:
from shutil import rmtree
from skimage import io, transform, img_as_ubyte

In [13]:
# Offset that makes the section index 0-based (same for every tile)
z_min = int(z_values.min())

# Iterate through stacks to export
for stack in tqdm(stacks_2_export):

    # Directory tree written by `render_catmaid_boxes` for this stack
    dir_boxes = export_dir / stack / '1024x1024'

    # Collect all the exported tiles per stack. The listing is materialized
    # up front so that files are not moved while the directory tree is still
    # being traversed by the (lazy) recursive glob.
    fps = list((export_dir / stack).glob(f"1024x1024/**/[0-9]*.{fmt}"))
    for fp in fps:

        # Extract tile info from filepath
        # (.../`zoomlevel`/`z`/`row`/`col.fmt`)
        zoom_level = int(fp.parents[2].name)
        z = int(fp.parents[1].name) - z_min  # 0-index
        row = int(fp.parents[0].name)
        col = int(fp.stem)

        # Reformat tile to `z`/`row_col_zoomlevel.fmt`
        tile_format_1 = export_dir / stack / f"{z}/{row}_{col}_{zoom_level}.{fmt}"
        tile_format_1.parent.mkdir(parents=True, exist_ok=True)
        fp.rename(tile_format_1)

    # Clean up (now presumably empty) directory tree.
    # Skipped if it is already gone, so that re-running this cell is harmless.
    if dir_boxes.is_dir():
        rmtree(dir_boxes.as_posix())

  0%|          | 0/4 [00:00<?, ?it/s]

#### Make thumbnails

In [14]:
from icatapi.plotting import colorize, T_HOECHST, T_AF594

In [19]:
# Colorize settings: one colorize argument per fluorescence stack
d_colorize = {
    'Hoechst_correlated': T_HOECHST,
    'AF594_correlated': T_AF594,
}
# Thumbnail size
output_shape = (128, 128)

# Loop through stacks to export
for stack in tqdm(stacks_2_export):

    # Only stacks listed in `stacks_FM` are colorized
    is_FM = stack in stacks_FM

    # Loop through each section (0-indexed, matching the directory names)
    for z in (z_values - z_values.min()):

        # Load most zoomed out image (0, 0, `maxest_level`) and resize it
        image = io.imread(export_dir / f"{stack}/{z:.0f}/0_0_{maxest_level}.{fmt}")
        thumb = transform.resize(image, output_shape=output_shape)
        # Colorize
        thumb = colorize(thumb, d_colorize[stack]) if is_FM else thumb
        # Save next to the tiles of the section
        fp_thumb = export_dir / f"{stack}/{z:.0f}/small.{fmt}"
        io.imsave(fp_thumb, img_as_ubyte(thumb))

  0%|          | 0/4 [00:00<?, ?it/s]

#### Create `project.yaml` file

In [20]:
import sys
from random import sample
from ruamel.yaml import YAML
from tifffile import TiffFile
from bs4 import BeautifulSoup as Soup
import json

In [21]:
# Set project yaml file
project_yaml = export_dir / 'project.yaml'

# Get resolution data (base it off OG EM himag resolution data).
# It does not depend on the stack being exported, so it is requested once.
# Values are converted to plain floats so the formatted tuple reads
# "(4.85951, ...)" regardless of how NumPy prints its scalar types.
stack_metadata = renderapi.stack.get_full_stack_metadata(stack='EM_himag',
                                                         render=render)
current_version = stack_metadata['currentVersion']
resolution = (float(np.round(current_version['stackResolutionX'], 5)),
              float(np.round(current_version['stackResolutionY'], 5)),
              float(np.round(current_version['stackResolutionZ'], 5)))

# Collect stack data
stack_data = []
for stack in tqdm(stacks_2_export):

    # Get dimension data (x and y extents padded by 10%)
    bounds = renderapi.stack.get_stack_bounds(stack=stack,
                                              render=render)
    dimensions = (int((bounds['maxX'] - bounds['minX']) * 1.1),
                  int((bounds['maxY'] - bounds['minY']) * 1.1),
                  int(bounds['maxZ'] - bounds['minZ'] + 1))

    # Get metadata from the image of one randomly chosen tile
    ts = sample(renderapi.tilespec.get_tile_specs_from_stack(stack=stack,
                                                             render=render), 1)[0]
    # Everything after '.nl' in the image URL is used as the local file path
    fp = ts.ip[0]['imageUrl'].split('.nl')[1]
    # Close the file once the description of the first page has been read
    with TiffFile(fp) as tif:
        metadata = tif.pages[0].description

    # Project data for output to project yaml file
    stack_datum = {
        "title": f"{stack}",
        "dimension": f"{dimensions}",
        "resolution": f"{resolution}",
        "zoomlevels": f"{(maxest_level + 1):.0f}",
        "metadata": metadata,
        "mirrors": [{
            "title": f"{project}_{stack.split('_')[0]}",
            "tile_width": 1024,
            "tile_height": 1024,
            "tile_source_type": 1,
            "fileextension": f"{fmt}",
            "url": f"http://sonic.tnw.tudelft.nl{(export_dir/stack).as_posix()}"
        }]
    }
    stack_data.append(stack_datum)

# Create dict for input into project yaml file
project_data = {
    "project": {
        "title": f"{project}",
        "stacks": stack_data
    }
}

  0%|          | 0/4 [00:00<?, ?it/s]

In [22]:
# Header: path of the project file followed by a separator line
out = f"""\
{project_yaml}
--------\
"""
print(out)

# Configure the YAML writer
yaml = YAML()
yaml.indent(mapping=2, offset=0)
# Write the project data to the project file, then echo it as cell output
for stream in (project_yaml, sys.stdout):
    yaml.dump(project_data, stream)

/long_term_storage/rlane/CATMAID/projects/pancreas/project.yaml
--------
project:
  title: pancreas
  stacks:
  - title: EM_himag_stitched
    dimension: (20544, 20870, 4)
    resolution: (4.85951, 4.85951, 100.0)
    zoomlevels: '7'
    metadata: "<?xml version=\"1.0\" encoding=\"UTF-8\"?><OME xmlns=\"http://www.openmicroscopy.org/Schemas/OME/2012-06\"\
      \ xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"\
      http://www.openmicroscopy.org/Schemas/OME/2012-06 http://www.openmicroscopy.org/Schemas/OME/2012-06/ome.xsd\"\
      \ crucial dimensional parameters and other important metadata. Please edit cautiously\
      \ (if at all), and back up the original data before doing so. For more information,\
      \ see the OME-TIFF web site: http://ome-xml.org/wiki/OmeTiff.-->\n    <Instrument\
      \ ID=\"Instrument:0\">\n        <Microscope Manufacturer=\"Delmic\" />\n   \
      \     <Detector ID=\"Detector:0\" Model=\"pcie-6251\" />\n        <Objective\