## Initial insertion of masks into the existing n5 containers

Rohit Batra generated an extracellular space mask for each dataset, which needed to be integrated into the existing \*.n5 containers. This notebook, run in mid-August of 2019, used directory operations and `zarr` to insert each mask into its n5 container and update the attributes of the mask to include the resolution metadata. 

Going forward, the process of creating masks will target the n5 containers directly, so these operations will not be needed.

In [1]:
from pathlib import Path
from glob import glob
import zarr
def res_from_fname(path):
    """Parse a voxel resolution such as ``4x4x4`` out of a file name.

    Only the final component of `path` is searched, and the first
    ``<int>x<int>x<int>`` occurrence wins.

    Parameters
    ----------
    path : str or Path
        Path whose last component may contain the resolution.

    Returns
    -------
    dict or None
        ``{'x': int, 'y': int, 'z': int}`` built from the three numbers in
        the order they appear, or None when the name has no such pattern.
    """
    import re
    # `+` lets each axis have several digits; a single-digit pattern rejects
    # names like 16x16x16 and truncates names like 4x4x40.
    p = re.compile('[0-9]+x[0-9]+x[0-9]+')
    # `.name` is '' for an empty or root path, which simply fails to match.
    fname = Path(path).name
    hit = p.search(fname)
    if hit is None:
        return None
    return dict(zip(('x','y','z'), map(int, hit.group(0).split('x'))))

def name_from_fname(arg):
    """Return the last component of `arg`, cut off at its first '.'."""
    last_component = Path(arg).parts[-1]
    stem, _, _ = last_component.partition('.')
    return stem

def attribute_formatter(name, resolution):
    """Build the attribute dict for a volume.

    `resolution` is a mapping whose values, in iteration order, become the
    ``dimensions`` list; the unit is always ``'nm'``.
    """
    dimensions = [*resolution.values()]
    return {
        'name': name,
        'pixelResolution': {'unit': 'nm', 'dimensions': dimensions},
    }

def process_fname(fname):
    """Derive the attribute dict for a container from its file name.

    Returns None when no resolution can be parsed from `fname`; otherwise
    the result of `attribute_formatter` for the parsed name and resolution.
    """
    resolution = res_from_fname(fname)
    if resolution is not None:
        return attribute_formatter(name_from_fname(fname), resolution)
    return None

In [2]:
# Sorted n5 containers, keeping only those whose file name encodes a resolution.
n5s = [
    n5_path
    for n5_path in sorted(glob('/groups/cosem/cosem/data/*/*.n5'))
    if res_from_fname(n5_path)
]

In [4]:
# Write the name / pixelResolution attributes derived from each file name
# onto that container's raw volume.
for n5f in n5s:
    attrs = process_fname(n5f)
    raw_volume = zarr.open(zarr.N5Store(n5f))['/volumes/raw/']
    raw_volume.attrs.update(attrs)

In [7]:
# Spot-check: show the pixelResolution now stored on the second container's raw volume.
raw_attrs = zarr.open(zarr.N5Store(n5s[1]))['/volumes/raw'].attrs
raw_attrs['pixelResolution']

{'dimensions': [4, 4, 4], 'unit': 'nm'}

In [11]:
import numpy as np
from functools import reduce
from operator import add
from fst.io import read
# Mask containers, in sorted path order.
n5masks = sorted(glob('/groups/cosem/cosem/masks/*.n5'))
# Raw containers, in sorted path order, restricted to file names that encode a resolution.
n5s = sorted(glob('/groups/cosem/cosem/data/*/*.n5'))
n5s = [n for n in n5s if res_from_fname(n)]
# Drop two containers from the raw list. list.index raises ValueError if a path is absent.
# NOTE(review): presumably these two have no counterpart in n5masks — confirm.
n5s.pop(n5s.index('/groups/cosem/cosem/data/Mouse_NA3-3_4x4x4nm/Mouse_NA3-3_4x4x4nm.n5'))
# The value returned by this final pop is what the cell displays.
n5s.pop(n5s.index('/groups/cosem/cosem/data/Pancreas_Islets_4x4x4m/Pancreas_G36-2_4x4x4nm.n5'))

'/groups/cosem/cosem/data/Pancreas_Islets_4x4x4m/Pancreas_G36-2_4x4x4nm.n5'

In [52]:
# Masks and raw containers are paired purely by sorted position, so the two
# lists must line up one-to-one. zip() stops at the shorter list, which would
# otherwise silently hide a missing or extra container.
if len(n5masks) != len(n5s):
    raise ValueError(
        'cannot pair {} mask containers with {} raw containers'.format(
            len(n5masks), len(n5s)))
# Each entry is (mask container path, raw container path).
correspondences = list(zip(n5masks, n5s))

def new_resolution(raw_n5):
    """Infer the pixel resolution of the mask stored next to the raw volume.

    The raw volume's ``pixelResolution['dimensions']`` are scaled by the
    per-axis ratio ``raw.shape / mask.shape``, rounded up to an integer.

    Parameters
    ----------
    raw_n5 : str
        Path to a container holding both ``volumes/raw`` and ``volumes/mask``.

    Returns
    -------
    dict
        ``{'x': int, 'y': int, 'z': int}``, filled in the order of the
        scaled dimensions.
    """
    # Open the container once and take both arrays from the same handle.
    container = read(raw_n5)
    raw = container['volumes/raw']
    mask = container['volumes/mask']

    raw_pixel_size = np.array(list(raw.attrs['pixelResolution']['dimensions']))
    ratio = np.ceil(np.array(raw.shape) / np.array(mask.shape)).astype('int')
    # NOTE(review): `ratio` follows the array axis order while the pixel sizes
    # follow their stored order — confirm these agree for anisotropic data.
    scaled = (ratio * raw_pixel_size).tolist()
    return dict(zip(('x','y','z'), map(int, scaled)))


In [33]:
# Preview the attributes that would be written for the first container's mask.
first_n5 = n5s[0]
attribute_formatter(Path(first_n5).parts[-1], new_resolution(first_n5))

{'name': 'Cryo_LoadID277_Cell11_8x8x8nm_bigwarped_v17.n5',
 'pixelResolution': {'unit': 'nm', 'dimensions': [16, 16, 16]}}

In [21]:
# Open the first container and keep the handle in `tmp`.
# NOTE(review): `tmp` is not used by any other cell visible here — possibly
# leftover scratch state; confirm before removing.
tmp = read(n5s[0])
# Display which container n5s[0] refers to.
n5s[0]

'/groups/cosem/cosem/data/COS7_Cell11_8x8x8nm/Cryo_LoadID277_Cell11_8x8x8nm_bigwarped_v17.n5'

In [216]:
import shutil
def write_mask_dataset(n5_raw, n5_mask):
    """Copy the mask container's ``volumes/raw`` directory tree into the raw
    container as ``volumes/mask``.

    This is a plain directory copy via `shutil.copytree`, so it raises if the
    destination directory already exists.
    """
    source_dir = Path(n5_mask).joinpath('volumes/raw')
    target_dir = Path(n5_raw).joinpath('volumes/mask')
    shutil.copytree(source_dir, target_dir)

In [None]:
# make masks a dataset e.g. volumes/masks/cell_mask

In [220]:
# Copy every mask into its raw container, skipping the first correspondence.
[write_mask_dataset(raw_path, mask_path) for mask_path, raw_path in correspondences[1:]]

[None, None, None, None, None, None, None, None, None, None, None]

In [55]:
# update mask metadata
def update_mask_attrs(n5_raw):
    """Write name and pixelResolution attributes onto ``/volumes/mask``.

    The name is copied from the ``/volumes/raw`` attributes and the
    resolution comes from `new_resolution`. Returns the attribute dict that
    was written.
    """
    raw_name = read(n5_raw)['/volumes/raw'].attrs['name']
    mask_attrs = attribute_formatter(raw_name, new_resolution(n5_raw))
    mask_volume = zarr.open(zarr.N5Store(n5_raw))['/volumes/mask']
    mask_volume.attrs.update(mask_attrs)
    return mask_attrs



In [56]:
# Update the mask attributes of every container and display what was written.
list(map(update_mask_attrs, n5s))

[{'name': 'Cryo_LoadID277_Cell11_8x8x8nm_bigwarped_v17',
  'pixelResolution': {'unit': 'nm', 'dimensions': [16, 16, 16]}},
 {'name': 'Chlamydomonas_4x4x4nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [8, 8, 8]}},
 {'name': 'HeLa_Cell1_D05-10_8x8x8nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [16, 16, 16]}},
 {'name': 'HeLa_Cell21_8x8x8nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [16, 16, 16]}},
 {'name': 'HeLa_Cell25_flat_8x8x8nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [16, 16, 16]}},
 {'name': 'Aubrey_17-7_17_Cell2_4x4x4nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [8, 8, 8]}},
 {'name': 'Aubrey_17-7_17_Cell3_4x4x4nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [8, 8, 8]}},
 {'name': 'Jurkat_Cell1_FS96-Area1_4x4x4nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [8, 8, 8]}},
 {'name': 'Cryo_FS80_Cell2_4x4x4nm',
  'pixelResolution': {'unit': 'nm', 'dimensions': [8, 8, 8]}},
 {'name': 'Cryo_20171009_WT45_Cell2_4x4x4nm',
  'pixe