## Generate a collection of images, preprocess them, and save in a chunked store

Create the ingredients for an imaging dataset:

- a function that produces an image (here, a numpy array)
- a function that generates a file on disk containing the image data

In [None]:
import numpy as np
import atexit
import os
import shutil
import time
from tifffile import imsave

def get_tmpdir(path):
    """Create a fresh, empty directory at ``path`` and remove it at interpreter exit.

    Any existing directory tree at ``path`` is deleted first, so the caller
    always starts from an empty directory.

    Parameters
    ----------
    path : str
        Location of the scratch directory (relative or absolute).

    Returns
    -------
    str
        ``path`` exactly as it was passed in.
    """
    # Resolve once so the exit-time cleanup hits the same directory even if
    # the working directory changes before the interpreter shuts down.
    target = os.path.abspath(path)
    if os.path.exists(target):
        shutil.rmtree(target)
    os.makedirs(target, exist_ok=True)
    # ignore_errors: the directory may already be gone at exit (removed by
    # hand, or this function was called more than once for the same path).
    atexit.register(shutil.rmtree, target, ignore_errors=True)
    return path

def get_img(z):
    """Return a 512x512 ``uint8`` test image for plane index ``z``.

    Each pixel is the bitwise XOR of its two coordinates (both running from
    -256 to 255) and ``z``, cast to ``uint8``. The call sleeps for 25 ms
    before returning.
    """
    coords = np.arange(-256, 256)
    # Row and column vectors broadcast to the full 512x512 coordinate grid.
    along_cols = coords[np.newaxis, :]
    along_rows = coords[:, np.newaxis]
    time.sleep(.025)
    pattern = along_cols ^ along_rows ^ z
    return pattern.astype('uint8')

def save_img(img, fname):
    """Write ``img`` to ``fname`` with ``imsave`` and report the outcome as a status code.

    Parameters
    ----------
    img
        Image data handed to ``imsave`` unchanged.
    fname
        Destination file path.

    Returns
    -------
    int
        0 if the write completed, 1 if ``imsave`` raised an exception.
    """
    try:
        imsave(fname, img)
    except Exception:
        # Report ordinary failures through the status code, but let
        # KeyboardInterrupt / SystemExit propagate so a long batch of
        # writes can still be interrupted.
        return 1
    return 0

### Save the images serially 

In [None]:
%%time
# Write all 512 planes one after another into a fresh 'data1' directory.
tmpdir = get_tmpdir('data1')

# One status code per plane; file names are the zero-padded plane index.
results = [
    save_img(get_img(z), os.path.join(tmpdir, f'{z:04d}.tif'))
    for z in range(512)
]

# True only if every save_img call returned 0.
print(np.all(np.array(results) == 0))

### Save the images in parallel

In [None]:
from distributed import Client, LocalCluster

# Start a local cluster with default settings and connect a client to it;
# the parallel cell below submits its work through `client`.
cluster = LocalCluster()
client = Client(cluster)
# Last expression of the cell, so the notebook displays the client.
client

In [None]:
%%time
# Same job as the serial cell, but each plane becomes two chained tasks on
# the cluster: generate the image, then save it.
tmpdir2 = get_tmpdir('data2')

# The future returned for get_img is passed straight into the save_img task.
futures = [
    client.submit(
        save_img,
        client.submit(get_img, z),
        os.path.join(tmpdir2, f'{z:04d}.tif'),
    )
    for z in range(512)
]

# Collect the status codes from all save tasks.
results = client.gather(futures)

# True only if every save_img call returned 0.
print(np.all(np.array(results) == 0))

### Ingest image data 

In [None]:
import dask.array as da
from dask_image.imread import imread as dimread

# Open every .tif written to tmpdir2 as one array. Later cells use it as a
# dask array (da.coarsen, .rechunk).
# NOTE(review): presumably loaded lazily with the files stacked along
# axis 0 in name order — confirm against dask_image.imread.
darr = dimread(tmpdir2 + '/*.tif')
# Last expression of the cell, so the notebook displays the array summary.
darr

### Generate a multiresolution pyramid

In [None]:
# Target chunking for every pyramid level: 64 elements along each axis.
iso_chunks = (64,) * darr.ndim
reducer = np.mean

# Rechunk once before downsampling. da.coarsen (without trim_excess) works
# block by block, so chunk boundaries should line up with the coarsening
# factors; 64 is divisible by both 2 and 4, whatever chunking the reader
# produced.
base = darr.rechunk(iso_chunks)

# 's0' is the full-resolution data; each further level averages
# factor x factor x ... neighbourhoods of it.
pyramid = {'s0': base}
for level, factor in (('s1', 2), ('s2', 4)):
    coarse = da.coarsen(reducer, base, {axis: factor for axis in range(base.ndim)})
    # The mean reducer changes the dtype; cast back to the source dtype and
    # restore the uniform chunking shared by all levels.
    pyramid[level] = coarse.astype(darr.dtype).rechunk(iso_chunks)

### Save multiresolution data to disk in a chunked format

In [None]:
import zarr
from numcodecs import GZip

# Fresh output directory for the N5 container (removed again at exit by
# get_tmpdir).
n5_path = get_tmpdir('test.n5')
# On-disk chunk size: 64 along every axis.
save_chunks = (64,) * darr.ndim

# Group-level metadata: axis names, the downsampling factor of each pyramid
# level, and the physical unit.
neuroglancer_attributes = {
    'axes': ['z', 'y', 'x'],
    'scales': [[1, 1, 1], [2, 2, 2], [4, 4, 4]],
    'unit': 'nm',
}

# Open the container for writing and attach the metadata to the root group.
store = zarr.N5Store(n5_path)
group = zarr.open(store, mode='w')
group.attrs.update(neuroglancer_attributes)

# One empty, zero-initialised dataset per pyramid level, named after the
# level key and matching its shape and dtype.
arrays = [
    group.zeros(
        name=level,
        shape=data.shape,
        dtype=data.dtype,
        chunks=save_chunks,
        compressor=GZip(-1),
    )
    for level, data in pyramid.items()
]

In [None]:
# Write every pyramid level into its matching dataset; `arrays` was built by
# iterating `pyramid` in the same order, so sources and targets pair up.
# NOTE(review): lock=None — presumably no lock is needed because the dask
# chunks (64 per axis) match the on-disk chunks in save_chunks; confirm.
da.store(pyramid.values(), arrays, lock=None)