## Generate a collection of images, preprocess them, and save in a chunked store

Create the ingredients for an imaging dataset:

- a function that produces an image (here, a numpy array)
- a function that generates a file on disk containing the image data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import atexit
import os
import shutil
import time
from tifffile import imsave

def get_tmpdir(path):
    """Return `path` after making sure it is a freshly created directory.

    Anything that already exists at `path` is removed with `shutil.rmtree`
    first, so previous contents are lost. Intermediate directories are
    created as needed.

    Parameters
    ----------
    path : str
        Location of the directory to (re)create.

    Returns
    -------
    str
        The same `path` that was passed in.
    """
    stale = os.path.exists(path)
    if stale:
        # Wipe leftovers from an earlier run so the directory starts empty.
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)
    return path

def get_img(z):
    """Produce a synthetic 512x512 uint8 test image for index `z`.

    Each pixel is the bitwise XOR of its two coordinates (both running
    from -256 to 255) and `z`, cast to uint8. The function sleeps for
    25 ms on every call.

    Parameters
    ----------
    z : int
        Value XOR-ed into every pixel.

    Returns
    -------
    numpy.ndarray
        Array of shape (512, 512) and dtype uint8.
    """
    coords = np.arange(-256, 256)
    time.sleep(.025)
    # Broadcasting a column against a row yields the same 2D grid of
    # coordinate pairs as meshgrid; XOR is commutative, so the axis
    # order of the two operands does not matter.
    pattern = coords[:, np.newaxis] ^ coords[np.newaxis, :] ^ z
    return pattern.astype('uint8')

def save_img(img, fname):
    """Write `img` to `fname` with `imsave` and report a status code.

    Parameters
    ----------
    img : array-like
        Image data handed to `imsave`.
    fname : str
        Destination file path.

    Returns
    -------
    int
        0 if `imsave` completed, 1 if it raised an exception.

    Notes
    -----
    Only `Exception` subclasses are converted into a status code.
    `KeyboardInterrupt` and `SystemExit` propagate, so interrupting the
    kernel is not silently recorded as a failed write.
    """
    try:
        imsave(fname, img)
        return 0
    except Exception:
        # Best-effort write: the caller collects status codes instead of
        # handling exceptions for each file.
        return 1

In [2]:
%%time
results = []
tmpdir = get_tmpdir('data1')
for z in range(512):
    fname = os.path.join(tmpdir, str(z).zfill(4) + '.tif')
    img = get_img(z)
    results.append(save_img(img, fname))

print(np.all(np.array(results) == 0))

True
CPU times: user 506 ms, sys: 489 ms, total: 995 ms
Wall time: 13.8 s


In [3]:
from distributed import Client, LocalCluster

# Start a local Dask cluster with default settings and connect a client to
# it; later cells submit work through `client`.
cluster = LocalCluster()
client = Client(cluster)
# Last expression of the cell, so the notebook displays the client summary.
client

0,1
Client  Scheduler: tcp://127.0.0.1:35349  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 6  Cores: 24  Memory: 31.33 GiB


In [4]:
%%time
from distributed import fire_and_forget
futures = []
tmpdir2 = get_tmpdir('data2')

for z in range(512):
    fname = os.path.join(tmpdir2, str(z).zfill(4) + '.tif')
    img = client.submit(get_img, z)
    futures.append(client.submit(save_img, img, fname))
    
results = client.gather(futures)

print(np.all(np.array(results) == 0))

True
CPU times: user 635 ms, sys: 69.8 ms, total: 705 ms
Wall time: 830 ms


## Rechunk image data 

In [5]:
import dask.array as da
from dask_image.imread import imread as dimread

# Open every TIFF written to tmpdir2 as a single dask array. The glob
# pattern is built with os.path.join, matching how the file names were
# built when the images were written.
darr = dimread(os.path.join(tmpdir2, '*.tif'))
# Last expression of the cell: display the array summary (shape, chunks, dtype).
darr

Unnamed: 0,Array,Chunk
Bytes,128.00 MiB,256.00 kiB
Shape,"(512, 512, 512)","(1, 512, 512)"
Count,1024 Tasks,512 Chunks
Type,uint8,numpy.ndarray
"Array Chunk Bytes 128.00 MiB 256.00 kiB Shape (512, 512, 512) (1, 512, 512) Count 1024 Tasks 512 Chunks Type uint8 numpy.ndarray",512  512  512,

Unnamed: 0,Array,Chunk
Bytes,128.00 MiB,256.00 kiB
Shape,"(512, 512, 512)","(1, 512, 512)"
Count,1024 Tasks,512 Chunks
Type,uint8,numpy.ndarray


## Generate a multiresolution pyramid

In [7]:
# Two-level pyramid: 's0' is the full-resolution array and 's1' is the same
# data reduced by a factor of 2 along every axis using `reducer`, then cast
# back to the source dtype. Both levels are rechunked to 64-per-axis blocks.
iso_chunks = (64,) * darr.ndim
reducer = np.mean
pyramid = {
    's0': darr.rechunk(iso_chunks),
    's1': da.coarsen(reducer, darr, dict.fromkeys(range(darr.ndim), 2))
            .astype(darr.dtype)
            .rechunk(iso_chunks),
}

In [8]:
import zarr
from numcodecs import GZip
# Fresh output directory for the N5 container.
n5_path = get_tmpdir('test.n5')
save_chunks = (64,) * darr.ndim

# Group-level metadata describing axis order, per-level scale factors and
# units (presumably the layout neuroglancer expects -- confirm against the
# viewer's documentation).
neuroglancer_attributes = dict(
    axes=['z', 'y', 'x'],
    scales=[[1, 1, 1], [2, 2, 2]],
    unit='nm',
)
group = zarr.open(zarr.N5Store(n5_path), mode='w')
group.attrs.update(neuroglancer_attributes)

# One zero-initialised, gzip-compressed array per pyramid level, named after
# the level's key and matching its shape and dtype.
arrays = [
    group.zeros(name=level, shape=data.shape, dtype=data.dtype,
                chunks=save_chunks, compressor=GZip(-1))
    for level, data in pyramid.items()
]

In [9]:
# Write each pyramid level into its matching on-disk array (both sequences
# follow the iteration order of `pyramid`). No lock is passed; presumably
# this is safe because the dask chunks (iso_chunks) and the stored chunks
# (save_chunks) are both 64 per axis, so each task writes its own chunk --
# TODO confirm.
da.store(pyramid.values(), arrays, lock=None)

In [None]:
# Shell out to the `serve` command-line tool to expose the N5 directory over
# HTTP with the --cors flag (presumably so a browser-based viewer can fetch
# the chunks -- confirm). $n5_path is interpolated from the Python variable.
!serve --cors $n5_path

[32m[39m
[32m   ┌───────────────────────────────────────────────────┐[39m
   [32m│[39m                                                   [32m│[39m
   [32m│[39m   [32mServing![39m                                        [32m│[39m
   [32m│[39m                                                   [32m│[39m
   [32m│[39m   [1m- Local:[22m            http://localhost:5000       [32m│[39m
   [32m│[39m   [1m- On Your Network:[22m  http://192.168.1.154:5000   [32m│[39m
   [32m│[39m                                                   [32m│[39m
   [32m│[39m   [90mCopied local address to clipboard![39m              [32m│[39m
   [32m│[39m                                                   [32m│[39m
[32m   └───────────────────────────────────────────────────┘[39m
[32m[39m
