## Imports

In [15]:
# !conda activate n2v
%load_ext autoreload

import numpy as np
# from matplotlib import pyplot as plt
import sys
import random
import zarr
from PIL import Image
from skimage import data
from skimage import filters
from skimage import metrics

import gunpowder as gp
import logging
logging.basicConfig(level=logging.INFO)

# from this repo
# from segway.tasks.make_zarr_from_tiff import task_make_zarr_from_tiff_volume as tif2zarr
from boilerPlate import GaussBlur

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# ADD HELPER FUNCTIONS

def noise_pipe(src, target, pipeline, noise_order, noise_dict):
    """Append the enabled noising steps to ``pipeline`` in the given order.

    A step is applied only when its entry in ``noise_dict`` is present and
    truthy; missing, ``None``, ``0`` or ``False`` entries are skipped.

    Args:
        src: array key the incoming pipeline provides.
        target: array key the 'downX' step writes its output to.
        pipeline: upstream node / pipeline that is extended.
        noise_order: iterable of step names, applied in iteration order.
            Handled names are 'downX', 'gaussBlur', 'gaussNoise' and
            'poissNoise'; any other name is ignored.
        noise_dict: maps a step name to its parameter:
            'downX' -> downsampling factor used for the last two axes,
            'gaussBlur' -> second argument passed to ``GaussBlur``,
            'gaussNoise' -> ``var`` passed to ``gp.NoiseAugment``,
            'poissNoise' -> any truthy value enables Poisson noise.

    Returns:
        The extended pipeline (the unchanged ``pipeline`` object when no
        step is enabled).

    Note:
        ``target`` is only introduced by the 'downX' step. Steps listed
        before 'downX' (or every step, when 'downX' is disabled) are applied
        to ``src``; steps listed after it are applied to ``target``.
    """
    this_array = src  # array key the next step operates on
    for noise in noise_order:
        # .get() so that a step named in noise_order but absent from
        # noise_dict counts as disabled instead of raising KeyError.
        param = noise_dict.get(noise)
        if not param:
            continue

        if noise == 'downX':
            # assumes zyx coordinates (and non-isometric): first axis is kept as is
            pipeline += gp.DownSample(src, (1, param, param), target)
            this_array = target
        elif noise == 'gaussBlur':
            pipeline += GaussBlur(this_array, param)
        elif noise == 'gaussNoise':
            pipeline += gp.NoiseAugment(this_array, mode='gaussian', var=param)
        elif noise == 'poissNoise':
            pipeline += gp.NoiseAugment(this_array, mode='poisson')
        # elif noise == 'deform': # TODO: IMPLEMENT
        #     pipeline += ...

    return pipeline

# Specify Parameters (source, noise type, downsampling, etc.)

In [17]:
# Suffix appended to the layer name, for making multiple independently
# generated noise versions (e.g. for Fourier Shell analysis).
noise_version = ''

# PATH FOR ZARR: base directory under which the per-sample containers live.
src_path = '/n/groups/htem/ESRF_id16a/tomo_ML/synapse/cb2/'

# Name of the source dataset inside each container.
raw_name = 'raw'

# Parameter for each noising step; a falsy value (None / False / 0) disables the step.
noise_dict = {
    # kludgy mimic of 30nm pixel size (max attained) from sensor at ESRF.i16a
    # X-ray source, assuming 4nm voxel size EM source images
    'downX': 8,
    # kludgy mimic of 30nm resolution of KB mirrors at ESRF.i16a X-ray source
    'gaussBlur': 30,
    # ASSUMES MEAN = 0, THIS SETS VARIANCE
    'gaussNoise': None,
    # kludgy mimic of sensor shot noise (hot pixels) at ESRF.i16a X-ray source
    'poissNoise': True,
    # 'deform': ..., # TODO: IMPLEMENT
}

# Order in which the steps are appended to the pipeline.
noise_order = ['gaussBlur', 'downX', 'gaussNoise', 'poissNoise']

# Alternative ordering (downsample before blurring):
# noise_order = ['downX', 'gaussBlur', 'gaussNoise', 'poissNoise']

# Samples to process; the commented entries are currently excluded.
samples = [
    # 'ml0',  # should be already done
    # 'ml1',
    # 'cutout1',
    # 'cutout2',
    'cutout5', 'cutout6', 'cutout7',
]

# Voxel size of the source data, one entry per axis.
src_voxel_size = (40, 4, 4)

In [18]:
##### BELOW IS AUTOMATIC PARAMETER SETUP BASED ON ABOVE SPECIFICATIONS ######

# Build the layer name from the enabled steps, in pipeline order:
#   flag-like (bool) steps contribute "<step>_",
#   numeric steps contribute "<step><value>_".
noise_name = ''
for noise in noise_order:
    noise_value = noise_dict.get(noise)  # missing key == step disabled
    if not noise_value:
        continue
    if isinstance(noise_value, bool):
        # bool is checked first: it is a subclass of int
        noise_name += noise + '_'
    elif isinstance(noise_value, (int, float)) or str(noise_value).isnumeric():
        # The explicit numeric check keeps float parameters (e.g. a variance
        # of 0.01) in the name; "0.01".isnumeric() is False, so they used to
        # be dropped and different settings shared one layer name.
        noise_name += noise + str(noise_value) + '_'

if noise_version == '':
    noise_name = noise_name[:-1]  # drop the trailing underscore
else:
    noise_name += noise_version

print('Layer name for noised data: ' + noise_name)

# Voxel size of the noised layer: the first axis is unchanged, the remaining
# axes are scaled by the downsampling factor (if downsampling is enabled).
down_factor = noise_dict.get('downX')
if down_factor:
    dest_voxel_size = [src_voxel_size[s] * down_factor if s > 0 else src_voxel_size[s] for s in range(len(src_voxel_size))]
else:
    dest_voxel_size = src_voxel_size
src_voxel_size = gp.Coordinate(src_voxel_size)
dest_voxel_size = gp.Coordinate(dest_voxel_size)


Layer name for noised data: gaussBlur30_downX8_poissNoise


# Setup Noising Pipeline

In [25]:
%autoreload
from boilerPlate import GaussBlur

In [22]:
# Build and run one noising pipeline per sample volume.
for sample in samples:
    # array keys used in the pipeline
    raw = gp.ArrayKey('RAW')  # raw data
    noisy = gp.ArrayKey('NOISY')  # data with noise added
    raw_spec = gp.ArraySpec(voxel_size=src_voxel_size, interpolatable=True)

    # created but currently not added to the pipeline (see composition below)
    stack = gp.Stack(8)

    # per-chunk request handed to Scan: same extent for both arrays
    scan_request = gp.BatchRequest()
    scan_request.add(raw, (40, 256, 256))  # , src_voxel_size)
    scan_request.add(noisy, (40, 256, 256))  # , dest_voxel_size)
    # scan_request[noisy].dtype = np.float64

    scan = gp.Scan(scan_request, num_workers=4)

    # empty batch request that is sent to the Scan node
    request = gp.BatchRequest()

    # created but currently not added to the pipeline (see composition below)
    cache = gp.PreCache()

    # periodic performance statistics
    performance = gp.PrintProfilingStats(every=100)

    # container layout: $src_path$volume/$volume.zarr/volumes/$layer
    src = f'{src_path}{sample}/{sample}.zarr/volumes'
    zarr.open(src)

    # data source: reads dataset `raw_name` from the container into `raw`
    source = gp.ZarrSource(
        src,
        {raw: raw_name},
        {raw: raw_spec},
    )

    # writer: stores `noisy` as dataset `noise_name` in the same container.
    # NOTE(review): stored as uint8 (0-255, should match raw); no rescaling
    # node is added in this cell, so confirm the noise steps emit that range.
    destination = gp.ZarrWrite(
        dataset_names={noisy: noise_name},
        output_dir=f'{src_path}{sample}',
        output_filename=f'{sample}.zarr/volumes',
        dataset_dtypes={noisy: np.uint8},
    )

    # source -> noise steps -> writer -> scan -> profiling
    # (cache and stack are deliberately left out for now)
    pipeline = noise_pipe(raw, noisy, source, noise_order, noise_dict)
    pipeline = pipeline + destination + scan + performance

    with gp.build(pipeline):
        pipeline.request_batch(request)
        print('Finished: ' + src)


INFO:gunpowder.nodes.scan:scanning over 204800 chunks


In [21]:
# Open the cutout5 volumes group and display its summary (member arrays and
# groups) to see which layers are present.
test = zarr.open(
    '/n/groups/htem/ESRF_id16a/tomo_ML/synapse/cb2/cutout5/cutout5.zarr/volumes'
)
test.info

0,1
Name,/
Type,zarr.hierarchy.Group
Read-only,False
Store type,zarr.storage.DirectoryStore
No. members,2
No. arrays,1
No. groups,1
Arrays,raw
Groups,labels
