# Flow from DICED

In [1]:
import numpy as np
import json
import random
import h5py
import sys
from diced import DicedStore
import os
from tqdm import tqdm, trange
import matplotlib.pyplot as plt


In [2]:
# Open the DICED store at gs://flyem-public-connectome and, inside it, the
# "medulla7column" repository that the array handles are taken from.
store = DicedStore("gs://flyem-public-connectome")
repo = store.open_repo("medulla7column")


In [3]:
# Handles to the repo's "grayscale" and "groundtruth" arrays; later cells
# slice them block by block.
grayscale = repo.get_array("grayscale")
groundtruth = repo.get_array("groundtruth")


In [4]:
def load_roi():
    """Parse ``./json_repo/roi.json`` and return the decoded JSON content."""
    with open('./json_repo/roi.json', 'r') as roi_file:
        return json.load(roi_file)

def get_roi_extent(roi):
    """Turn ROI entries into ``[z, y, x]`` slice triples.

    Every index in an entry is scaled by 32. Items 0 and 1 each select a
    single 32-wide span along z and y; items 2 and 3 are the first and the
    last (inclusive) 32-wide span along x.

    :param roi: Iterable of indexable entries with at least four items.
    :return: A list with one ``[z_slice, y_slice, x_slice]`` list per entry.
    """
    def span(first, last):
        # Covers blocks ``first`` through ``last`` inclusive.
        return slice(first * 32, (last * 32) + 32, None)

    return [
        [span(entry[0], entry[0]), span(entry[1], entry[1]), span(entry[2], entry[3])]
        for entry in roi
    ]

def get_chunk(array, roi, num_chunks, chunk_depth=32, seed=923):
    """Sample ``num_chunks`` blocks of size 32 x 32 x ``chunk_depth`` from the ROI.

    ROI entries are drawn without replacement. For each drawn entry the z and
    y positions are fixed by items 0 and 1 (scaled by 32), while the x start
    is picked at random among the 32-aligned positions between items 2 and 3
    (inclusive, scaled by 32).

    :param array: Object indexed with a list of three slices ``[z, y, x]``.
    :param roi: Sequence of entries with at least four integer items.
    :param num_chunks: How many distinct ROI entries to sample.
    :param chunk_depth: Extent of each chunk along x; z and y are always 32.
    :param seed: Seed for the sampling, so repeated calls return the same chunks.
    :return: List of whatever ``array`` returns for each sampled extent.
    :raises ValueError: If ``num_chunks`` exceeds ``len(roi)`` (from ``sample``).
    """
    # A private generator gives the same draws as seeding the module-level
    # RNG, but does not reset the global random state other code may rely on.
    rng = random.Random(seed)

    chunks = []

    for i in rng.sample(range(len(roi)), num_chunks):
        z_loc = roi[i][0] * 32
        y_loc = roi[i][1] * 32
        x_loc = rng.choice(range(roi[i][2], roi[i][3] + 1)) * 32

        z_extent = slice(z_loc, z_loc + 32, None)
        y_extent = slice(y_loc, y_loc + 32, None)
        x_extent = slice(x_loc, x_loc + chunk_depth, None)

        chunks.append(array[[z_extent, y_extent, x_extent]])

    return chunks

In [5]:
# Load the ROI list once; the cells below pass it to the export function.
roi = load_roi()
# Disabled example: draw 100 random chunks from the grayscale array.
# chunks = get_chunk(grayscale, roi, num_chunks=100, chunk_depth=32)


# Create the dataset as an HDF5 file

In [6]:
def create_fib_dataset(path,
                       roi,
                       grayscale,
                       groundtruth,
                       input_name,
                       target_name):
    """Export every ROI entry of both arrays into one HDF5 file.

    For ROI entry ``i`` the data is stored as ``/array/<input_name>/<i>`` and
    ``/array/<target_name>/<i>``, each reshaped to ``(32, 32, -1)`` and
    written with ``(32, 32, 32)`` chunks and LZF compression.

    :param path: Destination file, including the file name. It is opened in
        append mode; the groups and datasets must not exist yet.
    :param roi: The list loaded from 'roi.json'. Items 0 and 1 of each entry
        give the z and y block, items 2 and 3 the first and last x block
        (inclusive); all are scaled by 32.
    :param grayscale: Source array for the input data, indexed with a list
        of three slices ``[z, y, x]``.
    :param groundtruth: Source array for the target data, indexed the same way.
    :param input_name: Name of the group that receives the grayscale data.
    :param target_name: Name of the group that receives the ground-truth data.
    """
    with h5py.File(path, "a") as f:
        arr_grp = f.create_group("array")
        # Pair each source with its destination group so both go through
        # exactly the same slicing and storage settings.
        exports = (
            (grayscale, arr_grp.create_group(input_name)),
            (groundtruth, arr_grp.create_group(target_name)),
        )

        for i, entry in enumerate(tqdm(roi)):
            rs = np.array(entry) * 32
            ze = slice(rs[0], rs[0] + 32, None)
            ye = slice(rs[1], rs[1] + 32, None)
            # x runs from the first through the last block, inclusive.
            xe = slice(rs[2], rs[3] + 32, None)

            for source, group in exports:
                block = source[[ze, ye, xe]].reshape((32, 32, -1))
                group.create_dataset(str(i),
                                     data=block,
                                     chunks=(32, 32, 32),
                                     compression="lzf")
                

In [7]:
# Export all ROI entries; the function defined above is create_fib_dataset.
create_fib_dataset("/media/haein/DATA/FlyEM/fib25.hdf5", roi, grayscale, groundtruth, "grayscale", "groundtruth")

100%|██████████| 9186/9186 [4:42:44<00:00,  1.85s/it]  


In [19]:
# Reopen the exported file read-only for inspection.
# NOTE(review): this handle is never closed in the visible cells, and
# `grayscale` is rebound here from the DICED array to the HDF5 group.
h = h5py.File('/media/haein/DATA/FlyEM/fib25.hdf5', 'r')
grayscale = h['/array/grayscale/']


In [20]:
# Number of datasets stored under /array/grayscale/.
len(grayscale)

9186

In [30]:
arr = grayscale['0'] # dataset named '0'; slicing it with [:, :, 0:32] takes the first 32 positions of the last axis

In [46]:
# Read the whole dataset into memory and report the length of its last axis.
arr[:].shape[2]

1568

In [6]:
from tqdm import trange

# Cut every exported (32, 32, N) array into consecutive 32-wide cubes along
# the last axis and append them to one resizable (count, 32, 32, 32) dataset
# per source group.
cube_specs = (
    # (source group, target dataset name, stored dtype)
    ('/array/grayscale/', 'grayscale', 'uint8'),
    ('/array/groundtruth/', 'groundtruth', 'uint64'),
)

# Both files are managed by the `with` statement so the target is flushed and
# closed even if the export fails part-way.
with h5py.File('/media/haein/DATA/FlyEM/fib_cube.hdf5', 'a') as target, \
        h5py.File('/media/haein/DATA/FlyEM/fib25.hdf5', 'r') as original:
    for _, trg_dset_name, trg_dtype in cube_specs:
        target.create_dataset(trg_dset_name,
                              (1, 32, 32, 32),
                              maxshape=(None, 32, 32, 32),
                              compression='lzf',
                              dtype=trg_dtype)

    for arr_path, trg_dset_name, _ in cube_specs:
        org_dset = original[arr_path]
        trg_dset = target[trg_dset_name]

        cube_count = 0

        for org_dset_name in tqdm(org_dset.keys()):
            arr = org_dset[org_dset_name][:]
            # Only whole cubes are exported; a trailing remainder is dropped.
            num_cubes = arr.shape[2] // 32

            first_cube = cube_count
            cube_count += num_cubes

            # Grow the target just enough to hold this array's cubes.
            trg_dset.resize(cube_count, axis=0)

            for cube_idx in range(num_cubes):
                edge_loc = cube_idx * 32
                trg_dset[first_cube + cube_idx, ...] = arr[..., edge_loc:edge_loc + 32]


100%|██████████| 9186/9186 [29:14<00:00,  5.24it/s]
100%|██████████| 9186/9186 [34:51<00:00,  4.39it/s]
