# Toy Example for CHX

In [1]:
import dask
import dask.array
import distributed
import h5py
import numpy
from pathlib import Path

from tiled.client import from_config

## Generate Example Data

In [2]:
def generate_example_data(path, num_files, shape, chunks, fraction_nonzero):
    """
    Write mostly-zero random integer arrays to a series of HDF5 files.

    Creates ``num_files`` files named ``file_000.h5``, ``file_001.h5``, ...
    inside ``path``. Each file contains a single dataset named ``"data"``.
    Files are opened in ``"w"`` mode.

    Parameters
    ----------
    path : str or pathlib.Path
        Output directory. It is created, together with any missing parent
        directories, if it does not already exist.
    num_files : int
        Number of files to write.
    shape : tuple of int
        Shape of the array stored in each file.
    chunks : tuple of int
        Chunk shape forwarded to ``h5py``'s ``create_dataset``.
    fraction_nonzero : float
        Threshold in [0, 1]: an element is zeroed whenever an independent
        uniform random draw exceeds this value, so roughly this fraction of
        elements keeps its random value.
    """
    path = Path(path)
    # parents=True lets callers pass a nested directory (e.g. "out/run1/data")
    # without having to create the intermediate directories first.
    path.mkdir(parents=True, exist_ok=True)
    for i in range(num_files):
        with h5py.File(path / f"file_{i:03}.h5", "w") as file:
            # randint's upper bound is exclusive: values lie in [0, 2**16 - 2].
            data = numpy.random.randint(0, 2**16 - 1, shape)
            # Zero every element whose random draw exceeds the threshold.
            data[numpy.random.random(shape) > fraction_nonzero] = 0
            file.create_dataset("data", data=data, chunks=chunks)

# Uncomment and run once to create the example data; keep it commented out afterwards to avoid re-creating the files each time.
# generate_example_data(
#     path="./example_data",
#     num_files=3,
#     shape=(100, 1024, 1024),
#     chunks=(10, 1024, 1024),
#     fraction_nonzero=0.05,
# )    

## Serve data with Tiled (in-process)

In [3]:
# Tiled configuration: one "files" tree, mounted at the root path "/",
# pointed at the directory holding the example data.
config = {
    "trees": [
        {
            "tree": "files",
            "path": "/",
            "args": {"directory": "./example_data"},
        },
    ],
}

# Build a client from the config, requesting dask-based structure clients.
client = from_config(config, structure_clients="dask")
client

OBJECT CACHE: Will use up to 6_290_642_534 bytes (15% of total physical RAM)


<Node {'file_000', 'file_001', 'file_002'}>

In [4]:
# Look up a single file by key; the result is itself a node of datasets.
client["file_000"]

<Node {'data'}>

In [5]:
# Drill down to the "data" dataset inside that file.
client["file_000"]["data"]

<DaskArrayClient shape=(100, 1024, 1024) chunks=((10, 10, 10, 10, 10, 10, 10, 10, 10, 10), (1024,), (1024,)) dtype=int64>

In [6]:
# Join the "data" array of every node in the client along the first axis.
raw_data = dask.array.concatenate(
    [file_node["data"][:] for file_node in client.values()],
    axis=0,
)
raw_data

Unnamed: 0,Array,Chunk
Bytes,2.34 GiB,80.00 MiB
Shape,"(300, 1024, 1024)","(10, 1024, 1024)"
Count,4 Graph Layers,30 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 2.34 GiB 80.00 MiB Shape (300, 1024, 1024) (10, 1024, 1024) Count 4 Graph Layers 30 Chunks Type int64 numpy.ndarray",1024  1024  300,

Unnamed: 0,Array,Chunk
Bytes,2.34 GiB,80.00 MiB
Shape,"(300, 1024, 1024)","(10, 1024, 1024)"
Count,4 Graph Layers,30 Chunks
Type,int64,numpy.ndarray


In [7]:
# Random boolean mask with the shape of raw_data minus its first axis; an
# entry is True (value kept) when its uniform draw exceeds 0.02, so roughly
# 2% of positions are zeroed. The shape is taken from raw_data itself rather
# than from a hard-coded file key, so this cell works whatever the files are
# named.
mask = numpy.random.random(raw_data.shape[1:]) > 0.02
# Broadcasting applies the same mask at every index along the first axis.
masked = raw_data * mask
# Reverse the order of elements along axis 1.
flipped = numpy.flip(masked, axis=1)
flipped

Unnamed: 0,Array,Chunk
Bytes,2.34 GiB,80.00 MiB
Shape,"(300, 1024, 1024)","(10, 1024, 1024)"
Count,7 Graph Layers,30 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 2.34 GiB 80.00 MiB Shape (300, 1024, 1024) (10, 1024, 1024) Count 7 Graph Layers 30 Chunks Type int64 numpy.ndarray",1024  1024  300,

Unnamed: 0,Array,Chunk
Bytes,2.34 GiB,80.00 MiB
Shape,"(300, 1024, 1024)","(10, 1024, 1024)"
Count,7 Graph Layers,30 Chunks
Type,int64,numpy.ndarray


In [8]:
# Display the task graph behind `flipped`.
flipped.visualize()

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [9]:
# Evaluate the whole pipeline; the full result is returned as an in-memory array.
flipped.compute()

array([[[    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        ...,
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0]],

       [[    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        ...,
        [45948,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0]],

       [[    0, 63954,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0,     0, ...,     0, 47739,     0],
        ...,
        [    0,     0,     0, ...,     0,     0,     0],
        [    0,     0, 50033, ...,     0,    