See https://xarray-datatree.readthedocs.io/en/latest/hierarchical-data.html 

Test the `.siblings`, `.children`, `.parent` and `.root` properties, as well as `.is_leaf` and `.leaves`.

In [None]:
from datatree import DataTree, open_datatree
import xarray as xr 
import numpy as np

In [None]:
# Start the family tree: two standalone nodes, then a "Homer" node that
# receives them through its `children` mapping.
bart, lisa = (DataTree(name=label) for label in ("Bart", "Lisa"))
homer = DataTree(children={"Bart": bart, "Lisa": lisa}, name="Homer")
print(homer)

In [None]:
# Bart's siblings, materialized as a list so the notebook displays them.
list(bart.siblings)

In [None]:
# Create a third child and replace Homer's whole `children` mapping with one
# that includes her, then print the updated tree.
maggie = DataTree(name="Maggie")
homer.children = {"Bart": bart, "Lisa": lisa, "Maggie": maggie}
print(homer)

In [None]:
# Walk one level up from Maggie and read that node's name.
maggie.parent.name

In [None]:
# Add a grandparent: create "Abe" and attach Homer beneath it by assigning
# Homer's `parent`.
abe = DataTree(name="Abe")
homer.parent = abe
# Display the name of the root as seen from Maggie.
maggie.root.name

In [None]:
# Show the tree as seen from Abe.
print(abe)

In [None]:
# Pass a node named "Herb" to `assign` under the key "Herbert", then print both
# the original tree and the tree returned by `assign` to compare them.
herbert = DataTree(name="Herb")
new_abe = abe.assign({"Herbert": herbert})
print(abe)
print(new_abe)
# Note that Herb is renamed to Herbert,
# and abe is not modified in-place

In [None]:
from datatree import InvalidTreeError
# Homer's parent was set to Abe earlier (`homer.parent = abe`), so making Homer
# the parent of Abe asks for a cycle. The InvalidTreeError this is expected to
# raise is caught and its message printed.
try:
    abe.parent = homer
except InvalidTreeError as err:
    print(err)
    

Ancestry in an Evolutionary Tree

In [None]:
# Build the evolutionary tree from absolute node paths only; every path maps
# to None (no data attached), which is exactly what dict.fromkeys produces.
vertebrates = DataTree.from_dict(
    d=dict.fromkeys(
        (
            "/Sharks",
            "/Bony Skeleton/Ray-finned Fish",
            "/Bony Skeleton/Four Limbs/Amphibians",
            "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates",
            "/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Rodents & Rabbits",
            "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs",
            "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Birds",
        )
    ),
    name="Vertebrae",
)

In [None]:
# Look up the Primates node by its absolute path.
primates = vertebrates["/Bony Skeleton/Four Limbs/Amniotic Egg/Hair/Primates"]

In [None]:
# Look up the Dinosaurs node by its absolute path.
dinosaurs = vertebrates[
    "/Bony Skeleton/Four Limbs/Amniotic Egg/Two Fenestrae/Dinosaurs"
]

In [None]:
# Print the whole tree.
print(vertebrates)

In [None]:
# Display the `is_leaf` flag of the Primates node.
primates.is_leaf

In [None]:
# Names of all nodes reported by `.leaves` on the full tree.
[node.name for node in vertebrates.leaves]

In [None]:
# Names of the nodes reported by `.ancestors` for Primates.
[node.name for node in primates.ancestors]


In [None]:
# Name of the root node reached from Primates.
primates.root.name

In [None]:
# Name of the node returned as the common ancestor of Primates and Dinosaurs.
primates.find_common_ancestor(dinosaurs).name

In [None]:
# A standalone node that is never attached to the `vertebrates` tree.
alien = DataTree(name="Xenomorph")

In [None]:
from datatree import NotFoundInTreeError
# `alien` lives outside the vertebrates tree, so asking for a common ancestor
# is expected to raise NotFoundInTreeError; the message is printed.
try:
    primates.find_common_ancestor(alien)
except NotFoundInTreeError as err:
    print(err)

In [None]:
# Depth-first
# Print the path of every node yielded by `.subtree`. The loop variable
# `node` stays bound after the loop and is inspected by later cells.
for node in vertebrates.subtree:

    print(node.path)

In [None]:
# `node` is the last node visited by the earlier `vertebrates.subtree` loop.
node.has_data

In [None]:
# Display the `node` left over from the earlier `vertebrates.subtree` loop.
node

In [None]:
homer.is_hollow  # Homer does not carry arrays, only trees

In [None]:
# Store a scalar DataArray under "age" directly on the Homer node.
homer['age'] = xr.DataArray(45)

In [None]:
homer.is_hollow  # now Homer carries an array, so it is no longer hollow

In [None]:
# Two groups ("a" and "b"), each with children "A" and "B"; every path maps to
# None, i.e. no data is attached to any node.
dt = DataTree.from_dict(dict.fromkeys(("/a/A", "/a/B", "/b/A", "/b/B")))

In [None]:
# Path reported by the node looked up at '/a/B'.
dt['/a/B'].path

In [None]:
# Match nodes against the pattern "*/B"; whatever `match` returns is kept in `result`.
result = dt.match("*/B")

In [None]:
from pathlib import Path, PurePosixPath

# A pattern-like string wrapped in a pure (filesystem-independent) POSIX path;
# print its string form followed by its type.
path = PurePosixPath('*/test/a/b')
print(f"{path} {type(path)}")

In [None]:
# Same family as before, this time with data: every node gets a one-variable
# Dataset holding that person's "age". The pairs below are (node path, age).
simpsons = DataTree.from_dict(
    d={
        node_path: xr.Dataset({"age": years})
        for node_path, years in (
            ("/", 83),
            ("/Herbert", 40),
            ("/Homer", 39),
            ("/Homer/Bart", 10),
            ("/Homer/Lisa", 8),
            ("/Homer/Maggie", 1),
        )
    },
    name="Abe",
)
print(simpsons)

In [None]:
# Print the result of filtering with a per-node predicate: "age" greater than 18.
print(simpsons.filter(lambda node: node["age"] > 18))

In [None]:
# Whatever plain iteration over the tree yields, collected into a list.
list(simpsons)

In [None]:
# Every item yielded by `.subtree`, collected into a list.
list(simpsons.subtree)

In [None]:
# Manual equivalent of a predicate filter: keep the subtree nodes whose "age"
# exceeds 18 and print them as a list.
print([node for node in simpsons.subtree if node["age"] > 18])

In [None]:
def time_stamps(n_samples, T):
    """Create an array of evenly-spaced time stamps.

    Parameters
    ----------
    n_samples : int
        Number of time stamps to generate.
    T : float
        Scale factor; the stamps run from 0 to ``2 * pi * T`` inclusive.

    Returns
    -------
    xr.DataArray
        One-dimensional array along the ``"time"`` dimension.
    """
    return xr.DataArray(
        data=np.linspace(0, 2 * np.pi * T, n_samples), dims=["time"]
    )


def signal_generator(t, f, A, phase):
    """Generate an example electrical-like waveform.

    Evaluates ``A * sin(f * t + phase)`` on the raw values of ``t``.

    Parameters
    ----------
    t : object exposing ``.data``
        Time stamps; only ``t.data`` is read (e.g. the output of
        ``time_stamps``).
    f : float
        Multiplier applied to the time values.
    A : float
        Amplitude of the waveform.
    phase : float
        Offset added inside the sine.

    Returns
    -------
    The result of ``A * np.sin(f * t.data + phase)``.
    """
    return A * np.sin(f * t.data + phase)


# Two time bases with different lengths and durations, one per oscilloscope.
time_stamps1 = time_stamps(n_samples=15, T=1.5)
time_stamps2 = time_stamps(n_samples=10, T=1.0)

# One node per oscilloscope; each Dataset holds "potential" and "current"
# along its own "time" coordinate.
voltages = DataTree.from_dict(
    {
        "/oscilloscope1": xr.Dataset(
            {
                "potential": (
                    "time",
                    signal_generator(time_stamps1, f=2, A=1.2, phase=0.5),
                ),
                "current": (
                    "time",
                    signal_generator(time_stamps1, f=2, A=1.2, phase=1),
                ),
            },
            coords={"time": time_stamps1},
        ),
        "/oscilloscope2": xr.Dataset(
            {
                "potential": (
                    "time",
                    signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.2),
                ),
                "current": (
                    "time",
                    signal_generator(time_stamps2, f=1.6, A=1.6, phase=0.7),
                ),
            },
            coords={"time": time_stamps2},
        ),
    }
)

voltages


In [None]:
# Store location: <parent of the current working directory>/generated/simpsons.zarr.
# Taking `.parent` of the resolved working directory (rather than joining a
# literal '../generated' after `resolve()` has already run) keeps the path
# absolute and free of '..' segments.
zarr_path = Path().resolve().parent / 'generated' / 'simpsons.zarr'


In [None]:
# Write the tree to the zarr store. mode="w" overwrites an existing store so
# the cell can be re-run; datatree's default mode ("w-") raises once the store
# already exists.
simpsons.to_zarr(zarr_path, mode="w")

In [None]:
# Read the store back with the zarr engine and display the reopened tree.
reopened = open_datatree(zarr_path,engine="zarr")
reopened

In [None]:
import dask.array as da
rng = np.random.default_rng()

# Three rows of 18 standard-normal samples, each wrapped as a dask array with
# chunks of 3 and handed to DataArray as a plain Python list.
xda = xr.DataArray(
    [da.from_array(rng.standard_normal(18), chunks=3) for _ in range(3)],
    coords={'label': ['a', 'b', 'c'], 'z': list(range(18))},
)
print("da is realized..., ")
print(xda)
# Re-chunk explicitly: 2 along 'label', 4 along 'z'.
# (Alternative previously tried: {'label': 1, 'z': 3}.)
xda = xda.chunk({'label': 2, 'z': 4})
print("reconvert to dask array")
# One item per line: the array, its chunks tuple, and its per-dimension chunk sizes.
print(xda, xda.chunks, xda.chunksizes, sep="\n")
xda

In [None]:
# Wrap the chunked array in a Dataset (variable "the_array") passed to a new
# DataTree, then display that variable through attribute access.
dt_with_str_dim_labels = DataTree(
    xr.Dataset({"the_array": xda})
)
dt_with_str_dim_labels.the_array

In [None]:
# Write next to the simpsons store. mode="w" overwrites an existing store so
# the cell can be re-run; datatree's default mode ("w-") raises once the store
# already exists.
dt_with_str_dim_labels.to_zarr(zarr_path / '../labelled.zarr', mode="w")

In [None]:
# Without chunks=auto: reopen the labelled store with no `chunks` argument at
# all and display the array.
reopened_dt_with_str_dim_labels_no_chunk = open_datatree(zarr_path/ '../labelled.zarr',engine="zarr")
reopened_dt_with_str_dim_labels_no_chunk.the_array

In [None]:
# Author's observation: datatree does not take chunks='auto' into account!
# Pass chunks='auto' here so this cell actually exercises that case. It
# previously passed the same explicit chunks dict as the explicit-chunks cell
# below it, which made the two cells identical.

reopened_dt_with_str_dim_labels = open_datatree(zarr_path/ '../labelled.zarr', engine="zarr", chunks='auto')
reopened_dt_with_str_dim_labels.the_array

In [None]:

# When chunks are provided explicitly it works, but any values are accepted
# and they need not match the way the data is actually stored
# (the array was chunked as {'label': 2, 'z': 4} before writing)... risk of rechunking?
reopened_dt_with_str_dim_labels = open_datatree(zarr_path/ '../labelled.zarr', engine="zarr", chunks={'label': 3, 'z': 5})
reopened_dt_with_str_dim_labels.the_array

In [None]:
from xarray import open_zarr
# For comparison: open the same labelled store with plain xarray and
# chunks='auto', then display the array.
open_zarr(zarr_path/ '../labelled.zarr', chunks='auto').the_array