# nima_io: Microscopy Data Reading Tutorial

This notebook demonstrates reading various microscopy file formats,
comparing `nima_io` (bioio-based) with `tifffile`, and inspecting
OME metadata for all available test data.

## Setup

In [None]:
# Load IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell runs, so edits to library code are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import tifffile
from bioio import BioImage

import nima_io.read as ir

# Directory containing the test images read by every cell below. The path is
# relative, so it is resolved against the kernel's working directory.
tdata = Path("../../tests/data")

## 1. Single-scene OME-TIFF

A simple multi-channel time-series OME-TIFF with known structure:
5 timepoints, 3 channels, 17x13 pixels.

In [None]:
# read_image gives an xarray.DataArray backed by dask: report its layout,
# then display the underlying dask array as the cell output.
ome_tif_a = tdata / "im1s1z3c5t_a.ome.tif"
da = ir.read_image(str(ome_tif_a))
print(f"dims={da.dims}, shape={da.shape}, dtype={da.dtype}")
da.data

In [None]:
# The OME object travels with the DataArray in attrs; show the channel list
# of its first image.
ome_from_attrs = da.attrs["ome_metadata"]
ome_from_attrs.images[0].pixels.channels

In [None]:
# Structured metadata record (consolidated from OME) stored next to the data
md = da.attrs["metadata"]
print(md)
print()

# Headline acquisition facts; index 0 selects the first entry of each field
print(f"Objective: {md.objective[0]}")
print(f"Pixel size: {md.voxel_size[0]}")
print(f"Date: {md.date[0]}")
print()

# One summary line per channel of the first entry
for ci, ch in enumerate(md.channels[0]):
    channel_summary = (
        f"Ch[{ci}]: wavelength={ch.wavelength}nm, "
        f"attenuation={ch.attenuation}, "
        f"exposure={ch.exposure}s, "
        f"gain={ch.gain}, "
        f"binning={ch.binning}"
    )
    print(channel_summary)

In [None]:
# The same file opened with tifffile, for comparison with the nima_io read
tif_path_a = tdata / "im1s1z3c5t_a.ome.tif"
with tifffile.TiffFile(tif_path_a) as tif:
    print(f"Series: {len(tif.series)}")
    # Describe the first series only
    s = tif.series[0]
    print(f"shape={s.shape}, axes={s.axes}, dtype={s.dtype}")
    print(f"OME: {tif.is_ome}")

### OME Metadata

Access the full OME metadata object via bioio.

In [None]:
# Open the file with bioio and walk the parsed OME model of its first image.
# `img`, `ome` and `px` are reused by later cells.
img = BioImage(tdata / "im1s1z3c5t_a.ome.tif")
ome = img.ome_metadata
px = ome.images[0].pixels

# Fall back to the image id when the image has no name
print(f"Image: {ome.images[0].name or ome.images[0].id}")
print(
    f"Dims: X={px.size_x}, Y={px.size_y}, C={px.size_c}, T={px.size_t}, Z={px.size_z}"
)
print(f"Pixel sizes: {img.physical_pixel_sizes}")
print("Channels:")
for ci, ch in enumerate(px.channels):
    # light_source_settings may be missing; report None rather than failing
    ls = ch.light_source_settings
    wl = ls.wavelength if ls else None
    print(f"  [{ci}] id={ch.id}, wavelength={wl}")

### Per-channel acquisition settings

Exposure time and timestamps live in `planes` (one per T/C/Z
combination). Channel-level settings (wavelength, attenuation,
binning, gain) are on the `Channel` object. Combine both for
a complete per-channel summary.

In [None]:
# Per-channel acquisition summary, combining Channel-level settings with the
# exposure time recorded on the channel's first plane.
for ci, ch in enumerate(px.channels):
    ls = ch.light_source_settings
    ds = ch.detector_settings
    # First plane belonging to this channel. Default to None so a channel
    # without Plane entries prints exposure=None instead of raising
    # StopIteration (same None handling as ls/ds below).
    plane = next((p for p in px.planes if p.the_c == ci), None)
    print(
        f"Ch[{ci}]: "
        f"wl={ls.wavelength if ls else None}, "
        f"att={ls.attenuation if ls else None}, "
        f"exposure={plane.exposure_time if plane else None}, "
        f"binning={ds.binning if ds else None}, "
        f"gain={ds.gain if ds else None}"
    )

### Channel naming

Assign semantic channel names for ratio analysis.

In [None]:
# Re-read the file, labelling its channels so they can be selected by name
named_source = str(tdata / "im1s1z3c5t_a.ome.tif")
channel_labels = ["G", "R", "C"]
da_named = ir.read_image(named_source, channels=channel_labels)
print(f"Channel coords: {list(da_named.coords['C'].values)}")

# Display the "G" channel at the first timepoint
da_named.sel(C="G", T=0).data

## 2. Multi-channel time-series OME-TIFF

A 7-timepoint, 3-channel image without wavelength metadata.

In [None]:
mcts_path = tdata / "multi-channel-time-series.ome.tif"

# Array view through nima_io
da_mcts = ir.read_image(str(mcts_path))
print(f"dims={da_mcts.dims}, shape={da_mcts.shape}, dtype={da_mcts.dtype}")

# Metadata view through bioio
img_mcts = BioImage(mcts_path)
print(f"Channel names: {img_mcts.channel_names}")
print(f"Pixel sizes: {img_mcts.physical_pixel_sizes}")
print(f"Image name: {img_mcts.ome_metadata.images[0].name}")

## 3. File sequences with tifffile.TiffSequence

For sets of related TIFF files, `tifffile.TiffSequence` stacks them.
This is useful when an acquisition is split across multiple files.

In [None]:
# "?" matches exactly one character, so this picks up the single-letter
# variants of the file name.
fp_glob = str(tdata / "im1s1z3c5t_?.ome.tif")

# Use the sequence as a context manager so it is closed when the cell is done,
# matching how TiffFile is handled elsewhere in this notebook.
with tifffile.TiffSequence(fp_glob) as tifs:
    d = tifs.asarray()
    print(f"Glob matched {len(tifs)} files")
    print(f"Stacked shape: {d.shape}")
    print("Individual files:")
    for f in sorted(tifs):
        print(f"  {Path(f).name}")

## 4. Tiled images (FEI multi-scene)

FEI microscopes save tiled acquisitions as multi-scene OME-TIFFs.
Each scene is one tile with stage position metadata.

### 4a. Regular tile grid (t4_1.tif)

In [None]:
t4_path = tdata / "t4_1.tif"

# bioio: every tile shows up as its own scene
img_tile = BioImage(t4_path)
print(f"Scenes: {len(img_tile.scenes)}")
print(f"Per-tile shape: {img_tile.shape}")
print(f"Pixel sizes: {img_tile.physical_pixel_sizes}")

# tifffile: the same file, reported as series
with tifffile.TiffFile(t4_path) as tif:
    print(f"\ntifffile series: {len(tif.series)}")
    print(f"Per-series shape: {tif.series[0].shape}, axes={tif.series[0].axes}")

In [None]:
# Combine all tiles of the file into one DataArray and report its extent
t4_file = str(tdata / "t4_1.tif")
stitched = ir.stitch_scenes(t4_file)
print(f"Stitched: dims={stitched.dims}, shape=T{stitched.sizes['T']}")
print(f"  Y={stitched.sizes['Y']}, X={stitched.sizes['X']}")
stitched.data

In [None]:
# Tilemap shows scene layout (row, col) -> scene_index.
# It is stored by stitch_scenes in the DataArray's attrs.
tilemap = stitched.attrs["tilemap"]
print(f"Tile grid: {tilemap.shape[0]} rows x {tilemap.shape[1]} cols")
print(tilemap)

### 4b. Tile grid with void tiles (tile6_1.tif)

In [None]:
# Stitch the second tiled file and report the resulting canvas size
tile6_file = str(tdata / "tile6_1.tif")
stitched_void = ir.stitch_scenes(tile6_file)
print(f"Stitched: Y={stitched_void.sizes['Y']}, X={stitched_void.sizes['X']}")

# Grid positions marked -1 in the tilemap are the void tiles counted below
tilemap_void = stitched_void.attrs["tilemap"]
print(f"Tile grid ({tilemap_void.shape}):")
print(tilemap_void)
print(f"Void tiles (=-1): {np.sum(tilemap_void == -1)}")

In [None]:
# Show one plane of the stitched mosaic. An explicit figure/axes pair keeps the
# plot self-contained, and the title/labels let it stand alone when skimmed.
fig, ax = plt.subplots()
mosaic = ax.imshow(stitched_void.sel(T=1, Z=0, C=1), cmap="Reds", vmax=1000, vmin=1)
fig.colorbar(mosaic, ax=ax)
# Trailing semicolon suppresses the repr of the returned artists
ax.set(title="tile6_1.tif stitched (T=1, Z=0, C=1)", xlabel="X (px)", ylabel="Y (px)");

### Stage positions

OME metadata provides physical stage positions for each tile.

In [None]:
# Table of stage coordinates, one row per scene, read from each image's first plane
tile6_image = BioImage(tdata / "tile6_1.tif")
ome_tile = tile6_image.ome_metadata
print(f"{'Scene':>5} {'X pos':>10} {'Y pos':>10}")
for i, scene_image in enumerate(ome_tile.images):
    p = scene_image.pixels.planes[0]
    print(f"{i:5d} {float(p.position_x):10.2f} {float(p.position_y):10.2f}")

## 5. TF8 format (.tf8)

TF8 files are TIFFs with a non-standard extension.
`nima_io` handles this transparently via a temp symlink.

In [None]:
# A .tf8 file is read with the same call as any other image
tf8_file = str(tdata / "LC26GFP_1.tf8")
da_tf8 = ir.read_image(tf8_file)
print(f"dims={da_tf8.dims}, shape={da_tf8.shape}, dtype={da_tf8.dtype}")

## 6. Large single-scene image (exp2_2.tif)

81 timepoints, 2 channels (340nm/380nm ratiometric), 1200x1600 pixels.

In [None]:
exp_path = tdata / "exp2_2.tif"
img_exp = BioImage(exp_path)
print(f"Shape: {img_exp.shape}")
print(f"Pixel sizes: {img_exp.physical_pixel_sizes}")

# Per-channel wavelength, None when the channel has no light-source settings
ome_exp = img_exp.ome_metadata
px_exp = ome_exp.images[0].pixels
for ci, ch in enumerate(px_exp.channels):
    ls = ch.light_source_settings
    wl = ls.wavelength if ls else None
    print(f"  Ch[{ci}]: wavelength={wl}")

# Instrument metadata: only the first instrument, and only the parts present
inst = ome_exp.instruments[0] if ome_exp.instruments else None
if inst is not None and inst.objectives:
    obj = inst.objectives[0]
    print(f"Objective: NA={obj.lens_na}, mag={obj.nominal_magnification}")
if inst is not None and inst.detectors:
    print(f"Detector: {inst.detectors[0].model}")

In [None]:
# Lazy read: nothing is loaded until .values or .compute() is called
exp_file = str(tdata / "exp2_2.tif")
da_exp = ir.read_image(exp_file)
print(f"Lazy DataArray: {da_exp.dims}, {da_exp.shape}")
print(f"Dask chunks: {da_exp.data.chunks}")

## 7. Leica LIF files

Multi-scene confocal Z-stacks read via bioio-lif (pure Python, no Java).

In [None]:
lif_path = tdata / "2015Aug28_TransHXB2_50min+DMSO.lif"
img_lif = BioImage(lif_path)
# The reader's module name shows which plugin handled the file
print(f"Reader: {type(img_lif.reader).__module__}")
print(f"Scenes ({len(img_lif.scenes)}): {img_lif.scenes}")

# Activate each scene in turn (by index) and describe it
for si, scene in enumerate(img_lif.scenes):
    img_lif.set_scene(si)
    scene_summary = (
        f"  {scene}: shape={img_lif.shape}, "
        f"channels={img_lif.channel_names}, "
        f"voxel_z={img_lif.physical_pixel_sizes.Z}"
    )
    print(scene_summary)

## 8. File comparison (diff)

Compare two files for pixel-level equality.

In [None]:
# Three fixtures: the reference file, a second file labelled "same data",
# and one labelled "1px off".
a, b, bpix = (
    str(tdata / "im1s1z3c5t_a.ome.tif"),
    str(tdata / "im1s1z3c5t_b.ome.tif"),
    str(tdata / "im1s1z3c5t_bpix.ome.tif"),
)

# Compare the reference against itself and against each of the other two
print(f"a vs a (identical):  {ir.diff(a, a)}")
print(f"a vs b (same data):  {ir.diff(a, b)}")
print(f"a vs bpix (1px off): {ir.diff(a, bpix)}")

## 9. Backend comparison: nima_io vs tifffile

Key differences between reading with `nima_io` (bioio) and raw `tifffile`.

In [None]:
tiled_path = tdata / "t4_1.tif"

# tifffile: raw arrays and manual dimension handling. Each series is one
# tile; nothing is stitched automatically.
with tifffile.TiffFile(tiled_path) as tif:
    tf_data = tif.series[0].asarray()
    print(f"tifffile single series: shape={tf_data.shape}, axes={tif.series[0].axes}")

# nima_io: one stitched array with named dimensions, kept lazy
nio_data = ir.stitch_scenes(str(tiled_path))
print(f"nima_io stitched:     shape={dict(nio_data.sizes)}")
print(f"  lazy (dask):        {type(nio_data.data).__name__}")

In [None]:
# tifffile: reading OME metadata requires manual XML parsing
with tifffile.TiffFile(tdata / "im1s1z3c5t_a.ome.tif") as tif:
    # tifffile exposes raw OME-XML string
    ome_xml = tif.ome_metadata  # raw XML string
    print(f"tifffile OME-XML: {type(ome_xml).__name__}, {len(ome_xml)} chars")

# nima_io/bioio: parsed OME object with typed attributes
img = BioImage(tdata / "im1s1z3c5t_a.ome.tif")
ome = img.ome_metadata  # ome_types.OME object
print(f"bioio OME: {type(ome).__name__}")
print(f"  images: {len(ome.images)}")
print(f"  instruments: {len(ome.instruments)}")
ch0 = ome.images[0].pixels.channels[0]
# Guard against a channel without light-source settings, as the earlier
# per-channel loops in this notebook do; print None instead of raising.
ls0 = ch0.light_source_settings
wl = ls0.wavelength if ls0 else None
print(f"  channels[0].wavelength: {wl}")