# Inspect Remote OME-TIFF Metadata

Fetches and displays OME-TIFF metadata from a remote URL using HTTP range requests,
without downloading the full file. Useful for troubleshooting image rendering issues
in the portal (e.g. verifying dimensions, channels, pyramid levels, physical sizes).

In [1]:
import struct
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field

import requests

## Configuration

Paste the full OME-TIFF URL (including any `?token=` query parameter) below.

In [None]:
IMAGE_URL = ""

# Pasted URLs often carry stray spaces or a trailing newline. Remove them so a
# whitespace-only value is rejected here rather than failing later inside a
# request, and so trailing whitespace cannot end up in the query string.
IMAGE_URL = IMAGE_URL.strip()

if not IMAGE_URL:
    raise ValueError("IMAGE_URL is required. Paste the full OME-TIFF URL above.")

## Helper: HTTP Range Request Reader

All metadata is fetched via `Range` headers, so only the bytes that are actually
needed (the TIFF header, the IFDs, and the OME-XML block) are downloaded, even for
multi-GB pyramid files.

In [3]:
def fetch_range(url: str, start: int, end: int) -> bytes:
    """Fetch the inclusive byte range ``start``-``end`` from a remote URL.

    Args:
        url: Address of the remote file.
        start: Offset of the first byte to fetch.
        end: Offset of the last byte to fetch (inclusive).

    Returns:
        The body of the partial response.

    Raises:
        ValueError: If ``start`` is negative or ``end`` is before ``start``.
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the server does not answer with
            ``206 Partial Content`` (i.e. it ignored the ``Range`` header).
    """
    if start < 0 or end < start:
        raise ValueError(f"Invalid byte range: {start}-{end}")

    # stream=True postpones the body download until the status code has been
    # checked, so a server that ignores Range cannot make us pull the whole
    # (possibly multi-GB) file or hand back bytes from the wrong offset.
    with requests.get(
        url,
        headers={"Range": f"bytes={start}-{end}"},
        timeout=30,
        stream=True,
    ) as r:
        r.raise_for_status()
        if r.status_code != 206:
            raise RuntimeError(
                f"Server ignored the Range header (HTTP {r.status_code}) — "
                "cannot read a byte range without downloading the full file."
            )
        return r.content


# Human-readable names for the TIFF tag numbers this notebook is likely to
# encounter. Tags missing from this table are still parsed; they are simply
# displayed by number.
TAG_NAMES = {
    254: "NewSubfileType",
    256: "ImageWidth",
    257: "ImageLength",
    258: "BitsPerSample",
    259: "Compression",
    262: "PhotometricInterpretation",
    270: "ImageDescription",
    273: "StripOffsets",
    274: "Orientation",
    277: "SamplesPerPixel",
    278: "RowsPerStrip",
    279: "StripByteCounts",
    282: "XResolution",
    283: "YResolution",
    284: "PlanarConfiguration",
    296: "ResolutionUnit",
    305: "Software",
    306: "DateTime",
    317: "Predictor",
    320: "ColorMap",
    322: "TileWidth",
    323: "TileLength",
    324: "TileOffsets",
    325: "TileByteCounts",
    330: "SubIFDs",
    338: "ExtraSamples",
    339: "SampleFormat",
    347: "JPEGTables",
}

# Display names for values of the Compression tag (259). Codes missing from
# this table are shown as the raw number.
COMPRESSION_NAMES = {
    1: "None",
    2: "CCITT RLE",
    3: "CCITT Group 3",
    4: "CCITT Group 4",
    5: "LZW",
    6: "JPEG (old-style)",
    7: "JPEG",
    8: "Deflate",
    32773: "PackBits",
    32946: "Deflate",
    33003: "JPEG 2000",
    33004: "JPEG 2000 (lossy)",
    33005: "JPEG 2000",
    34712: "JPEG 2000",
    34925: "LZMA",
    50000: "Zstandard",
    50001: "WebP",
}


def _unpack_ifd_value(byte_order: str, dtype: int, val_bytes: bytes):
    """Unpack the value/offset field of a TIFF IFD entry."""
    if dtype == 3:  # SHORT
        return struct.unpack(f"{byte_order}H", val_bytes[:2])[0]
    if dtype == 4:  # LONG
        return struct.unpack(f"{byte_order}I", val_bytes[:4])[0]
    if dtype == 16:  # LONG8 (BigTIFF)
        return struct.unpack(f"{byte_order}Q", val_bytes[:8])[0]
    return struct.unpack(f"{byte_order}Q", val_bytes[:8])[0]


@dataclass
class IFDEntry:
    """One directory entry of a TIFF IFD, kept in its decoded numeric form."""

    tag: int    # numeric TIFF tag
    dtype: int  # TIFF field type code
    count: int  # number of values the entry declares
    value: int  # decoded content of the entry's value/offset field

    @property
    def name(self):
        """Return the tag's name from TAG_NAMES, or ``Tag_<number>`` if unknown."""
        fallback = f"Tag_{self.tag}"
        return TAG_NAMES.get(self.tag, fallback)


@dataclass
class IFDSummary:
    """Key image properties collected from one TIFF IFD (image or sub-image)."""

    width: int = 0
    height: int = 0
    bits_per_sample: int = 0
    samples_per_pixel: int = 1
    compression: int = 1
    tile_width: int = 0
    tile_height: int = 0
    subifd_offsets: list = field(default_factory=list)
    entries: list = field(default_factory=list)

    @property
    def compression_name(self):
        """Return the display name of the compression code, or the code as text."""
        code = self.compression
        return COMPRESSION_NAMES.get(code, str(code))

    def short_desc(self, label: str = "") -> str:
        """Build a one-line description, optionally prefixed with ``label``.

        Tile dimensions are appended only when ``tile_width`` is non-zero.
        """
        pieces = []
        if label:
            pieces.append(f"{label}: ")
        pieces.append(f"{self.width}x{self.height}, ")
        pieces.append(f"{self.bits_per_sample}-bit x{self.samples_per_pixel}ch, ")
        pieces.append(f"compression={self.compression_name}")
        if self.tile_width:
            pieces.append(f", tiles={self.tile_width}x{self.tile_height}")
        return "".join(pieces)

## Helper: TIFF / BigTIFF IFD Parser

In [4]:
# Tags whose decoded value is copied onto the IFDSummary attribute of that name.
_SUMMARY_FIELD_BY_TAG = {
    256: "width",
    257: "height",
    258: "bits_per_sample",
    259: "compression",
    277: "samples_per_pixel",
    322: "tile_width",
    323: "tile_height",
}


def _read_subifd_offsets(
    url: str,
    byte_order: str,
    dtype: int,
    count: int,
    val_bytes: bytes,
    ptr_fmt: str,
) -> list[int]:
    """Decode the offsets listed by a SubIFDs (330) entry.

    The offsets are read directly from ``val_bytes`` when the whole array fits
    in the entry's value field (e.g. a single sub-IFD); otherwise the field is
    an offset to the array, which is fetched with one range request.
    """
    if count == 0:
        return []

    # Element width follows the field type: LONG/IFD are 4 bytes, LONG8/IFD8
    # are 8 bytes. Any other type falls back to the container's pointer width.
    if dtype in (4, 13):
        elem_fmt = "I"
    elif dtype in (16, 18):
        elem_fmt = "Q"
    else:
        elem_fmt = ptr_fmt

    array_size = count * struct.calcsize(f"{byte_order}{elem_fmt}")
    if array_size <= len(val_bytes):
        data = val_bytes[:array_size]
    else:
        array_offset = struct.unpack(f"{byte_order}{ptr_fmt}", val_bytes)[0]
        data = fetch_range(url, array_offset, array_offset + array_size - 1)
    return list(struct.unpack(f"{byte_order}{count}{elem_fmt}", data))


def read_ifd(url: str, offset: int, byte_order: str, *, bigtiff: bool) -> tuple[IFDSummary, int]:
    """Read a single IFD from a remote TIFF.

    Args:
        url: Address of the remote file.
        offset: File offset at which the IFD starts.
        byte_order: ``struct`` byte-order prefix (``"<"`` or ``">"``).
        bigtiff: Whether the file uses the BigTIFF layout (8-byte entry count,
            20-byte entries, 8-byte pointers) instead of the classic one
            (2-byte count, 12-byte entries, 4-byte pointers).

    Returns:
        (IFDSummary, next_ifd_offset).  next_ifd_offset is 0 when
        there are no more IFDs in the chain.
    """
    if bigtiff:
        count_size, entry_size, ptr_size, count_fmt, ptr_fmt = 8, 20, 8, "Q", "Q"
        head_fmt, head_size = "HHQ", 12
    else:
        count_size, entry_size, ptr_size, count_fmt, ptr_fmt = 2, 12, 4, "H", "I"
        head_fmt, head_size = "HHI", 8

    count_data = fetch_range(url, offset, offset + count_size - 1)
    num_entries = struct.unpack(f"{byte_order}{count_fmt}", count_data)[0]

    # The next-IFD pointer sits directly after the entries, so both are read
    # with one request. This also keeps the range valid for an empty IFD.
    entries_start = offset + count_size
    entries_len = num_entries * entry_size
    body = fetch_range(url, entries_start, entries_start + entries_len + ptr_size - 1)

    summary = IFDSummary()
    for i in range(num_entries):
        raw = body[i * entry_size:(i + 1) * entry_size]
        tag, dtype, count = struct.unpack(f"{byte_order}{head_fmt}", raw[:head_size])
        val_bytes = raw[head_size:entry_size]

        val = _unpack_ifd_value(byte_order, dtype, val_bytes)
        summary.entries.append(IFDEntry(tag=tag, dtype=dtype, count=count, value=val))

        attr = _SUMMARY_FIELD_BY_TAG.get(tag)
        if attr is not None:
            setattr(summary, attr, val)
        elif tag == 330:  # SubIFDs
            summary.subifd_offsets = _read_subifd_offsets(
                url, byte_order, dtype, count, val_bytes, ptr_fmt
            )

    next_ifd = struct.unpack(
        f"{byte_order}{ptr_fmt}", body[entries_len:entries_len + ptr_size]
    )[0]

    return summary, next_ifd


def fetch_string_tag(url: str, entry: IFDEntry, byte_order: str, *, max_bytes: int = 65536) -> str:
    """Fetch the string value of a TIFF tag (e.g. ImageDescription, Software).

    Args:
        url: Address of the remote file.
        entry: The tag to read; ``entry.value`` is used as the file offset of
            the string and ``entry.count`` as its length in bytes.
        byte_order: Not used by this function; kept in the signature so
            existing calls keep working.
        max_bytes: Upper bound on the number of bytes fetched.

    Returns:
        The text decoded as UTF-8 (undecodable bytes replaced) with trailing
        NUL bytes removed, or ``""`` when there is nothing to fetch.
    """
    nbytes = min(entry.count, max_bytes)
    if nbytes <= 0:
        # An empty string would need the range "N-(N-1)", which is not a
        # valid request, so answer without touching the network.
        return ""
    # NOTE(review): entry.value is always treated as an offset; a string short
    # enough to be stored inside the entry itself would need separate handling.
    data = fetch_range(url, entry.value, entry.value + nbytes - 1)
    return data.decode("utf-8", errors="replace").rstrip("\x00")

## Step 1: Read TIFF Header and First IFD

In [5]:
# Verify the server supports range requests and get file size.
# requests.head() does not follow redirects unless asked to; without this a
# redirecting URL would report the redirect response's headers, not the file's.
head = requests.head(IMAGE_URL, timeout=15, allow_redirects=True)
head.raise_for_status()
file_size = int(head.headers.get("Content-Length", 0))
accepts_ranges = head.headers.get("Accept-Ranges", "none")
print(f"File size: {file_size:,} bytes ({file_size / (1024**3):.2f} GB)")
print(f"Accept-Ranges: {accepts_ranges}")
if accepts_ranges == "none":
    raise RuntimeError("Server does not support range requests — cannot read metadata without downloading the full file.")

# Read the TIFF header: 8 bytes for classic TIFF, 16 bytes for BigTIFF,
# so fetch 16 to cover both.
header = fetch_range(IMAGE_URL, 0, 15)

# Validate the signature before trusting any offsets; an error page or another
# file type would otherwise be parsed as if it were a big-endian TIFF.
if header[:2] == b"II":
    byte_order = "<"
elif header[:2] == b"MM":
    byte_order = ">"
else:
    raise RuntimeError(f"Not a TIFF file: unexpected byte-order marker {header[:2]!r}.")

magic = struct.unpack(f"{byte_order}H", header[2:4])[0]
if magic not in (42, 43):
    raise RuntimeError(f"Not a TIFF file: unexpected magic number {magic} (expected 42 or 43).")
bigtiff = magic == 43

if bigtiff:
    offset_size = struct.unpack(f"{byte_order}H", header[4:6])[0]
    if offset_size != 8:
        # The parser below reads 8-byte offsets for BigTIFF.
        raise RuntimeError(f"Unsupported BigTIFF offset size: {offset_size} (expected 8).")
    first_ifd_offset = struct.unpack(f"{byte_order}Q", header[8:16])[0]
    print(f"Format: BigTIFF ({'little' if byte_order == '<' else 'big'}-endian)")
else:
    first_ifd_offset = struct.unpack(f"{byte_order}I", header[4:8])[0]
    print(f"Format: Classic TIFF ({'little' if byte_order == '<' else 'big'}-endian)")

print(f"First IFD offset: {first_ifd_offset:,}")

# Read first IFD
first_ifd, next_ifd_offset = read_ifd(IMAGE_URL, first_ifd_offset, byte_order, bigtiff=bigtiff)
print(f"\nFirst IFD: {first_ifd.short_desc()}")
print(f"  {len(first_ifd.entries)} tags, {len(first_ifd.subifd_offsets)} sub-IFDs")

# Fetch Software tag if present
for entry in first_ifd.entries:
    if entry.tag == 305:
        software = fetch_string_tag(IMAGE_URL, entry, byte_order)
        print(f"  Software: {software}")

File size: 34,385,907,269 bytes (32.02 GB)
Accept-Ranges: bytes
Format: BigTIFF (big-endian)
First IFD offset: 34,361,978,704

First IFD: 49152x65536, 16-bit x1ch, compression=None, tiles=512x512
  18 tags, 8 sub-IFDs
  Software: OME Bio-Formats 7.1.0


## Step 2: Parse OME-XML

In [6]:
# Find ImageDescription tag (270) — contains the OME-XML
desc_entry = next((e for e in first_ifd.entries if e.tag == 270), None)

if desc_entry is None:
    raise RuntimeError("No ImageDescription tag found — this may not be an OME-TIFF.")

print(f"OME-XML size: {desc_entry.count:,} bytes")

# Fetch the full OME-XML (can be multiple MB for files with many ROIs)
ome_xml = fetch_range(IMAGE_URL, desc_entry.value, desc_entry.value + desc_entry.count - 1)
ome_xml = ome_xml.decode("utf-8", errors="replace").rstrip("\x00")

root = ET.fromstring(ome_xml)
ns = ""
if "}" in root.tag:
    ns = root.tag.split("}")[0] + "}"

# Print top-level OME attributes
print(f"\nOME attributes:")
for k, v in root.attrib.items():
    if "schemaLocation" not in k:
        print(f"  {k.split('}')[-1]}: {v}")

OME-XML size: 23,140,345 bytes

OME attributes:
  Creator: Nanostring GeoMx 3.1.0.222
  UUID: urn:uuid:d03b1055-c4a7-498e-950e-0b10733fa0d4


## Step 3: Image & Pixel Metadata

In [7]:
# One report per <Image>: identity, pixel geometry, channels and planes.
for img in root.findall(f"{ns}Image"):
    print(f"Image: {img.get('Name', '')} ({img.get('ID', '')})")

    acq_el = img.find(f"{ns}AcquisitionDate")
    if acq_el is not None and acq_el.text:
        print(f"  Acquired: {acq_el.text}")

    pixels = img.find(f"{ns}Pixels")
    if pixels is None:
        print("  (no Pixels element)")
        continue

    attrs = pixels.attrib
    size_z = attrs.get("SizeZ", "1")
    size_c = attrs.get("SizeC", "1")
    size_t = attrs.get("SizeT", "1")
    print(f"  Dimensions: {attrs.get('SizeX')} x {attrs.get('SizeY')} px")
    print(f"  SizeZ={size_z}, SizeC={size_c}, SizeT={size_t}")
    print(f"  Pixel type: {attrs.get('Type')} ({attrs.get('SignificantBits', '?')}-bit significant)")
    print(f"  Dimension order: {attrs.get('DimensionOrder')}")
    print(f"  Interleaved: {attrs.get('Interleaved', 'false')}")

    # Physical sizes are reported only when both axes are present
    phys_x = attrs.get("PhysicalSizeX")
    phys_y = attrs.get("PhysicalSizeY")
    if phys_x and phys_y:
        unit_x = attrs.get("PhysicalSizeXUnit", "\u00b5m")
        unit_y = attrs.get("PhysicalSizeYUnit", "\u00b5m")
        print(f"  Physical pixel size: {phys_x} {unit_x} x {phys_y} {unit_y}")
        total_x = int(attrs["SizeX"]) * float(phys_x)
        total_y = int(attrs["SizeY"]) * float(phys_y)
        print(f"  Total physical size: {total_x:.2f} x {total_y:.2f} {unit_x}")

    # Channels
    channels = pixels.findall(f"{ns}Channel")
    if channels:
        print(f"  Channels ({len(channels)}):")
    for ch in channels:
        print(
            f"    {ch.get('ID')}: {ch.get('Name', '')}"
            f"  fluor={ch.get('Fluor', '')}"
            f"  color={ch.get('Color', '')}"
            f"  spp={ch.get('SamplesPerPixel', '1')}"
        )

    # Planes (z/c/t indices, plus the exposure time when one is recorded)
    planes = pixels.findall(f"{ns}Plane")
    if planes:
        print(f"  Planes ({len(planes)}):")
    for pl in planes:
        exp = pl.get("ExposureTime", "")
        exp_unit = pl.get("ExposureTimeUnit", "")
        exposure_note = f"  exposure={exp} {exp_unit}" if exp else ""
        print(f"    C={pl.get('TheC')} Z={pl.get('TheZ')} T={pl.get('TheT')}{exposure_note}")

Image: 3041_pB_M (Image:0)
  Acquired: 2022-09-16T19:29:35.739
  Dimensions: 49152 x 65536 px
  SizeZ=1, SizeC=4, SizeT=1
  Pixel type: uint16 (16-bit significant)
  Dimension order: XYZCT
  Interleaved: false
  Physical pixel size: 0.400673121 µm x 0.399840057 µm
  Total physical size: 19693.89 x 26203.92 µm
  Channels (4):
    Channel:0:0: FITC/525nm  fluor=SYTO 13  color=65279  spp=1
    Channel:0:1: Cy3/568nm  fluor=Alexa 532  color=16646399  spp=1
    Channel:0:2: Texas Red/615nm  fluor=Alexa 594  color=-16908033  spp=1
    Channel:0:3: Cy5/666nm  fluor=Cy5  color=-33554177  spp=1
  Planes (4):
    C=0 Z=0 T=0  exposure=100.0 µs
    C=1 Z=0 T=0  exposure=300.0 µs
    C=2 Z=0 T=0  exposure=300.0 µs
    C=3 Z=0 T=0  exposure=300.0 µs


## Step 4: Pyramid Levels (Sub-IFDs)

In [None]:
print(f"Full resolution (IFD 0): {first_ifd.short_desc()}")

if first_ifd.subifd_offsets:
    print(f"\nPyramid sub-IFDs ({len(first_ifd.subifd_offsets)} levels):")
    for i, off in enumerate(first_ifd.subifd_offsets):
        sub, _ = read_ifd(IMAGE_URL, off, byte_order, bigtiff=bigtiff)
        print(f"  Level {i + 1}: {sub.short_desc()}")
else:
    print("\nNo sub-IFDs (no pyramid / single resolution).")

# Walk the top-level IFD chain to find additional channels.
# Every IFD costs network round trips, so the walk is capped.
MAX_CHAIN_IFDS = 20
channel_ifds = []
ifd_offset = next_ifd_offset
while ifd_offset != 0 and len(channel_ifds) < MAX_CHAIN_IFDS:
    ifd_summary, ifd_offset = read_ifd(IMAGE_URL, ifd_offset, byte_order, bigtiff=bigtiff)
    channel_ifds.append(ifd_summary)

if channel_ifds:
    print(f"\nAdditional top-level IFDs ({len(channel_ifds)}):")
    for i, ifd in enumerate(channel_ifds):
        print(f"  IFD {i + 1}: {ifd.short_desc()}")

# A non-zero offset here means the loop stopped at the cap, not at the end of
# the chain; say so rather than presenting the list as complete.
if ifd_offset != 0:
    print(f"  ... stopped after {MAX_CHAIN_IFDS} IFDs; more remain in the chain")

## Step 5: Structured Annotations

OME-XML can contain vendor-specific metadata (e.g. channel intensity ranges,
biological targets, instrument info) in `<StructuredAnnotations>`.

In [None]:
# List every child of <StructuredAnnotations> with a compact view of its <Value>.
sa = root.find(f"{ns}StructuredAnnotations")
if sa is None:
    print("No StructuredAnnotations found.")
else:
    annotations = list(sa)
    print(f"{len(annotations)} annotation(s):\n")
    for ann in annotations:
        ann_id = ann.get("ID", "")
        ann_type = ann.tag.replace(ns, "")
        value_el = ann.find(f"{ns}Value")

        if value_el is None:
            print(f"  {ann_id} ({ann_type}): (empty)")
            continue

        if len(value_el) > 0:
            # Value wraps an element: show the wrapper's name and the text of
            # each of its direct children.
            inner = value_el[0]
            inner_tag = inner.tag.replace(ns, "")
            leaf_values = {}
            for child in inner:
                leaf_values[child.tag.replace(ns, "")] = (child.text or "").strip()
            print(f"  {ann_id} ({ann_type}/{inner_tag}): {leaf_values}")
        elif value_el.text:
            # Plain-text value: show at most the first 200 characters.
            text = value_el.text.strip()
            ellipsis = "..." if len(text) > 200 else ""
            print(f"  {ann_id} ({ann_type}): {text[:200]}{ellipsis}")
        else:
            print(f"  {ann_id} ({ann_type}): (empty)")

## Step 6: ROIs

ROIs defined in the OME-XML (e.g. scan regions for spatial transcriptomics instruments).

In [None]:
# Top-level <ROI> definitions, plus every <ROIRef> anywhere in the document.
rois = root.findall(f"{ns}ROI")
roi_refs = root.findall(f".//{ns}ROIRef")

if rois:
    print(f"{len(rois)} ROI(s) defined, {len(roi_refs)} ROIRef(s) in images:\n")
    for roi in rois[:25]:  # only the first 25 are listed, to avoid excessive output
        roi_id = roi.get("ID", "")
        roi_name = roi.get("Name", "")
        union = roi.find(f"{ns}Union")
        shapes = [] if union is None else list(union)

        shape_types = []
        for s in shapes:
            shape_types.append(s.tag.replace(ns, ""))
        print(f"  {roi_id}: {roi_name or '(unnamed)'}  shapes={shape_types}")

        # Per shape: every attribute except its ID
        for shape in shapes:
            relevant_attrs = {k: v for k, v in shape.attrib.items() if k != "ID"}
            if not relevant_attrs:
                continue
            shape_type = shape.tag.replace(ns, "")
            print(f"    {shape_type}: {relevant_attrs}")

    hidden_count = len(rois) - 25
    if hidden_count > 0:
        print(f"  ... and {hidden_count} more ROI(s)")
else:
    print("No ROIs defined.")

## Raw OME-XML (first 5000 characters)

For reference / quick inspection of the raw XML.

In [None]:
# Show only the start of the document; note the full length when it is longer.
total_chars = len(ome_xml)
print(ome_xml[:5000])
if total_chars > 5000:
    print(f"\n... truncated ({total_chars:,} total characters)")