# Extract REG transform matrices (interactive notebook)

This notebook scans a DICOM REG file or a folder of DICOM files and prints any transform or matrix-like numeric elements it finds (4x4, 3x4, 3x3).
You can optionally convert from DICOM/LPS to RAS convention for pasting into 3D Slicer.

How to use:
1. Edit the variables in the `Input` cell (file or directory path, and whether to convert to RAS).
2. Run the `Scan` cell. Matrices found will be printed as 4x4 blocks you can copy into Slicer.

In [9]:
# Helpers: parse numeric lists and format matrices
import re
import os
import pydicom
import numpy as np

def try_parse_numeric_list(val):
    """Try to extract floats from a DICOM element value. Returns list of floats or None."""
    try:
markdown
markdown
# Extract registration transforms (PET -> Planning CT)

This notebook scans a folder of DICOM files (default: `WholePelvis` on your Desktop), bins series (PET/CT/MR/REG),
and searches registration objects for numeric transformation matrices. It prints candidate matrices and attempts to
identify PET -> Planning CT mappings using heuristics similar to `Binning.ipynb`.

Run the cells in order. If you want me to run this notebook and capture output, tell me and I will execute it.
code
python
# Install runtime dependencies if missing (safe to re-run).
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so it is imported explicitly before use.
import sys
import subprocess
import importlib
import importlib.util

reqs = ('pydicom', 'numpy', 'pandas')
for r in reqs:
    # find_spec returns None when no importable package of that name is found.
    if importlib.util.find_spec(r) is None:
        print(f'Installing {r}...')
        # Invoke pip through the running interpreter so the package is
        # installed into the same environment this notebook executes in.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', r])
# Let the import system notice packages installed during this session.
importlib.invalidate_caches()
print('Dependencies ready')
code
python
import os
import re
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
import pydicom
from pydicom.dataset import Dataset

# Known registration SOP Class UIDs (rigid/affine and deformable).
# Values follow the DICOM UID registry; '...66.5' is Surface Segmentation
# Storage and must not be listed here.
REGISTRATION_SOP_CLASS_UIDS = {
    '1.2.840.10008.5.1.4.1.1.66.1',  # Spatial Registration Storage
    '1.2.840.10008.5.1.4.1.1.66.3',  # Deformable Spatial Registration Storage
}

def find_dicom_files(root: str) -> List[str]:
    """Recursively collect paths under *root* whose file name contains '.dcm'.

    The match is case-insensitive and applies anywhere in the file name, so
    both 'scan.DCM' and 'scan.dcm.bak' are included. Returns an empty list
    when nothing matches.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(root)
        for filename in filenames
        if '.dcm' in filename.lower()
    ]

def try_read(path: str) -> Optional[Dataset]:
    """Read the DICOM header at *path*, or return None if reading fails.

    The file is read via pydicom.dcmread with stop_before_pixels=True and
    force=True. Any exception raised during the read is swallowed and
    reported to the caller as None.
    """
    try:
        dataset = pydicom.dcmread(path, stop_before_pixels=True, force=True)
    except Exception:
        return None
    else:
        return dataset

def is_registration(ds: Dataset) -> bool:
    """Return True when *ds* looks like a registration object.

    Checks, in order:
    1. Modality (case-insensitive) is 'REG' or 'RE'.
    2. SOPClassUID is listed in REGISTRATION_SOP_CLASS_UIDS.
    3. Any name returned by ds.dir() contains 'registration', 'transform'
       or 'matrix' (case-insensitive).

    A None dataset is never a registration.
    """
    if ds is None:
        return False

    modality = str(getattr(ds, 'Modality', '') or '').upper()
    if modality in ('REG', 'RE'):
        return True

    sop_uid = str(getattr(ds, 'SOPClassUID', '') or '')
    if sop_uid in REGISTRATION_SOP_CLASS_UIDS:
        return True

    # Heuristic fallback: look for telltale words in the element names.
    hints = ('registration', 'transform', 'matrix')
    return any(hint in keyword.lower() for keyword in ds.dir() for hint in hints)

def find_numeric_in_string(s: str) -> List[float]:
    nums = re.findall(r

-9

-9
code
python
# Configure folder to scan (change if your WholePelvis folder is elsewhere)
FOLDER = r'C:\Users\zhaoanr\Desktop\WholePelvis'
print('Scanning folder:', FOLDER)

# Collect every candidate DICOM path below FOLDER.
all_files = find_dicom_files(FOLDER)
print(f'Found {len(all_files)} candidate files')

# Group files by SeriesInstanceUID (similar to Binning.ipynb). The first
# readable header seen for a series is kept as that series' representative
# header and determines its modality.
series_map = defaultdict(lambda: {'header': None, 'modality': 'UNKNOWN', 'files': []})
file_count = 0  # headers that could be read
skipped = 0     # unreadable files plus headers missing series UID / modality
for p in all_files:
    ds = try_read(p)
    if ds is None:
        skipped += 1
        continue
    file_count += 1
    if not (hasattr(ds, 'SeriesInstanceUID') and hasattr(ds, 'Modality')):
        skipped += 1
        continue
    sid = ds.SeriesInstanceUID
    # Modality is guaranteed present here by the check above.
    mod = str(ds.Modality).upper()
    entry = series_map[sid]
    if entry['header'] is None:
        entry['header'] = ds
        entry['modality'] = mod
    entry['files'].append(p)

print(f'Scanned {file_count} DICOM headers, skipped {skipped} files; found {len(series_map)} series')

# Bin series into PET / CT_PLANNING_SIM / CT_ORIGINAL / REG etc.
bins = {'PET':{}, 'CT_ORIGINAL':{}, 'CT_PLANNING_SIM':{}, 'CT_OTHER':{}, 'MRI':{}, 'REG':{}, 'RTSTRUCT':{}, 'RTDOSE':{}, 'OTHER':{}}
# Identify a PET study UID to help classify planning vs original CT (heuristic):
# a CT that shares the study of the first PET series is treated as the original CT.
pet_series = [(sid,info) for sid,info in series_map.items() if info['modality'] in ('PT','PET')]
orig_study_uid = None
if pet_series:
    # Use getattr so a PET header without StudyInstanceUID does not abort the
    # whole cell; the heuristic is simply disabled in that case.
    orig_study_uid = getattr(pet_series[0][1]['header'], 'StudyInstanceUID', None)

for sid, info in series_map.items():
    mod = info['modality']
    hdr = info['header']
    study_uid = getattr(hdr, 'StudyInstanceUID', None)
    desc = getattr(hdr, 'SeriesDescription', '') or ''
    if mod in ('PT','PET'):
        bins['PET'][sid] = info
    elif mod == 'CT':
        if orig_study_uid and study_uid == orig_study_uid:
            bins['CT_ORIGINAL'][sid] = info
        elif any(k in desc.upper() for k in ('SIM','PLAN','PLANNING','TREATMENT')):
            # Series description hints at a simulation / planning scan.
            bins['CT_PLANNING_SIM'][sid] = info
        else:
            bins['CT_OTHER'][sid] = info
    elif mod in ('MR','MRI'):
        bins['MRI'][sid] = info
    elif mod in ('REG','RE'):
        bins['REG'][sid] = info
    elif mod in ('RTSTRUCT','RTDOSE'):
        # The bin key is identical to the modality string for these two.
        bins[mod][sid] = info
    else:
        # Everything else, including other RT* modalities, lands in OTHER.
        bins['OTHER'][sid] = info

print('Binning summary:')
for k in bins:
    print(f'  {k}: {len(bins[k])} series')
code
python
# Search REG objects and extract numeric matrices where possible
# NOTE(review): `results` is initialised here but nothing in the visible part
# of this cell appends to it — confirm whether it is still needed.
results = []  # list of dicts: {file, series_uid, sopclass, modality, refs, matrix}

def dataset_numeric_candidates(ds: Dataset) -> List[Tuple[str, np.ndarray]]:
    """Collect (label, array) pairs for elements of *ds* that may hold a transform.

    Two passes are made:

    1. Names from ds.dir() containing 'matrix', 'transform', 'registration'
       or 'parameters' are fetched and passed to parse_possible_matrix;
       non-None results are kept under the element name.
    2. Every element from ds.iterall() is inspected. Numeric VRs (DS/FD/IS)
       go through parse_possible_matrix and are kept when at least 6 values
       result; text VRs (LO/LT/UT/ST) are scanned with
       find_numeric_in_string and kept when at least 6 numbers are found.

    An element matched by both passes appears twice in the result.

    In pass 2 the label is the element keyword, falling back to the tag
    string when the keyword is empty (pydicom reports '' for private or
    unknown tags), so such candidates remain identifiable.
    """
    out = []
    name_hints = ('matrix', 'transform', 'registration', 'parameters')

    # Pass 1: search by element name.
    for name in ds.dir():
        lname = name.lower()
        if not any(hint in lname for hint in name_hints):
            continue
        try:
            val = getattr(ds, name)
        except Exception:
            continue
        arr = parse_possible_matrix(val)
        if arr is not None:
            out.append((name, arr))

    # Pass 2: scan all elements for DS/FD/IS lists or long numeric strings.
    for elem in ds.iterall():
        try:
            vr = elem.VR
            if vr in ('DS', 'FD', 'IS'):
                arr = parse_possible_matrix(elem.value)
                if arr is not None and arr.size >= 6:
                    label = getattr(elem, 'keyword', '') or str(elem.tag)
                    out.append((label, arr))
            elif vr in ('LO', 'LT', 'UT', 'ST'):
                nums = find_numeric_in_string(str(elem.value))
                if len(nums) >= 6:
                    label = getattr(elem, 'keyword', '') or str(elem.tag)
                    out.append((label, np.array(nums, dtype=float)))
        except Exception:
            # Best effort: a single malformed element must not stop the scan.
            continue
    return out

# Iterate REG series (if any) and also scan all files for registration SOP class UID
candidates = []
# Paths already recorded in `candidates`; a set keeps the duplicate check O(1)
# instead of rescanning the whole candidate list for every file.
_seen_files = set()

# prefer explicit REG modality series first
reg_series = list(bins['REG'].items())
for sid, info in reg_series:
    for fp in info['files']:
        ds = try_read(fp)
        if ds is None:
            continue
        mats = dataset_numeric_candidates(ds)
        refs = collect_references(ds)
        candidates.append({'file':fp, 'series':sid, 'sop':getattr(ds,'SOPClassUID',''), 'modality':getattr(ds,'Modality',''), 'refs':refs, 'mats':mats})
        _seen_files.add(fp)

# Also scan all files for registration SOPClassUID as a fallback
for fp in all_files:
    # avoid duplicates (and a pointless re-read) if already added
    if fp in _seen_files:
        continue
    ds = try_read(fp)
    if ds is None:
        continue
    if getattr(ds,'SOPClassUID','') in REGISTRATION_SOP_CLASS_UIDS or is_registration(ds):
        mats = dataset_numeric_candidates(ds)
        refs = collect_references(ds)
        candidates.append({'file':fp, 'series':getattr(ds,'SeriesInstanceUID',''), 'sop':getattr(ds,'SOPClassUID',''), 'modality':getattr(ds,'Modality',''), 'refs':refs, 'mats':mats})
        _seen_files.add(fp)

print(f'Found {len(candidates)} registration-like objects to inspect')

# Heuristic: decide from reference strings whether PET and/or CT are involved
def refs_point_to_pet_ct(refs: Dict[str,List[str]]) -> Tuple[bool,bool,List[str]]:
    """Classify reference strings by case-insensitive substring matching.

    Returns a tuple (pet, ct, extras):
    - pet is True if any value contains 'pet' or 'pt';
    - ct is True if any value contains 'ct', 'planning' or 'sim';
    - extras is every value from *refs*, flattened in iteration order.

    The dictionary keys are not used.
    """
    extras = [value for values in refs.values() for value in values]
    lowered = [value.lower() for value in extras]
    pet = any('pet' in text or 'pt' in text for text in lowered)
    ct = any('ct' in text or 'planning' in text or 'sim' in text for text in lowered)
    return pet, ct, extras

# Print readable report and collect flattened results for saving
rows = []
for c in candidates:
    file = c['file']
    sop = c.get('sop','')
    mod = c.get('modality','')
    refs = c.get('refs',{})
    mats = c.get('mats',[])
    pet_flag, ct_flag, extras = refs_point_to_pet_ct(refs)
    print('







































SyntaxError: unterminated string literal (detected at line 231) (2449640880.py, line 231)