In [4]:
# /tools/mat_inspect_dir.py
"""
Scan a directory for .mat files (v7.3 and older) and list their contents:
- For v7.3 (HDF5): traverse groups/datasets and print path, shape, dtype, MATLAB_class
- For <= v7.2: list top-level variables with shape/dtype
Usage:
    python mat_inspect_dir.py [ROOT] [--csv] [--csv-path CSV_PATH]
Or import and call:
    summarize_mat_dir("/home/work/OCT_DL/CDAC_OCT/CDAC_PYTHON/2_3Dregistration")
"""

import os
import sys
import glob
from typing import List, Dict, Any

def _is_mat_v73(path: str) -> bool:
    # v7.3는 HDF5 포맷이라 h5py로 열 수 있음
    try:
        import h5py  # lazy import
        with h5py.File(path, "r"):
            return True
    except Exception:
        return False

def _summarize_v73(path: str) -> List[Dict[str, Any]]:
    import h5py
    rows = []
    def visit(name, obj):
        entry = {"file": os.path.basename(path), "version": "v7.3", "path": f"/{name}"}
        if isinstance(obj, h5py.Dataset):
            entry.update({
                "type": "dataset",
                "shape": tuple(obj.shape),
                "dtype": str(obj.dtype),
                "matlab_class": obj.attrs.get("MATLAB_class", b"").decode("utf-8", errors="ignore")
                                if isinstance(obj.attrs.get("MATLAB_class", None), (bytes, bytearray)) 
                                else obj.attrs.get("MATLAB_class", ""),
                "note": "",
            })
        else:
            # Group
            entry.update({"type": "group", "shape": "", "dtype": "", "matlab_class": "", "note": ""})
        rows.append(entry)
    with h5py.File(path, "r") as f:
        f.visititems(visit)
    return rows

def _summarize_legacy(path: str) -> List[Dict[str, Any]]:
    # v7.2 이하는 scipy로 로드
    import scipy.io as sio
    rows = []
    try:
        data = sio.loadmat(path, struct_as_record=False, squeeze_me=True)
    except Exception as e:
        rows.append({
            "file": os.path.basename(path), "version": "v7.0-7.2?",
            "path": "", "type": "error", "shape": "", "dtype": "", "matlab_class": "", "note": str(e)
        })
        return rows

    for k, v in data.items():
        if k.startswith("__"):
            continue  # __header__, __version__, __globals__ 제외
        shape = ""
        dtype = type(v).__name__
        note = ""

        try:
            import numpy as np
            if isinstance(v, np.ndarray):
                shape = tuple(v.shape)
                dtype = f"ndarray({v.dtype})"
                # MATLAB struct/cell 등은 object dtype로 올 수 있음
                if v.dtype == object:
                    note = "object array (struct/cell?)"
            else:
                # 스칼라/리스트/기타
                shape = ""
        except Exception as e:
            note = f"introspect error: {e}"

        rows.append({
            "file": os.path.basename(path),
            "version": "v7.0-7.2?",
            "path": k,           # legacy는 top-level 변수명
            "type": "variable",
            "shape": shape,
            "dtype": dtype,
            "matlab_class": "",
            "note": note,
        })
    return rows

def summarize_mat_dir(root: str, save_csv: bool = False, csv_path: str = "mat_summary.csv",
                      recursive: bool = False) -> List[Dict[str, Any]]:
    """Scan ``root`` for ``.mat`` files and summarize the contents of each.

    Each file is probed with ``_is_mat_v73`` and dispatched to
    ``_summarize_v73`` or ``_summarize_legacy``. A failure on one file is
    recorded as an ``error`` row and does not stop the scan. Progress and a
    per-file listing are printed to stdout.

    Args:
        root: Directory to scan.
        save_csv: When true, also write all rows to ``csv_path`` using pandas;
            a failure to write is reported as a warning, not raised.
        csv_path: Destination of the CSV summary.
        recursive: When true, descend into subdirectories. Rows are then
            labelled with the path relative to ``root`` so that equally named
            files from different subdirectories are not merged. When false
            (default) only ``root`` itself is scanned and rows are labelled
            with the bare file name.

    Returns:
        The list of row dicts for all files (empty if no ``.mat`` file exists).
    """
    from collections import defaultdict

    # Escape the directory part so names containing [, ], * or ? are matched
    # literally instead of being interpreted as glob wildcards.
    base = glob.escape(root)
    if recursive:
        pattern = os.path.join(base, "**", "*.mat")
    else:
        pattern = os.path.join(base, "*.mat")
    mat_files = sorted(glob.glob(pattern, recursive=recursive))
    if not mat_files:
        print(f"[INFO] No .mat files found under: {root}")
        return []

    all_rows: List[Dict[str, Any]] = []
    for fp in mat_files:
        label = os.path.relpath(fp, root) if recursive else os.path.basename(fp)
        try:
            if _is_mat_v73(fp):
                rows = _summarize_v73(fp)
            else:
                rows = _summarize_legacy(fp)
            for r in rows:
                r["file"] = label  # same as the basename unless recursive
            all_rows.extend(rows)
            print(f"[OK] Scanned: {label}  ({len(rows)} items)")
        except Exception as e:
            all_rows.append({
                "file": label, "version": "unknown",
                "path": "", "type": "error", "shape": "", "dtype": "", "matlab_class": "", "note": str(e)
            })
            print(f"[ERR] {label}: {e}")

    # Pretty print summary per file
    per_file = defaultdict(list)
    for r in all_rows:
        per_file[r["file"]].append(r)

    for fname, rows in per_file.items():
        print("\n" + "="*80)
        print(f"FILE: {fname}  (items: {len(rows)})")
        print("- path | type | shape | dtype | matlab_class | note")
        for r in rows:
            print(f"{r['path'] or '/'} | {r['type']} | {r['shape']} | {r['dtype']} | {r['matlab_class']} | {r['note']}")

    if save_csv:
        try:
            import pandas as pd
            df = pd.DataFrame(all_rows)
            df.to_csv(csv_path, index=False)
            print(f"\n[SAVED] CSV summary -> {csv_path}")
        except Exception as e:
            print(f"[WARN] Failed to save CSV: {e}")

    return all_rows

if __name__ == "__main__":
    import argparse
    default_dir = "/home/work/OCT_DL/CDAC_OCT/CDAC_PYTHON/2_3Dregistration"

    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("root", nargs="?", default=default_dir,
                        help="Directory to scan for .mat files")
    parser.add_argument("--recursive", action="store_true",
                        help="Scan subdirectories recursively")
    parser.add_argument("--csv", action="store_true",
                        help="Save CSV summary")
    parser.add_argument("--csv-path", default=None,
                        help="CSV destination (default: <root>/mat_summary.csv)")
    # ipykernel is commonly launched as `-f <connection_file>`; declaring -f
    # keeps that file path from being consumed as the positional `root`.
    parser.add_argument("-f", dest="kernel_connection_file", default=None,
                        help=argparse.SUPPRESS)
    # parse_known_args ignores anything else the kernel passes (e.g. --f=...).
    args, _unknown = parser.parse_known_args()

    # Put the CSV next to the scanned data unless a path was given explicitly.
    csv_path = args.csv_path or os.path.join(args.root, "mat_summary.csv")

    summarize_mat_dir(args.root, save_csv=args.csv, csv_path=csv_path,
                      recursive=args.recursive)

[OK] Scanned: d5_Int_04_CAO.mat  (1 items)
[OK] Scanned: d5_Int_05_CAO.mat  (1 items)
[OK] Scanned: d5_Int_05_subLayers_0704.mat  (4 items)
[OK] Scanned: d5_Int_07_Layers_0704.mat  (4 items)
[OK] Scanned: d5_Int_08_Layers_0704.mat  (4 items)
[OK] Scanned: d5_Int_08_subLayers_0704.mat  (4 items)
[OK] Scanned: d5_subvolume_aligned_0704_01.mat  (1 items)
[OK] Scanned: d5_subvolume_aligned_0704_03.mat  (1 items)
[OK] Scanned: d5_subvolume_aligned_0704_04.mat  (1 items)
[OK] Scanned: d5_subvolume_aligned_0704_05.mat  (1 items)
[OK] Scanned: d5_subvolume_aligned_0704_07.mat  (1 items)
[OK] Scanned: d5_subvolume_aligned_0704_10.mat  (1 items)
[OK] Scanned: d6_Info.mat  (1 items)
[OK] Scanned: d6_Int_01_subLayers_0704.mat  (4 items)
[OK] Scanned: d6_Int_07_CAO.mat  (1 items)
[OK] Scanned: d6_Int_07_Layers_0704.mat  (4 items)
[OK] Scanned: d6_Int_08_Layers_0704.mat  (4 items)
[OK] Scanned: d6_subvolume_aligned_0704_08.mat  (1 items)
[OK] Scanned: d6_subvolume_aligned_0704_09.mat  (1 items)

FIL

In [1]:
import pprint

from scipy.io import loadmat

# Load the MAT-file and pick its top-level "Info" variable in one expression.
# The result is expected to expose `_fieldnames`, which is read below.
info = loadmat(
    "/home/work/OCT_DL/CDAC_OCT/SH/cal/info.mat",
    struct_as_record=False,
    squeeze_me=True,
)["Info"]
pprint.pprint(info._fieldnames)  # print the list of fields of the MATLAB struct

['wllow',
 'wlhigh',
 'kcenter',
 'kbandwidth',
 'FDFlip',
 'n',
 'numSamples',
 'usedSamples',
 'numUsedSamples',
 'numFTSamples',
 'bgLineShift',
 'bgLines',
 'adaptiveBG',
 'adaptiveBGOsc',
 'bgRef',
 'noiseFloor',
 'bgBW',
 'depthPerPixel',
 'radPerPixel',
 'trigDelay',
 'spectralWindow',
 'resampTraceA',
 'dispComp',
 'bgMean',
 'bgOsc',
 'numRawLines',
 'numResampLines',
 'resampTraceB']
