In [None]:
import pandas as pd
# Read the series-header summary CSV into `df` and preview its first rows.
# Original note: load /home/eo287/mnt/s3_ccta/summaries/ccta_series_headers_only_E100138698.csv
# NOTE(review): the path passed below contains a literal "E***"; pandas.read_csv
# does not expand wildcards, so confirm the intended file name.
df = pd.read_csv(filepath_or_buffer='/home/eo287/mnt/s3_ccta/summaries/ccta_series_headers_only_E***.csv')
print(df.head(n=5))

In [None]:
# Column names of the summary table as a plain Python list
df.columns.tolist()

In [None]:
# How often each ImageType value occurs -- DICOM tag (0008,0008)
print(df.loc[:, "ImageType"].value_counts())

In [None]:
# Remove the cap on displayed columns so none are elided, then show the frame
pd.options.display.max_columns = None
df

In [None]:
# How often each SliceThickness value occurs
print(df.loc[:, "SliceThickness"].value_counts())

In [None]:
# Narrow `df` with three successive row filters (each applied to the result
# of the previous one):
#   1. slice thickness strictly below 1
#   2. ContrastBolusAgent present (not null)
#   3. SpacingBetweenSlices strictly positive
df = (
    df
    .loc[lambda d: d["SliceThickness"].lt(1)]
    .loc[lambda d: d["ContrastBolusAgent"].notna()]
    .loc[lambda d: d["SpacingBetweenSlices"].gt(0)]
)

In [None]:
# How often each ContrastBolusAgent value occurs in the filtered frame
print(df.loc[:, "ContrastBolusAgent"].value_counts())

In [None]:
# How often each XRayTubeCurrent value occurs
print(df.loc[:, "XRayTubeCurrent"].value_counts())

In [None]:
# How often each KVP value occurs
print(df.loc[:, "KVP"].value_counts())

In [None]:
# RepresentativeFile of the first remaining row (positional lookup)
df["RepresentativeFile"].iat[0]

In [None]:
# SeriesInstanceUID of the first remaining row (positional lookup)
df["SeriesInstanceUID"].iat[0]

In [None]:
import os
import SimpleITK as sitk
import matplotlib.pyplot as plt

# ----------------------------
# CONFIG: any single DICOM file belonging to the series of interest
# (here: the RepresentativeFile of the first row of `df`)
# ----------------------------
example_file = df["RepresentativeFile"].iat[0]

# ----------------------------
# Helper: read DICOM header (no pixels) with SimpleITK
# ----------------------------
def read_dicom_header_only(dcm_path: str, load_private: bool = True) -> dict:
    """Read the metadata tags of one DICOM file without loading pixel data.

    Parameters
    ----------
    dcm_path : str
        Path of the DICOM file to inspect.
    load_private : bool, default True
        When true, ask the reader to include private tags in the metadata
        it exposes. When false, the reader's default setting is left as is.

    Returns
    -------
    dict
        One entry per key reported by the reader's ``GetMetaDataKeys()``,
        mapped to the value from ``GetMetaData(key)``. A key whose value
        cannot be fetched is mapped to the string ``"<unreadable>"``.
    """
    rdr = sitk.ImageFileReader()
    rdr.SetFileName(dcm_path)
    # Private-tag loading is a reader option, so it has to be switched on
    # BEFORE the header is parsed; enabling it after ReadImageInformation()
    # does not change the metadata that was already collected.
    if load_private:
        try:
            rdr.LoadPrivateTagsOn()
        except AttributeError:
            # Reader without this switch: continue with whatever tags it exposes.
            pass
    # ReadImageInformation() parses the header only (no pixel buffer).
    rdr.ReadImageInformation()
    meta = {}
    for k in rdr.GetMetaDataKeys():
        try:
            meta[k] = rdr.GetMetaData(k)
        except Exception:
            # Best effort: keep the key visible even if its value is unreadable.
            meta[k] = "<unreadable>"
    return meta

# ----------------------------
# Helper: get SeriesInstanceUID of a given file (via header-only read)
# ----------------------------
def get_series_uid(dcm_path: str) -> str:
    """Return the SeriesInstanceUID stored in one DICOM file.

    The header is read with ``read_dicom_header_only`` and the value of tag
    (0020,000E) is returned. Both the lower-case and the upper-case spelling
    of the metadata key are accepted; the lower-case one is checked first.

    Raises
    ------
    RuntimeError
        If neither spelling of the key is present in the header.
    """
    header = read_dicom_header_only(dcm_path, load_private=True)
    present = [tag for tag in ("0020|000e", "0020|000E") if tag in header]
    if not present:
        raise RuntimeError(f"SeriesInstanceUID not found in {dcm_path}")
    return header[present[0]]

# ----------------------------
# Start from the folder that holds the example file and read its series UID
# ----------------------------
series_dir = os.path.dirname(example_file)
series_uid = get_series_uid(example_file)
print("SeriesInstanceUID:", series_uid)
print("Series directory :", series_dir)

# ----------------------------
# Ask GDCM which series IDs live in that folder; if ours is missing,
# retry one level up and fail loudly when it is not there either
# ----------------------------
r = sitk.ImageSeriesReader()
uids_here = r.GetGDCMSeriesIDs(series_dir) or []
if series_uid not in uids_here:
    parent = os.path.dirname(series_dir)
    uids_parent = r.GetGDCMSeriesIDs(parent) or []
    if series_uid not in uids_parent:
        raise RuntimeError(f"Target UID not found.\n Here: {uids_here}\n Parent: {uids_parent}")
    series_dir = parent
    print("Target series found in parent dir:", series_dir)

# ----------------------------
# Collect the files GDCM assigns to the series and read them as one volume
# ----------------------------
file_list = r.GetGDCMSeriesFileNames(series_dir, series_uid)
print(f"Found {len(file_list)} slices in series.")

# Reader options (all set before Execute): private tags and per-file
# metadata dictionaries are requested in addition to the file names.
r.LoadPrivateTagsOn()
r.MetaDataDictionaryArrayUpdateOn()
r.SetFileNames(file_list)

img = r.Execute()  # SimpleITK image built from the series files
arr = sitk.GetArrayFromImage(img)  # numpy view of the volume; axis order is reported below as (z, y, x)

print("Volume shape (z, y, x):", arr.shape)
print("Voxel spacing (x, y, z) [mm]:", img.GetSpacing())
print("Origin (x, y, z):", img.GetOrigin())
print("Direction (3x3, flattened):", img.GetDirection())

# ----------------------------
# Header of the first file in the series, used for the tag summary that follows
# ----------------------------
rep_meta = read_dicom_header_only(file_list[0], load_private=True)
def get(meta, tag_hex, default="NA"):
    """Look up one DICOM tag in a metadata dict, ignoring hex-digit case.

    Parameters
    ----------
    meta : dict
        Metadata mapping such as the one returned by ``read_dicom_header_only``.
    tag_hex : str
        Tag key in ``"gggg|eeee"`` form, e.g. ``"0008|103E"`` or
        ``"0008|103e"`` (SeriesDescription). Either case is accepted.
    default : object, default "NA"
        Value returned when the tag is absent.

    Returns
    -------
    object
        The stored value for the tag, or ``default`` if no key matches.
    """
    # An exact match wins, which keeps the previous behaviour for existing callers.
    if tag_hex in meta:
        return meta[tag_hex]
    if not isinstance(tag_hex, str):
        return default
    # Fall back to a case-insensitive comparison so "103E" and "103e" are
    # treated as the same tag.
    wanted = tag_hex.lower()
    for key, value in meta.items():
        if isinstance(key, str) and key.lower() == wanted:
            return value
    return default

print("\n--- Series header (selected tags from first file) ---")
# (label, metadata key) pairs, printed in this order from `rep_meta`
for label, tag in (
    ("SeriesDescription (0008,103E):", "0008|103e"),
    ("ProtocolName    (0018,1030):", "0018|1030"),
    ("ImageType       (0008,0008):", "0008|0008"),
    ("SliceThickness  (0018,0050):", "0018|0050"),
    ("PixelSpacing    (0028,0030):", "0028|0030"),
    ("ConvolutionKernel (0018,1210):", "0018|1210"),
    ("ContrastBolusAgent (0018,0010):", "0018|0010"),
    ("KVP            (0018,0060):", "0018|0060"),
    ("StudyDate      (0008,0020):", "0008|0020"),
    ("SeriesDate     (0008,0021):", "0008|0021"),
    ("AcqTime        (0008,0032):", "0008|0032"),
):
    print(label, get(rep_meta, tag))

# If you want to inspect *all* tags for the representative file:
# for k in sorted(rep_meta.keys()):
#     print(k, ":", rep_meta[k])

# ----------------------------
# Visualize a few slices
# ----------------------------
import math
# Show up to five slices spread through the volume (first, 1/3, middle, 2/3,
# last). For short series several of these positions coincide, so the indices
# are de-duplicated; an empty volume yields no indices and nothing is drawn.
n = arr.shape[0]
slice_indices = sorted({i for i in (0, n//3, n//2, (2*n)//3, n-1) if 0 <= i < n})
for idx in slice_indices:
    plt.figure(figsize=(6,6))
    plt.imshow(arr[idx], cmap="gray")
    plt.title(f"Slice {idx+1}/{n}")
    plt.axis("off")
    plt.show()


In [None]:
import SimpleITK as sitk
import matplotlib.pyplot as plt
from tqdm import tqdm

# Render the representative DICOM of every row in `df`, one figure per file.
# You can limit to the first N for a sanity check, e.g. df.head(10).
# NOTE: `img` and `arr` are reassigned here, replacing the volume loaded in
# the previous cell.
files = df["RepresentativeFile"].tolist()

print(f"Rendering {len(files)} representative DICOMs...")

for path in tqdm(files):
    fig = None
    try:
        img = sitk.ReadImage(path)
        arr = sitk.GetArrayFromImage(img)  # shape: (slices, height, width)
        arr2d = arr[0] if arr.ndim == 3 else arr  # take first slice if 3D
        # Title is the folder two levels above the file (e.g. E100**** folder);
        # fall back to the full path when it has fewer than three components.
        parts = path.split('/')
        title = parts[-3] if len(parts) >= 3 else path
        fig = plt.figure(figsize=(5, 5))
        plt.imshow(arr2d, cmap="gray")
        plt.title(title)
        plt.axis("off")
        plt.show()
    except Exception as e:
        # Best effort: report the failing file and continue with the next one.
        print(f"[WARN] Could not read {path}: {e}")
    finally:
        # Release the figure even when drawing failed part-way, so failures
        # do not accumulate open figures over a long file list.
        if fig is not None:
            plt.close(fig)
