In [1]:
!pip install pydicom SimpleITK

Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Collecting SimpleITK
  Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.2 kB)
Downloading pydicom-3.0.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (52.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK, pydicom
Successfully installed SimpleITK-2.5.2 pydicom-3.0.1


In [4]:
import zipfile
import shutil
from pathlib import Path
from collections import defaultdict
import pydicom
from datetime import datetime
from pydicom.uid import generate_uid
import logging
#!/usr/bin/env python3

import argparse
import os
import shutil
from pathlib import Path

def extract_zip(zip_filepath, extract_dir):
    """Extract every member of a zip archive into ``extract_dir``.

    Errors are reported on stdout instead of being raised, so a failed
    extraction does not interrupt the caller. The return value lets the
    caller decide whether it is safe to continue.

    Args:
        zip_filepath: Path of the archive to read.
        extract_dir: Directory that receives the archive contents.

    Returns:
        bool: True when the archive was extracted, False when it was missing,
        was not a valid zip file, or any other error occurred.
    """
    try:
        with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except FileNotFoundError:
        print(f"❌ Zip file not found at {zip_filepath}")
        return False
    except zipfile.BadZipFile:
        print(f"❌ Invalid zip file at {zip_filepath}")
        return False
    except Exception as e:
        # Deliberately best-effort: report and signal failure instead of raising.
        print(f"❌ Unexpected error: {e}")
        return False

    print(f"✅ Successfully extracted {zip_filepath} to {extract_dir}")
    return True

def _unique_name(name, taken):
    """Return ``name`` or a ``stem_N.suffix`` variant not yet in ``taken``, and record it in ``taken``."""
    candidate = name
    stem, suffix = Path(name).stem, Path(name).suffix
    n = 1
    while candidate in taken:
        n += 1
        candidate = f"{stem}_{n}{suffix}"
    taken.add(candidate)
    return candidate


def _single_or_numbered(prefix, count):
    """Return ``<prefix>.dcm`` for the first file and ``<prefix>_NN.dcm`` for any further ones."""
    return f"{prefix}.dcm" if count == 1 else f"{prefix}_{count:02d}.dcm"


def organize_plans(input_dir, output_dir):
    """Group ``*.dcm`` files by StudyInstanceUID and copy them into ``Plan_<n>`` folders.

    Every study found below ``input_dir`` gets a folder ``output_dir/Plan_<n>``
    (numbered from 1 in sorted-path order of first appearance) containing:

    * ``CT/``       - CT slices, original file names (de-duplicated on collision)
    * ``RS.dcm``    - RTSTRUCT (further ones are kept as ``RS_02.dcm``, ...)
    * ``RP.dcm``    - RTPLAN   (further ones are kept as ``RP_02.dcm``, ...)
    * ``RD_NN.dcm`` - RTDOSE files
    * ``RI/RI_NNN.dcm`` - RTIMAGE files

    Files that cannot be read, or that have no StudyInstanceUID, are skipped
    with a message. Files with any other (or no) Modality are reported and
    not copied. A one-line summary is printed per plan.

    Args:
        input_dir: Directory searched recursively for ``*.dcm`` files.
        output_dir: Directory that receives the ``Plan_<n>`` folders; created
            if it does not exist.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    studies = defaultdict(list)
    # Sorted so that plan numbering and RD/RI numbering are reproducible
    # instead of depending on filesystem iteration order.
    for file_path in sorted(input_dir.rglob("*.dcm")):
        try:
            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
            study_uid = ds.StudyInstanceUID
            studies[study_uid].append((file_path, ds))
        except Exception as e:
            print(f"❌ Skipping file {file_path.name}: {e}")

    for i, (study_uid, files) in enumerate(studies.items(), 1):
        plan_dir = output_dir / f"Plan_{i}"
        ct_dir = plan_dir / "CT"
        ri_dir = plan_dir / "RI"
        ct_dir.mkdir(parents=True, exist_ok=True)
        ri_dir.mkdir(parents=True, exist_ok=True)

        # Names already used for CT slices in THIS run; tracking them here
        # (rather than checking the disk) keeps re-runs idempotent.
        ct_names = set()
        rs_count, rp_count, rd_count, ri_count = 0, 0, 0, 0

        for file_path, ds in files:
            # A file without Modality is reported below instead of raising.
            modality = getattr(ds, "Modality", None)
            if modality == "CT":
                # Slices from different sub-folders may share a file name;
                # give each its own target so none is overwritten.
                shutil.copy(file_path, ct_dir / _unique_name(file_path.name, ct_names))
            elif modality == "RTSTRUCT":
                rs_count += 1
                target = _single_or_numbered("RS", rs_count)
                if rs_count > 1:
                    print(f"⚠️ Plan_{i} has more than one RTSTRUCT; {file_path.name} kept as {target}")
                shutil.copy(file_path, plan_dir / target)
            elif modality == "RTPLAN":
                rp_count += 1
                target = _single_or_numbered("RP", rp_count)
                if rp_count > 1:
                    print(f"⚠️ Plan_{i} has more than one RTPLAN; {file_path.name} kept as {target}")
                shutil.copy(file_path, plan_dir / target)
            elif modality == "RTDOSE":
                rd_count += 1
                shutil.copy(file_path, plan_dir / f"RD_{rd_count:02d}.dcm")
            elif modality == "RTIMAGE":
                ri_count += 1
                shutil.copy(file_path, ri_dir / f"RI_{ri_count:03d}.dcm")
            else:
                print(f"ℹ️ Skipping unknown modality {modality}: {file_path.name}")

        # Count what this run copied, not whatever happens to be in the folder.
        print(f"✅ Plan_{i} — CT: {len(ct_names)}, RS: {rs_count}, RP: {rp_count}, RD: {rd_count}, RI: {ri_count}")

def _stamp_identity(ds, identity, series_uid, series_description, series_number, modality):
    """Return a copy of ``ds`` carrying the shared patient/study identity and a fresh SOP Instance UID."""
    out = ds.copy()
    out.PatientID = identity["patient_id"]
    out.PatientName = identity["patient_name"]
    out.StudyInstanceUID = identity["study_uid"]
    out.FrameOfReferenceUID = identity["frame_uid"]
    out.SeriesInstanceUID = series_uid
    out.SeriesDescription = series_description
    out.SeriesNumber = series_number
    out.Modality = modality
    out.SOPInstanceUID = generate_uid()
    out.StudyDescription = identity["study_description"]

    # Keep the file meta header in step with the data set: the Media Storage
    # SOP Instance UID has to equal the (new) SOP Instance UID.
    file_meta = getattr(out, "file_meta", None)
    if file_meta is not None:
        file_meta.MediaStorageSOPInstanceUID = out.SOPInstanceUID
    return out


def _relink_image_ref(img, uid_map, fallback=None):
    """Point one image reference at the re-issued CT UID, or at ``fallback`` when the old UID is unknown."""
    old_uid = getattr(img, "ReferencedSOPInstanceUID", None)
    if old_uid is not None and str(old_uid) in uid_map:
        img.ReferencedSOPInstanceUID = uid_map[str(old_uid)]
    elif fallback is not None:
        img.ReferencedSOPInstanceUID = fallback


def _retarget_first_reference(ds, sequence_keyword, target):
    """Make the first item of ``ds.<sequence_keyword>`` (if any) reference ``target``'s SOP class and instance."""
    sequence = getattr(ds, sequence_keyword, None)
    if sequence:
        sequence[0].ReferencedSOPInstanceUID = target.SOPInstanceUID
        sequence[0].ReferencedSOPClassUID = target.SOPClassUID


def relink_and_save_plan(plan_dir, output_root):
    """Re-issue identifiers for one organized plan folder and save a consistently linked copy.

    Reads ``CT/*.dcm``, ``RS.dcm``, ``RP.dcm``, ``RD*.dcm`` and (if present)
    ``RI/*.dcm`` from ``plan_dir``. Every object receives the same new
    PatientID / PatientName / StudyInstanceUID / FrameOfReferenceUID, its own
    series identity and a new SOPInstanceUID. Cross references are then
    re-pointed:

    * RTSTRUCT -> new study, CT series and CT slices
    * RTPLAN   -> new RTSTRUCT
    * RTDOSE   -> new RTSTRUCT and RTPLAN
    * RTIMAGE  -> new RTPLAN

    Results are written to ``output_root/<plan_dir.name>`` as ``CT_NNN.dcm``
    (ordered by slice z position), ``RS.dcm``, ``RP.dcm``, ``RD_<n>.dcm`` and
    ``RI_NNN.dcm``.

    Args:
        plan_dir: Folder laid out like the ``Plan_<n>`` folders written by
            ``organize_plans``.
        output_root: Parent directory for the relinked copy; created if needed.

    Raises:
        FileNotFoundError: if ``RS.dcm`` or ``RP.dcm`` is missing (raised by
            the DICOM reader).
    """
    plan_dir = Path(plan_dir)
    output_root = Path(output_root)
    output_dir = output_root / plan_dir.name
    output_dir.mkdir(parents=True, exist_ok=True)

    ct_files = sorted(
        (pydicom.dcmread(f) for f in (plan_dir / "CT").glob("*.dcm")),
        key=lambda x: float(x.ImagePositionPatient[2]),
    )
    rs = pydicom.dcmread(plan_dir / "RS.dcm")
    rp = pydicom.dcmread(plan_dir / "RP.dcm")
    # Sorted so RD_<n> / RI_<n> numbering does not depend on directory order.
    rd_list = [pydicom.dcmread(p) for p in sorted(plan_dir.glob("RD*.dcm"))]
    ri_dir = plan_dir / "RI"
    ri_list = [pydicom.dcmread(p) for p in sorted(ri_dir.glob("*.dcm"))] if ri_dir.exists() else []

    identity = {
        "patient_id": f"PAT_{datetime.now().strftime('%Y%m%d%H%M%S')}",
        "patient_name": "DICOM_RELINKED",
        "study_uid": generate_uid(),
        "frame_uid": generate_uid(),
        "study_description": "Reconstructed RT Study",
    }
    study_uid = identity["study_uid"]
    frame_uid = identity["frame_uid"]
    ct_series_uid = generate_uid()
    rs_series_uid = generate_uid()
    rp_series_uid = generate_uid()

    # --- CT slices -------------------------------------------------------
    ct_sop_uids = []   # new UIDs in z order (positional fallback below)
    ct_uid_map = {}    # original SOP UID -> new SOP UID
    for i, original in enumerate(ct_files):
        # Read the old UID before stamping; the copy may share state with it.
        old_uid = getattr(original, "SOPInstanceUID", None)
        ds = _stamp_identity(original, identity, ct_series_uid, "CT Series", 1, "CT")
        ct_sop_uids.append(ds.SOPInstanceUID)
        if old_uid is not None:
            ct_uid_map[str(old_uid)] = ds.SOPInstanceUID
        ds.save_as(output_dir / f"CT_{i:03d}.dcm")

    # --- Structure set ---------------------------------------------------
    rs = _stamp_identity(rs, identity, rs_series_uid, "Structure Set", 2, "RTSTRUCT")

    for ref in getattr(rs, "ReferencedFrameOfReferenceSequence", []):
        ref.FrameOfReferenceUID = frame_uid
        for study in getattr(ref, "RTReferencedStudySequence", []):
            study.ReferencedSOPInstanceUID = study_uid
            for series in getattr(study, "RTReferencedSeriesSequence", []):
                series.SeriesInstanceUID = ct_series_uid
                for idx, img in enumerate(getattr(series, "ContourImageSequence", [])):
                    # Prefer the slice this item referenced originally. The CT
                    # list is z-sorted while this sequence is not, so position
                    # alone can pair an item with the wrong slice. Position is
                    # only used when the old UID is not one of the CT slices.
                    positional = ct_sop_uids[idx] if idx < len(ct_sop_uids) else None
                    _relink_image_ref(img, ct_uid_map, fallback=positional)

    # Each contour also names the slice it was drawn on; follow the same
    # old -> new mapping (unknown UIDs are left untouched).
    for roi_contour in getattr(rs, "ROIContourSequence", []):
        for contour in getattr(roi_contour, "ContourSequence", []):
            for img in getattr(contour, "ContourImageSequence", []):
                _relink_image_ref(img, ct_uid_map)

    # Every ROI refers to the frame of reference by UID as well.
    for roi in getattr(rs, "StructureSetROISequence", []):
        roi.ReferencedFrameOfReferenceUID = frame_uid

    rs.save_as(output_dir / "RS.dcm")

    # --- Plan ------------------------------------------------------------
    rp = _stamp_identity(rp, identity, rp_series_uid, "Radiotherapy Plan", 3, "RTPLAN")
    _retarget_first_reference(rp, "ReferencedStructureSetSequence", rs)
    rp.save_as(output_dir / "RP.dcm")

    # --- Dose ------------------------------------------------------------
    for i, original in enumerate(rd_list):
        rd = _stamp_identity(original, identity, generate_uid(), f"RT Dose {i+1}", 4 + i, "RTDOSE")
        _retarget_first_reference(rd, "ReferencedStructureSetSequence", rs)
        _retarget_first_reference(rd, "ReferencedRTPlanSequence", rp)
        rd.save_as(output_dir / f"RD_{i+1}.dcm")

    # --- RT images -------------------------------------------------------
    for i, original in enumerate(ri_list):
        ri = _stamp_identity(original, identity, generate_uid(), f"RI Image {i+1}", 100 + i, "RTIMAGE")
        _retarget_first_reference(ri, "ReferencedRTPlanSequence", rp)
        ri.save_as(output_dir / f"RI_{i+1:03d}.dcm")

    print(f"✅ Relinked and saved to: {output_dir}")

def _path_component(value):
    """Render a value as one non-empty path component (path separators replaced by ``_``)."""
    text = str(value).replace("/", "_").replace("\\", "_")
    return text if text not in ("", ".", "..") else "_"


def relink_ris_to_plan(plan_dir):
    """Attach the ``RI_*.dcm`` images in ``plan_dir`` to the plan stored in ``RP.dcm``.

    For every RT image the function

    * sets ``RadiationMachineName`` to the treatment machine of the plan beam
      named by the image's ``ReferencedBeamNumber`` (when the plan has one),
    * makes the first item of ``ReferencedRTPlanSequence`` reference the plan's
      SOP class and instance (creating the item if necessary),
    * sets ``PatientID`` to the plan's PatientID,

    and writes the result to ``plan_dir/<PatientID>/<RTPlanLabel>/``. Images
    that already agree with the plan are copied unchanged. Problems are
    printed; nothing is raised.

    Args:
        plan_dir: Folder containing ``RP.dcm`` and the ``RI_*.dcm`` files.

    Returns:
        None
    """
    plan_dir = Path(plan_dir)
    rp_path = plan_dir / "RP.dcm"
    ri_files = sorted(plan_dir.glob("RI_*.dcm"))

    if not rp_path.exists():
        print(f"❌ RP.dcm not found in {plan_dir}")
        return

    try:
        rp = pydicom.dcmread(rp_path, stop_before_pixels=True)
        plan_uid = rp.SOPInstanceUID
        patient_id = rp.PatientID
        plan_id = rp.RTPlanLabel
        sop_class_uid = rp.SOPClassUID

        # A plan without BeamSequence simply yields no machine corrections.
        machine_by_beam = {
            beam.BeamNumber: getattr(beam, "TreatmentMachineName", None)
            for beam in getattr(rp, "BeamSequence", [])
        }
    except Exception as e:
        print(f"❌ Could not read RP.dcm: {e}")
        return

    # IDs and labels are free text; keep each one a single directory level.
    dest_dir = plan_dir / _path_component(patient_id) / _path_component(plan_id)
    dest_dir.mkdir(parents=True, exist_ok=True)

    for ri_path in ri_files:
        try:
            ri = pydicom.dcmread(ri_path)
            beam_num = getattr(ri, "ReferencedBeamNumber", None)
            correct_machine = machine_by_beam.get(beam_num)
            needs_fix = False

            if correct_machine and getattr(ri, "RadiationMachineName", None) != correct_machine:
                ri.RadiationMachineName = correct_machine
                needs_fix = True
                print(f"🔧 Fixed machine in {ri_path.name}")

            # An RT image points at its plan through ReferencedRTPlanSequence.
            # ReferencedImageSequence is for related images and is left alone.
            plan_refs = getattr(ri, "ReferencedRTPlanSequence", None)
            ref_uid = getattr(plan_refs[0], "ReferencedSOPInstanceUID", None) if plan_refs else None

            if ref_uid != plan_uid:
                if not plan_refs:
                    ri.ReferencedRTPlanSequence = [pydicom.Dataset()]
                ri.ReferencedRTPlanSequence[0].ReferencedSOPInstanceUID = plan_uid
                ri.ReferencedRTPlanSequence[0].ReferencedSOPClassUID = sop_class_uid
                needs_fix = True
                print(f"🔧 Fixed UID in {ri_path.name}")

            # A differing PatientID alone must also trigger a rewrite,
            # otherwise the image is copied with the wrong patient.
            if getattr(ri, "PatientID", None) != patient_id:
                needs_fix = True

            out_path = dest_dir / ri_path.name
            if needs_fix:
                ri.PatientID = patient_id
                ri.save_as(out_path)
            else:
                shutil.copy(ri_path, out_path)
        except Exception as e:
            print(f"❌ Error processing {ri_path.name}: {e}")

    # Printed like every other status line: a logging.info() call is dropped
    # whenever the host has already configured the root logger.
    print("✅ Relinking RI complete.")

def zip_plan_folder(plan_dir, zip_path):
    """Write everything below ``plan_dir`` into a deflate-compressed zip archive.

    Entries are stored relative to ``plan_dir`` and added in sorted order.
    Sub-directories get their own entries, so empty folders are preserved.
    If the archive itself lies inside ``plan_dir`` it is skipped rather than
    being added to itself.

    Args:
        plan_dir: Folder whose contents are archived.
        zip_path: Path of the archive to create (overwritten if it exists).
    """
    root = Path(plan_dir)
    archive = Path(zip_path).resolve()

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for entry in sorted(root.rglob('*')):
            if entry.resolve() == archive:
                # The archive already exists at this point; never zip it into itself.
                continue
            zipf.write(entry, entry.relative_to(root))
    print(f"🗜️  Zipped to: {zip_path}")



In [5]:
# End-to-end run: extract the archive, group files per study, relink the
# first plan, attach its RT images and zip the result.
zip_path = Path("/content/PLAN QA - 6FFF.zip")
workdir = Path("/content/work")
outputdir = Path("/content/output")
extract_dir = workdir / "dicomfiles"
org_dir = workdir / "organized_plans"
relinked_dir = workdir / "relinked_plans"

# Fail fast: extract_zip() only prints when the archive is missing, and the
# later steps would then silently run on whatever an earlier run left behind.
if not zip_path.is_file():
    raise FileNotFoundError(f"Input archive not found: {zip_path}")

print(f"📦 Extracting {zip_path}...")
extract_zip(zip_path, extract_dir)

print("📂 Organizing DICOM files...")
organize_plans(extract_dir, org_dir)

# Only Plan_1 is processed; it needs a structure set and a plan to relink.
plan_1 = org_dir / "Plan_1"
missing = [name for name in ("RS.dcm", "RP.dcm") if not (plan_1 / name).is_file()]
if missing:
    raise FileNotFoundError(f"{plan_1} is missing {', '.join(missing)}; cannot relink")

print("🔁 Relinking first plan...")
relink_and_save_plan(plan_1, relinked_dir)

print("🔗 Relinking RI files...")
relink_ris_to_plan(relinked_dir / "Plan_1")

print("🗜️ Zipping output...")
output_zip = outputdir / "Plan_1.zip"
outputdir.mkdir(parents=True, exist_ok=True)
zip_plan_folder(relinked_dir / "Plan_1", output_zip)

print(f"✅ Done! Output saved to {output_zip}")

📦 Extracting /content/PLAN QA - 6FFF.zip...
✅ Successfully extracted /content/PLAN QA - 6FFF.zip to /content/work/dicomfiles
📂 Organizing DICOM files...
✅ Plan_1 — CT: 101, RS: 1, RP: 1, RD: 2, RI: 0
✅ Plan_2 — CT: 0, RS: 0, RP: 0, RD: 0, RI: 1
🔁 Relinking first plan...
✅ Relinked and saved to: /content/work/relinked_plans/Plan_1
🔗 Relinking RI files...
🗜️ Zipping output...
🗜️  Zipped to: /content/output/Plan_1.zip
✅ Done! Output saved to /content/output/Plan_1.zip
