In [None]:
import os
import shutil
from pathlib import Path
from datetime import datetime
from queue import Queue
from pynetdicom import debug_logger
from pydicom.dataset import Dataset
from pydicom.sequence import Sequence
from rosamllib.networking import QueryRetrieveSCU, StoreSCP
from rosamllib.utils import query_df, get_referenced_sop_instance_uids
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings(
    "ignore",
    message=r"Invalid value for VR",
)

In [None]:
# SOP Class UID -> modality keyword used throughout this notebook.
# This is the single source of truth; add new classes here only.
MODALITY_BY_CLASS_UID = {
    "1.2.840.10008.5.1.4.1.1.2": "CT",
    "1.2.840.10008.5.1.4.1.1.4": "MR",
    "1.2.840.10008.5.1.4.1.1.128": "PT",
    "1.2.840.10008.5.1.4.1.1.481.1": "RTIMAGE",
    "1.2.840.10008.5.1.4.1.1.1": "CR",
    "1.2.840.10008.5.1.4.1.1.481.4": "RTRECORD",
    "1.2.840.10008.5.1.4.1.1.481.5": "RTPLAN",
    "1.2.840.10008.5.1.4.1.1.481.2": "RTDOSE",
    "1.2.840.10008.5.1.4.1.1.481.3": "RTSTRUCT",
    "1.2.840.10008.5.1.4.1.1.66.1": "REG",
}

# Modality keyword -> SOP Class UID, derived from the table above so the two
# lookups can never disagree.
# NOTE: if two class UIDs are ever mapped to the same modality above, the one
# listed last wins here.
CLASS_UID_BY_MODALITY = {
    modality: class_uid for class_uid, modality in MODALITY_BY_CLASS_UID.items()
}

In [None]:
# Query/Retrieve SCU used for every C-ECHO / C-FIND / C-MOVE in this notebook.
# Its log goes to a file named after today's date and is kept off the console.
scu = QueryRetrieveSCU("YA_DOSE")
scu.configure_logging(
    log_to_file=True,
    log_to_console=False,
    log_file_path=f"qr_scu_{datetime.today().strftime('%Y-%m-%d')}.log",
)

# Remote application entities the SCU can address by name.
# Replace the AE title / IP / port placeholders before running.
scu.add_remote_ae("ARIA", "ARIA_AE_TITLE", "ARIA_IP", "ARIA_PORT")
scu.add_remote_ae("YA_DOSE", "YA_DOSE", "YA_DOSE_IP", "YA_DOSE_PORT")

In [None]:
# Storage SCP that receives the instances requested via C-MOVE.
# Replace the IP / port placeholders before running.
scp = StoreSCP("YA_DOSE", "YA_DOSE_IP", "YA_DOSE_PORT")
scp.configure_logging(
    log_to_file=True,
    log_to_console=False,
    log_file_path=f"store_scp_{datetime.today().strftime('%Y-%m-%d')}.log",
)
# block=False: start without blocking so the following cells can keep running.
scp.start(block=False)

In [None]:
# Connectivity check against the "YA_DOSE" entry registered on the SCU
# (the same AE title the Store SCP above was created with).
scu.c_echo("YA_DOSE")

In [None]:
# Connectivity check against the "ARIA" entry registered on the SCU.
scu.c_echo("ARIA")

In [None]:
# In-memory staging buffer filled by stage_received_dcms.
received_dcm = []


def stage_received_dcms(event):
    """Store handler that appends the event's dataset to ``received_dcm``.

    Nothing is written to disk and nothing is returned; callers read (and
    reset) ``received_dcm`` themselves.
    """
    received_dcm.append(event.dataset)

In [None]:
def save_received_dcms(event, data_path=Path(r"D:\DosePredictionData\FROM_ARIA_TEST")):
    """Store handler that writes the event's dataset to disk.

    The file is written to
    ``<data_path>/<PatientID>/<Modality>-<SeriesInstanceUID>/<Modality>.<SOPInstanceUID>.dcm``;
    missing directories are created.

    Parameters
    ----------
    event :
        Object exposing ``dataset`` and ``file_meta``; the file meta is
        attached to the dataset before saving.
    data_path : path-like, optional
        Root output directory. Defaults to the location this notebook has
        always used, so registering the function as a handler is unchanged.
    """
    ds = event.dataset
    ds.file_meta = event.file_meta

    series_dir = Path(data_path) / f"{ds.PatientID}" / f"{ds.Modality}-{ds.SeriesInstanceUID}"
    series_dir.mkdir(parents=True, exist_ok=True)
    save_path = series_dir / f"{ds.Modality}.{ds.SOPInstanceUID}.dcm"

    # NOTE(review): newer pydicom releases deprecate `write_like_original` in
    # favour of `enforce_file_format` — confirm against the installed version.
    ds.save_as(save_path, write_like_original=False)

In [None]:
# Register the in-memory staging handler on the Store SCP, so datasets it
# receives are appended to `received_dcm` rather than written to disk.
scp.add_custom_function_store(stage_received_dcms)

In [None]:
# Query ARIA for every RTPLAN instance (IMAGE-level C-FIND).
plan_ds = Dataset()

# Keys left empty: no value to match on, listed so they come back in the
# responses (standard C-FIND usage — confirm ARIA returns all of them).
for keyword in ("PatientID", "StudyInstanceUID", "SeriesInstanceUID", "SOPInstanceUID"):
    setattr(plan_ds, keyword, "")

# Keys with a value: restrict the query to RT Plan instances.
plan_ds.SOPClassUID = CLASS_UID_BY_MODALITY["RTPLAN"]
plan_ds.Modality = "RTPLAN"

for keyword in ("StudyDate", "ApprovalStatus", "RTPlanName", "RTPlanLabel", "RTPlanDate"):
    setattr(plan_ds, keyword, "")

plan_ds.QueryRetrieveLevel = "IMAGE"
results = scu.c_find("ARIA", plan_ds)

In [None]:
# Wildcard patterns used to select plans by label, one per site token.
# Every pattern has the form "*1*<token>*" except the "*1H*N*" entry, which
# has no wildcard between "1" and "H".
plan_names = [
    *(f"*1*{site}*" for site in ("BOT", "Neck", "Ophr")),
    "*1H*N*",
    *(
        f"*1*{site}*"
        for site in ("Prtd", "FOM", "Cavs", "Mxla", "Nphr", "Phnx", "Sins", "Sphn", "tnsl")
    ),
]

In [None]:
# Tabulate the RTPLAN C-FIND responses. The query dataset is passed alongside
# the results (presumably to define the columns — confirm in rosamllib).
results_df = QueryRetrieveSCU.convert_results_to_df(results, plan_ds)
# Total number of plans returned by ARIA.
len(results_df)

In [None]:
# Keep plans whose RTPlanLabel matches any pattern in plan_names.
# NOTE(review): assumes query_df treats a list as "match any" and expands the
# "*" wildcards — confirm against rosamllib.utils.query_df.
filters = {"RTPlanLabel": plan_names}
filtered_df = query_df(results_df, **filters)
# filtered_df.to_csv("../csv/all_HN_plans_aria.csv")
len(filtered_df)

In [None]:
# Keep only plans that match plan_names AND have ApprovalStatus "APPROVED".
# This filters results_df again (not filtered_df), which is why the label
# filter is repeated here.
filters_approved = {"RTPlanLabel": plan_names, "ApprovalStatus": "APPROVED"}
approved_df = query_df(results_df, **filters_approved)
# approved_df.to_csv("../csv/all_approved_HN_plans.csv")
len(approved_df)

In [None]:
# Drop boost plans: exclude every plan whose RTPlanLabel matches "b*".
# NOTE(review): assumes query_df applies wildcard matching to "neq" and that
# its case handling catches all boost labels — confirm.
filter_non_boost = {"RTPlanLabel": {"neq":"b*"}}
approved_non_boost_df = query_df(approved_df, **filter_non_boost).reset_index(drop=True)
# approved_non_boost_df.to_csv("../csv/approved_HN_non_boost_plans.csv")
len(approved_non_boost_df)

In [None]:
# Keep plans with RTPlanDate on or after 2015-01-01 ("gte" is inclusive).
# NOTE(review): assumes query_df compares RTPlanDate as a date; raw DICOM date
# values are formatted YYYYMMDD, without dashes — confirm how they are parsed.
filter_plan_date = {"RTPlanDate": {"gte":"2015-01-01"}}
approved_non_boost_gte_2015_df = query_df(approved_non_boost_df, **filter_plan_date)
len(approved_non_boost_gte_2015_df)

In [None]:
# Preview the first rows of the approved, non-boost, 2015-onwards plans.
approved_non_boost_gte_2015_df.head()

In [None]:
# Query for RTRECORDs that reference the RTPlans,
# then filter out RTPlans not referenced by any RTRECORD
# (check if it is different from 1275)
record_results_list = []      # one representative RTRECORD per referenced plan
unique_plan_results = set()   # SOPInstanceUIDs of plans with at least one record

# IMAGE-level C-FIND identifier, reused for every plan; the patient, study and
# referenced-plan keys are overwritten on each iteration.
record_query_ds = Dataset()
record_query_ds.PatientID = ""
record_query_ds.StudyInstanceUID = ""
record_query_ds.SeriesInstanceUID = ""
record_query_ds.SOPInstanceUID = ""
record_query_ds.SOPClassUID = CLASS_UID_BY_MODALITY["RTRECORD"]
record_query_ds.ReferencedSOPClassUID = CLASS_UID_BY_MODALITY["RTPLAN"]
record_query_ds.ReferencedSOPInstanceUID = ""
record_query_ds.QueryRetrieveLevel = "IMAGE"

for indx, row in tqdm(approved_non_boost_gte_2015_df.iterrows(),
                      total=len(approved_non_boost_gte_2015_df),
                      desc="Processing plans", leave=False):
    plan_uid = row["SOPInstanceUID"]
    record_query_ds.PatientID = row["PatientID"]
    record_query_ds.StudyInstanceUID = row["StudyInstanceUID"]
    record_query_ds.ReferencedSOPInstanceUID = plan_uid
    record_results = scu.c_find("ARIA", record_query_ds)

    for record in record_results or []:
        # A response may omit the referenced UID; treat that as "no match"
        # instead of letting an AttributeError abort the whole run.
        if getattr(record, "ReferencedSOPInstanceUID", None) != plan_uid:
            continue
        record.Modality = "RTRECORD"
        if plan_uid not in unique_plan_results:
            unique_plan_results.add(plan_uid)
            record_results_list.append(record)
        # Only the first matching record per plan is kept, so stop scanning.
        break

In [None]:
# Number of plans referenced by at least one RTRECORD.
len(unique_plan_results)

In [None]:
# Number of kept RTRECORDs (one per referenced plan, so this should equal the
# count above).
len(record_results_list)

In [None]:
# Tabulate the kept RTRECORD responses, passing the record query dataset
# alongside them (same call pattern as for the plan results).
records_with_unique_referenced_plans_df = QueryRetrieveSCU.convert_results_to_df(record_results_list, record_query_ds)

In [None]:
# Row count of the RTRECORD table.
len(records_with_unique_referenced_plans_df)

In [None]:
# records_with_unique_referenced_plans_df.to_csv("../csv/records_with_unique_referenced_plans_aria.csv")

In [None]:
# SOPInstanceUIDs of the filtered plans that no RTRECORD refers to.
no_record_plans = [
    plan_uid
    for plan_uid in approved_non_boost_gte_2015_df["SOPInstanceUID"]
    if plan_uid not in unique_plan_results
]

In [None]:
# Number of filtered plans without any RTRECORD.
len(no_record_plans)

In [None]:
# Restrict the filtered plans to those referenced by at least one RTRECORD.
has_record_mask = approved_non_boost_gte_2015_df["SOPInstanceUID"].isin(unique_plan_results)
nonboost_plans_with_records_df = (
    approved_non_boost_gte_2015_df
    .loc[has_record_mask]
    .reset_index(drop=True)
)
len(nonboost_plans_with_records_df)

In [None]:
# FIFO of C-MOVE identifiers for the complete data sets; filled by the staging
# loop below and drained by the final move loop.
c_move_queue = Queue()

In [None]:
# For each filtered plan, look up its dose(s), RTSTRUCT and planning CT, and
# queue the C-MOVE identifiers only if all of them are found.

def _retrieve_staged_instance(move_identifier):
    """C-MOVE one instance from ARIA to YA_DOSE and return what was staged.

    The staging buffer is emptied *before* the request so that datasets left
    over from an interrupted or earlier transfer can never be mistaken for the
    instance requested here.

    Parameters
    ----------
    move_identifier : Dataset
        C-MOVE identifier for the instance to fetch.

    Returns
    -------
    Dataset or None
        The first dataset the staging handler appended to ``received_dcm``,
        or None when nothing arrived.

    Notes
    -----
    Assumes ``scu.c_move`` returns only after the store sub-operations have
    completed (the original inline code relied on the same thing — confirm).
    """
    # Clear in place: the staging handler appends to this same list object.
    received_dcm.clear()
    scu.c_move("ARIA", move_identifier, "YA_DOSE")
    if not received_dcm:
        return None
    staged = received_dcm[0]
    received_dcm.clear()
    return staged


for plan_idx, row in tqdm(nonboost_plans_with_records_df.iterrows(),
                          total=len(nonboost_plans_with_records_df),
                          desc="Processing plans",
                          leave=False):
    # 1. Move the RTPlan to staging so its references can be read.
    plan_move_ds = Dataset()
    plan_move_ds.PatientID = row["PatientID"]
    plan_move_ds.StudyInstanceUID = row["StudyInstanceUID"]
    plan_move_ds.SeriesInstanceUID = row["SeriesInstanceUID"]
    plan_move_ds.SOPInstanceUID = row["SOPInstanceUID"]
    plan_move_ds.QueryRetrieveLevel = "IMAGE"
    ds_plan = _retrieve_staged_instance(plan_move_ds)
    if ds_plan is None:
        continue

    # 2. Query for all RTDose (summation type PLAN) that reference the RTPlan.
    referenced_plan_item = Dataset()
    referenced_plan_item.ReferencedSOPClassUID = CLASS_UID_BY_MODALITY["RTPLAN"]
    referenced_plan_item.ReferencedSOPInstanceUID = row["SOPInstanceUID"]

    dose_query_ds = Dataset()
    dose_query_ds.PatientID = row["PatientID"]
    dose_query_ds.StudyInstanceUID = row["StudyInstanceUID"]
    dose_query_ds.SeriesInstanceUID = ""
    dose_query_ds.SOPInstanceUID = ""
    dose_query_ds.SOPClassUID = CLASS_UID_BY_MODALITY["RTDOSE"]
    dose_query_ds.ReferencedRTPlanSequence = Sequence([referenced_plan_item])
    dose_query_ds.DoseSummationType = "PLAN"
    dose_query_ds.QueryRetrieveLevel = "IMAGE"
    dose_results = scu.c_find("ARIA", dose_query_ds)
    if not dose_results:
        continue

    # 3. Get the RTStruct referenced by the plan (the first one, if several).
    referenced_structs = get_referenced_sop_instance_uids(ds_plan).get(
        CLASS_UID_BY_MODALITY["RTSTRUCT"]
    )
    if not referenced_structs:
        continue

    # 4. Move the RTStruct to staging.
    struct_move_ds = Dataset()
    struct_move_ds.PatientID = row["PatientID"]
    struct_move_ds.StudyInstanceUID = row["StudyInstanceUID"]
    struct_move_ds.SeriesInstanceUID = ""
    struct_move_ds.SOPInstanceUID = referenced_structs[0]
    struct_move_ds.QueryRetrieveLevel = "IMAGE"
    ds_struct = _retrieve_staged_instance(struct_move_ds)
    if ds_struct is None:
        continue

    # 5. Get one CT slice referenced by the RTStruct ...
    referenced_ct_slices = get_referenced_sop_instance_uids(ds_struct).get(
        CLASS_UID_BY_MODALITY["CT"]
    )
    if not referenced_ct_slices:
        continue

    # ... and query for it to learn the SeriesInstanceUID of the image.
    ct_query_ds = Dataset()
    ct_query_ds.PatientID = row["PatientID"]
    ct_query_ds.StudyInstanceUID = row["StudyInstanceUID"]
    ct_query_ds.SeriesInstanceUID = ""
    ct_query_ds.SOPInstanceUID = referenced_ct_slices[0]
    ct_query_ds.QueryRetrieveLevel = "IMAGE"
    ct_results = scu.c_find("ARIA", ct_query_ds)
    if not ct_results:
        continue

    # 6. Everything is available: queue CT series, struct, plan and dose(s).
    for ct_move_ds in ct_results:
        # Turn the slice-level response into a SERIES-level identifier so the
        # whole series is moved.
        ct_move_ds.SOPInstanceUID = ""
        ct_move_ds.QueryRetrieveLevel = "SERIES"
        c_move_queue.put(ct_move_ds)
    c_move_queue.put(struct_move_ds)
    c_move_queue.put(plan_move_ds)
    # NOTE(review): the C-FIND responses are queued unchanged as C-MOVE
    # identifiers — confirm ARIA accepts the extra (non-unique) keys they carry.
    for dose_move_ds in dose_results:
        c_move_queue.put(dose_move_ds)

In [None]:
# Number of C-MOVE identifiers waiting to be sent.
c_move_queue.qsize()

In [None]:
# Staging is finished: unregister the in-memory handler from the Store SCP.
scp.remove_custom_function_store(stage_received_dcms)

In [None]:
# From here on, received datasets are written to disk by save_received_dcms.
scp.add_custom_function_store(save_received_dcms)

In [None]:
# C-MOVE identifiers whose move raised an exception in the loop below.
error_ds = []

In [None]:
# Now move all complete data (plan, dose, struct, planning image).
# A failing move must not stop the rest of the queue: the identifier is kept in
# error_ds for a later retry and the reason is surfaced as a warning instead of
# being discarded.
# NOTE(review): only raised exceptions are caught; if scu.c_move reports
# failures through its return value, those are not recorded — confirm.
while not c_move_queue.empty():
    move_ds = c_move_queue.get()
    try:
        scu.c_move("ARIA", move_ds, "YA_DOSE")
    except Exception as exc:
        error_ds.append(move_ds)
        warnings.warn(
            "C-MOVE failed for "
            f"SeriesInstanceUID={getattr(move_ds, 'SeriesInstanceUID', '?')!s}, "
            f"SOPInstanceUID={getattr(move_ds, 'SOPInstanceUID', '?')!s}: {exc!r}"
        )

In [None]:
# Number of moves that raised an exception.
len(error_ds)

In [None]:
# Shut down the Store SCP started earlier in the notebook.
scp.stop()