# 1. Read in all the XML reports

Description: Cycle through all of the xml reports and convert to 3 datasets

- lungcaddata: NOT USED
- scan_info_data: holds scan level info, including spacing, origiin etc.
- nodule_info_dtaa: holds raw Veolity generated nodule data


In [33]:

import pandas as pd
from pathlib import Path
import xml.etree.ElementTree as ET


def recursive_parse(element, parent_key=''):
    items = {}
    for child in element:
        key = f"{parent_key}.{child.tag}" if parent_key else child.tag
        if len(child):
            items.update(recursive_parse(child, key))
        else:
            items[key] = child.text
    return items

def parse_xml(xml_file):
    

    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Extracting LUNGCAD data
    for lungcad in root.findall('LungCAD'):
        lungcad_data = recursive_parse(lungcad)

    # Extracting scan_info data
    scan_info_data = recursive_parse(root.findall('ImageInfo'))

    nodules_data = []
    # Extracting nodules data
    for nodule in root.findall('*Finding'):
        nodules_data.append(recursive_parse(nodule))

    return lungcad_data, scan_info_data, nodules_data


xml_scan_data = []
xml_nodule_data = []

for xml_file in Path('Veolity').rglob('*.xml'):
    if xml_file.suffix == '.xml':

        _, scan_info_data, nodules_data = parse_xml(xml_file)


        xml_scan_data.append(scan_info_data)

        for nodule in nodules_data:
            nodule['PatientUID'] = scan_info_data['ImageInfo.PatientUID']
            nodule['SeriesUID'] = scan_info_data['ImageInfo.SeriesUID']
        xml_nodule_data.extend(nodules_data)


xml_scan_data = pd.DataFrame(xml_scan_data)
display(xml_scan_data.head())

xml_nodule_data = pd.DataFrame(xml_nodule_data)
xml_nodule_data['X'] = xml_nodule_data['X'].astype(float)
xml_nodule_data['Y'] = xml_nodule_data['Y'].astype(float)
xml_nodule_data['Z'] = xml_nodule_data['Z'].astype(float)
xml_nodule_data['Probability'] = xml_nodule_data['Probability'].astype(float)
xml_nodule_data['Volume_mm3'] = xml_nodule_data['Diameter_mm'].astype(float)
xml_nodule_data['Diameter_mm'] = xml_nodule_data['Diameter_mm'].astype(float)
display(xml_nodule_data.head())


Unnamed: 0,ImageInfo.Dimensions.dimX,ImageInfo.Dimensions.dimY,ImageInfo.Dimensions.dimZ,ImageInfo.VoxelSize.voxelSizeX,ImageInfo.VoxelSize.voxelSizeY,ImageInfo.VoxelSize.voxelSizeZ,ImageInfo.Origin.originX,ImageInfo.Origin.originY,ImageInfo.Origin.originZ,ImageInfo.Orientation,ImageInfo.PatientName,ImageInfo.PatientUID,ImageInfo.StudyUID,ImageInfo.SeriesUID
0,512,512,428,0.65,0.65,0.800049,-137.956,-162.956,1359.9,"1, 0, 0, 0, 1, 0, 0, 0, 1",UCLH_48592224,UCLH_48592224,1.3.6.1.4.1.34261.20.1148129279372224548232553...,1.3.6.1.4.1.34261.20.1755228741968966248152185...
1,512,512,376,0.625,0.625,0.800049,-159.688,-159.688,1319.0,"1, 0, 0, 0, 1, 0, 0, 0, 1",UCLH_49013782,UCLH_49013782,1.3.6.1.4.1.34261.20.1785469969471461464513634...,1.3.6.1.4.1.34261.20.4635693852591216991212794...
2,512,512,406,0.747,0.747,0.800049,-191.032,-197.282,1458.0,"1, 0, 0, 0, 1, 0, 0, 0, 1",UCLH_68712279,UCLH_68712279,1.3.6.1.4.1.34261.20.1795244699579476851714245...,1.3.6.1.4.1.34261.20.7859224623825954856612328...
3,512,512,369,0.625,0.625,0.800049,-159.688,-159.688,1665.6,"1, 0, 0, 0, 1, 0, 0, 0, 1",UCLH_11555940,UCLH_11555940,1.3.6.1.4.1.34261.20.3925156779996455394979227...,1.3.6.1.4.1.34261.20.4894172754445862172349492...
4,512,512,357,0.625,0.625,0.800049,-159.688,-159.688,1535.2,"1, 0, 0, 0, 1, 0, 0, 0, 1",UCLH_60804427,UCLH_60804427,1.3.6.1.4.1.34261.20.6862961351526128359435912...,1.3.6.1.4.1.34261.20.2765949936138545773929212...


Unnamed: 0,ID,X,Y,Z,Probability,Volume_mm3,Diameter_mm,PatientUID,SeriesUID
0,26,-76.8562,19.6938,1619.9,-676.747,4.58968,4.58968,UCLH_48592224,1.3.6.1.4.1.34261.20.1755228741968966248152185...
1,8,37.1875,32.8125,1599.8,23.6493,4.77257,4.77257,UCLH_49013782,1.3.6.1.4.1.34261.20.4635693852591216991212794...
2,11,-42.8125,54.6875,1599.0,148.627,4.74688,4.74688,UCLH_49013782,1.3.6.1.4.1.34261.20.4635693852591216991212794...
3,45,102.188,16.5625,1391.8,399.74,7.1475,7.1475,UCLH_49013782,1.3.6.1.4.1.34261.20.4635693852591216991212794...
4,46,139.688,22.8125,1390.2,-371.517,4.52349,4.52349,UCLH_49013782,1.3.6.1.4.1.34261.20.4635693852591216991212794...


# 2. Get Scan Metadata

For each scan there are multiple series, for consistency with SUMMIT we need to only use the soft tissue Veolity report.

In [5]:
scan_metadata = pd.read_csv('/Users/john/Projects/ScansTransfer/LSUT/dicom_metadata_with_recon.csv')
scan_patient_uids = set(scan_metadata['SubjectID'].unique())

display(scan_metadata.head())

Unnamed: 0,SeriesInstanceUID,SubjectID,SeriesDescription,NumberOfFiles,filesby10,Reconstruction
0,1.3.6.1.4.1.34261.20.9893711945195858342155588...,UCLH_00134949,2.0,2,0,DO_NOT_USE
1,1.3.6.1.4.1.34261.20.3438257846713158166155226...,UCLH_00134949,Lung 1.0 hom-1082/LDLS,326,32,Lung
2,1.3.6.1.4.1.34261.20.4941623947498558212956748...,UCLH_00134949,5 hom-1082/LDLS,2,0,DO_NOT_USE
3,1.3.6.1.4.1.34261.20.8746993336615974177722837...,UCLH_00134949,Body 1.0 hom-1082/LDLS,326,32,Soft
4,1.3.6.1.4.1.34261.20.4727291588139957739148357...,UCLH_00239233,2.0,2,0,DO_NOT_USE


In [37]:
# Run some quick checks on the numbers of xml reports we have and how many are missing
soft_metadata = scan_metadata.query('Reconstruction == "Soft"')
soft_patient_uids = set(soft_metadata['SubjectID'].unique())

print(f"Number of patients with soft reconstructions: {len(soft_patient_uids)}")

xml_patient_uids = set(xml_scan_data['ImageInfo.PatientUID'].unique())
print(f"Number of patients with xml reports: {len(xml_patient_uids)}")

missing_patient_uids = soft_patient_uids - xml_patient_uids
print(f"Number of patients with soft reconstructions but no xml reports: {len(missing_patient_uids)}")

subset_patient_uids = soft_patient_uids.intersection(xml_patient_uids) - set(xml_nodule_data['PatientUID'].unique())
print(f"Number of patients with soft reconstructions and xml reports but no nodules: {len(subset_patient_uids)}")

Number of patients with soft reconstructions: 757
Number of patients with xml reports: 565
Number of patients with soft reconstructions but no xml reports: 198
Number of patients with soft reconstructions and xml reports but no nodules: 117


# 3. Isolate the Soft Tissue Generated Nodules

In [40]:
soft_series_uids = set(soft_metadata['SeriesInstanceUID'].unique())
soft_xml_nodule_data = xml_nodule_data.query('SeriesUID in @soft_series_uids')

print(f"Number of scans in XML:", len(xml_patient_uids))
print(f"Number of nodules in XML:", len(xml_nodule_data))

scan_ids_with_nodules = set(soft_xml_nodule_data['PatientUID'].unique())

Number of scans in XML: 565
Number of nodules in XML: 2652


# 4. Make sure that the scan exists on disk

In [28]:
lung_listings = (
    pd.read_csv('lung_listings.txt', header=None, names=['nifti_path'])
    .assign(patient_uid=lambda x: x['nifti_path'].str.split('/').str[-2])
)

lung_patient_uids = set(lung_listings['patient_uid'].unique())
soft_xml_patient_uids = set(soft_xml_nodule_data['PatientUID'].unique())

missing_in_lung_listings = soft_xml_patient_uids - lung_patient_uids
missing_in_soft_xml = lung_patient_uids - soft_xml_patient_uids

print(f"Number of patients in lung_listings:", len(lung_patient_uids))
print(f"Number of patients in soft_xml_nodule_data:", len(soft_xml_patient_uids))
print(f"IDs in soft_xml_nodule_data but not in lung_listings:", len(missing_in_lung_listings))
print(f"IDs in lung_listings but not in soft_xml_nodule_data:", len(missing_in_soft_xml))

Number of patients in lung_listings: 731
Number of patients in soft_xml_nodule_data: 339
IDs in soft_xml_nodule_data but not in lung_listings: 0
IDs in lung_listings but not in soft_xml_nodule_data: 392


# 5. Convert the Nodule data to a dataset_json

This is to feed the transformation to binary masks for all the boxes

In [9]:
import json

dataset_json = {'training': [], 'validation': [], 'test': []}


initial_count = len(soft_xml_nodule_data)
soft_xml_nodule_data = soft_xml_nodule_data[soft_xml_nodule_data['PatientUID'].isin(lung_listings['patient_uid'])]
final_count = len(soft_xml_nodule_data)
dropped_count = initial_count - final_count

print(f"Number of nodules dropped: {dropped_count}")
print(f"Number of scans without nodules: ")

for patient_uid, group in soft_xml_nodule_data.groupby('PatientUID'):
    dataset_json['test'].append(
        {
            'image': f'{patient_uid}/{patient_uid}.nii.gz',
            'box': [
                [
                    nodule.X,
                    nodule.Y,
                    nodule.Z,
                    nodule.Diameter_mm,
                    nodule.Diameter_mm,
                    nodule.Diameter_mm
                ] 
                for nodule in group.itertuples()
            ],
            'label': [1] * len(group)
        }
    )
    

print(dataset_json)
with open('../../models/detection/datasplits/lsut/box_mask_dataset.json', 'w') as f:
    json.dump(dataset_json, f, indent=4)

total_boxes = sum(len(scan['box']) for scan in dataset_json['test'])
average_boxes_per_scan = total_boxes / len(dataset_json['test'])

print('Total number of scans:', len(dataset_json['test']))
print(f"Total number of boxes: {total_boxes}")
print(f"Average number of boxes per scan: {average_boxes_per_scan:.2f}")

Number of nodules dropped: 90
Number of scans without nodules: 
{'training': [], 'validation': [], 'test': [{'image': 'UCLH_00134949/UCLH_00134949.nii.gz', 'box': [[24.375, -15.625, -1449.6, 6.66083, 6.66083, 6.66083], [-70.0, 61.25, -1566.4, 3.08486, 3.08486, 3.08486]], 'label': [1, 1]}, {'image': 'UCLH_00239233/UCLH_00239233.nii.gz', 'box': [[68.5829, 59.4617, 1810.9, 12.4623, 12.4623, 12.4623], [-9.2571, 93.5167, 1789.3, 6.5675, 6.5675, 6.5675], [-15.5121, 76.8367, 1781.3, 12.2557, 12.2557, 12.2557], [-98.9121, 17.7617, 1625.3, 7.6227, 7.6227, 7.6227]], 'label': [1, 1, 1, 1]}, {'image': 'UCLH_00489272/UCLH_00489272.nii.gz', 'box': [[48.9066, -43.9059, 1841.1, 4.91498, 4.91498, 4.91498], [-66.7184, 93.5941, 1768.3, 5.76312, 5.76312, 5.76312]], 'label': [1, 1]}, {'image': 'UCLH_00948384/UCLH_00948384.nii.gz', 'box': [[48.75, 56.25, -968.6, 5.57299, 5.57299, 5.57299], [56.25, 60.0, -979.8, 3.9625, 3.9625, 3.9625], [65.625, 55.625, -984.6, 3.02273, 3.02273, 3.02273], [54.375, 46.875, -9

# 6. Checks on scan / nodule data

In [11]:
import nibabel as nib
import numpy as np
import subprocess


def copy_and_mask(mask_id, source_path, destination_path):


    if Path(f"{source_path}/box-masks/{mask_id}").exists():
        print(f"Box masks for {mask_id} already exists")
    else:
        subprocess.run(
            [
                "scp", 
                "-P", 
                "2222", 
                "-r", 
                f"jmccabe@localhost:{source_path}/box-masks/{mask_id}",
                f"{destination_path}/box-masks/."
            ],
            check=True
        )

    if Path(f"{source_path}/detection/{mask_id}").exists():
        print(f"Detection masks for {mask_id} already exists")
    else:
        subprocess.run(
            [
                "scp", 
                "-P", 
                "2222", 
                "-r", 
                f"jmccabe@localhost:{source_path}/detection/{mask_id}",
                f"{destination_path}/detection/."
            ],
            check=True
        )

    return True

source_path = "/cluster/project0/lung-triage/lsut"
destination_path = "../../cache/sota/lsut"

# for mask_id in ['UCLH_00489272','UCLH_00239233','UCLH_01212990','UCLH_00948384','UCLH_01133076','UCLH_00134949']:
#     _ = copy_and_mask(mask_id, source_path, destination_path)


# 7. Run comparisons against annotations

1. Check how many scans with nodules have been processed and how many are left to be processed
2. Check that the number of nodules marries up with the number of boxes
3. Convert the boxes to row, col, slice (for initial resolution)
4. Check box exists for recorded nodules, convert slices-nod_slice in real world co-ordinates

In [110]:
def pixel_to_real_world(offset, spacing, pixel_value):
    return offset + pixel_value * spacing

annotations = pd.read_csv('annotations.csv')
metaio_metadata = pd.read_csv('lung_metadata.csv').assign(scan_id=lambda x: x['scan_id'].str.replace('.mhd', ''))

annotations = pd.merge(
    metaio_metadata,
    annotations,
    left_on='scan_id',
    right_on='ScananonID',
    how='left'
)

annotations['Nod1_floc'] = annotations.apply(
    lambda row: row['slices'] - row['Nod1_loc'] if pd.notnull(row['Nod1_loc']) else None, axis=1
)

annotations['Nod2_floc'] = annotations.apply(
    lambda row: row['slices'] - row['Nod2_loc'] if pd.notnull(row['Nod2_loc']) else None, axis=1
)
    
annotations['Nod1_real_world'] = annotations.apply(
    lambda row: pixel_to_real_world(row['z-offset'], row['z-spacing'], row['Nod1_floc']) if pd.notnull(row['Nod1_floc']) else (None), axis=1
)

annotations['Nod2_real_world'] = annotations.apply(
    lambda row: pixel_to_real_world(row['z-offset'], row['z-spacing'], row['Nod2_floc']) if pd.notnull(row['Nod2_floc']) else (None), axis=1
)

np.random.seed(42)  # For reproducibility
annotations['Reader'] = np.random.choice([1, 2], size=len(annotations), p=[0.5, 0.5])

print('Total number of annotations:', annotations.shape[0])
print('Total number of scans with soft reconstructions:', soft_metadata.shape[0])
print('Total number of scans saved to nifti (Useable):', lung_listings.shape[0])

nifti_patient_uids = set(lung_listings['patient_uid'])
usable_patient_uids = nifti_patient_uids.intersection(xml_patient_uids)
annotations = annotations.query('ScananonID in @usable_patient_uids')

print('*' * 50)
print('Total number of nifti with xml:', len(usable_patient_uids))

print('-Usable without nodules (Total_no_nods):', annotations.query('Total_no_nods == 0').shape[0])
print('-Usable with nodules (Total_no_nods):', annotations.query('Total_no_nods > 0').shape[0])
print('--Useable with nodules but Nod1_loc is Null:', annotations.query('Total_no_nods > 0 and Nod1_loc.isnull()').shape[0])

scans_with_xml_scan_info = set(annotations.query('Total_no_nods > 0')['ScananonID']).intersection(xml_scan_data['ImageInfo.PatientUID'])
print('--Useable with nodules with xml scan info:', len(scans_with_xml_scan_info))

scans_with_xml_nodules = set(annotations.query('Total_no_nods > 0')['ScananonID']).intersection(xml_nodule_data['PatientUID'])
print('--Useable with nodules with xml nodules:', len(scans_with_xml_nodules))


reader1_no_nodule_samples = annotations.query('Total_no_nods == 0 and Reader == 1').sample(10)
reader2_no_nodule_samples = annotations.query('Total_no_nods == 0 and Reader == 2').sample(10)

reader1_nodule_samples = annotations.query('Total_no_nods > 0 and Reader == 1')
reader2_nodule_samples = annotations.query('Total_no_nods > 0 and Reader == 2')


print()
print('Reader 1 samples without nodules:',len(reader1_no_nodule_samples))
print('Reader 1 samples with nodules:',len(reader1_nodule_samples))
print('Reader 1 samples without nodule annotations:',len(reader1_nodule_samples.query('Nod1_loc.isnull()')))
print('Reader 1 samples with nodules but no xml nodules:',len(reader1_nodule_samples.query('ScananonID not in @scans_with_xml_nodules')))
reader1_samples = pd.concat([reader1_nodule_samples, reader1_no_nodule_samples])
print(f'Reader 1 total samples ({reader1_samples.shape[0]}):')
display(reader1_samples.head())

print()
print('Reader 2 samples without nodules:',len(reader2_no_nodule_samples))
print('Reader 2 samples with nodules:',len(reader2_nodule_samples))
print('Reader 2 samples without nodule annotations:',len(reader2_nodule_samples.query('Nod1_loc.isnull()')))
print('Reader 2 samples with nodules but no xml nodules:',len(reader2_nodule_samples.query('ScananonID not in @scans_with_xml_nodules')))
reader2_samples = pd.concat([reader2_nodule_samples, reader2_no_nodule_samples])
print(f'Reader 2 total samples ({reader2_samples.shape[0]}):')
display(reader2_samples.head())
print()

Total number of annotations: 732
Total number of scans with soft reconstructions: 757
Total number of scans saved to nifti (Useable): 731
**************************************************
Total number of nifti with xml: 535
-Usable without nodules (Total_no_nods): 423
-Usable with nodules (Total_no_nods): 111
--Useable with nodules but Nod1_loc is Null: 32
--Useable with nodules with xml scan info: 111
--Useable with nodules with xml nodules: 100

Reader 1 samples without nodules: 10
Reader 1 samples with nodules: 53
Reader 1 samples without nodule annotations: 14
Reader 1 samples with nodules but no xml nodules: 5
Reader 1 total samples (63):


Unnamed: 0.1,Unnamed: 0,mhd_path,ObjectType,NDims,BinaryData,BinaryDataByteOrderMSB,CompressedData,CompressedDataSize,TransformMatrix,CenterOfRotation,...,feb_Path_N,feb_Path_M,feb_Path_PL,feb_Path_R,feb_Path_stage,Nod1_floc,Nod2_floc,Nod1_real_world,Nod2_real_world,Reader
0,0,/cluster/project0/lung-triage/lsut/LUNG/UCLH_0...,Image,3,True,False,True,107771947,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,0.0,0.0,0.0,0.0,Stage 1A,264.0,,-1452.8,,1
6,6,/cluster/project0/lung-triage/lsut/LUNG/UCLH_0...,Image,3,True,False,True,112077199,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,Unclassified,298.0,,1786.1,,1
19,19,/cluster/project0/lung-triage/lsut/LUNG/UCLH_2...,Image,3,True,False,True,126519155,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,,252.0,252.0,2118.1,2118.1,1
31,31,/cluster/project0/lung-triage/lsut/LUNG/UCLH_2...,Image,3,True,False,True,132028342,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,Unclassified,257.0,,1854.5,,1
32,32,/cluster/project0/lung-triage/lsut/LUNG/UCLH_1...,Image,3,True,False,True,109212820,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,Unclassified,143.0,,-912.9,,1



Reader 2 samples without nodules: 10
Reader 2 samples with nodules: 58
Reader 2 samples without nodule annotations: 18
Reader 2 samples with nodules but no xml nodules: 6
Reader 2 total samples (68):


Unnamed: 0.1,Unnamed: 0,mhd_path,ObjectType,NDims,BinaryData,BinaryDataByteOrderMSB,CompressedData,CompressedDataSize,TransformMatrix,CenterOfRotation,...,feb_Path_N,feb_Path_M,feb_Path_PL,feb_Path_R,feb_Path_stage,Nod1_floc,Nod2_floc,Nod1_real_world,Nod2_real_world,Reader
7,7,/cluster/project0/lung-triage/lsut/LUNG/UCLH_0...,Image,3,True,False,True,139838181,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,Unclassified,271.0,254.0,1721.7,1708.1,2
65,65,/cluster/project0/lung-triage/lsut/LUNG/UCLH_1...,Image,3,True,False,True,106454500,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,Unclassified,,,,,2
69,69,/cluster/project0/lung-triage/lsut/LUNG/UCLH_2...,Image,3,True,False,True,134591647,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,,220.0,,1244.5,,2
73,73,/cluster/project0/lung-triage/lsut/LUNG/UCLH_2...,Image,3,True,False,True,120070073,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,,144.0,,-1081.899398,,2
92,92,/cluster/project0/lung-triage/lsut/LUNG/UCLH_2...,Image,3,True,False,True,131962610,[[1 0 0]\n [0 1 0]\n [0 0 1]],[0. 0. 0.],...,,,,,,164.0,,-1035.8,,2





In [111]:

def generate_json_markup(nodule_data):

    slicer_markup = {
    "@schema": "https://raw.githubusercontent.com/slicer/slicer/master/Modules/Loadable/Markups/Resources/Schema/markups-schema-v1.0.3.json#",
    "markups": [
        {
            "type": "Fiducial",
            "coordinateSystem": "LPS",
            "coordinateUnits": "mm",
            "locked": False,
            "fixedNumberOfControlPoints": False,
            "labelFormat": "%N-%d",
            "lastUsedControlPointNumber": 1,
            "controlPoints": [
                {
                    "id": f"{nodule.ID}",
                    "label": f"F-{i}",
                    "description": f"{nodule.ID}",
                    "associatedNodeID": "vtkMRMLScalarVolumeNode1",
                    "position": [
                        nodule.X,
                        nodule.Y,
                        nodule.Z
                    ],
                    "orientation": [
                        -1.0,
                        -0.0,
                        -0.0,
                        -0.0,
                        -1.0,
                        -0.0,
                        0.0,
                        0.0,
                        1.0
                    ],
                    "selected": True,
                    "locked": False,
                    "visibility": True,
                    "positionStatus": "defined"
                }
                for i, nodule in enumerate(nodule_data.itertuples())
            ],
            "measurements": [],
            "display": {
                "visibility": True,
                "opacity": 1.0,
                "color": [
                    0.4,
                    1.0,
                    1.0
                ],
                "selectedColor": [
                    1.0,
                    0.5000076295109483,
                    0.5000076295109483
                ],
                "activeColor": [
                    0.4,
                    1.0,
                    0.0
                ],
                "propertiesLabelVisibility": False,
                "pointLabelsVisibility": True,
                "textScale": 3.0,
                "glyphType": "Sphere3D",
                "glyphScale": 3.0,
                "glyphSize": 5.0,
                "useGlyphScale": True,
                "sliceProjection": False,
                "sliceProjectionUseFiducialColor": True,
                "sliceProjectionOutlinedBehindSlicePlane": False,
                "sliceProjectionColor": [
                    1.0,
                    1.0,
                    1.0
                ],
                "sliceProjectionOpacity": 0.6,
                "lineThickness": 0.2,
                "lineColorFadingStart": 1.0,
                "lineColorFadingEnd": 10.0,
                "lineColorFadingSaturation": 1.0,
                "lineColorFadingHueOffset": 0.0,
                "handlesInteractive": False,
                "translationHandleVisibility": True,
                "rotationHandleVisibility": True,
                "scaleHandleVisibility": False,
                "interactionHandleScale": 3.0,
                "snapMode": "toVisibleSurface"
            }
        }
    ]
}   
    return slicer_markup



for patient_uid in xml_nodule_data['PatientUID'].unique():
    markup_json = generate_json_markup(xml_nodule_data.query(f"PatientUID == '{patient_uid}'"))

    Path(f'/Users/john/Projects/SOTAEvaluationNoduleDetection/cache/sota/lsut/detection/{patient_uid}').mkdir(parents=True, exist_ok=True)
    json.dump(markup_json, open(f'/Users/john/Projects/SOTAEvaluationNoduleDetection/cache/sota/lsut/detection/{patient_uid}/markup.json', 'w'), indent=4)
    