In [1]:
import json
import numpy as np
import os
os.chdir('/workspace/noise_and_dice/')

In [2]:
file_path = '/workspace/model-zoo/models/vista3d/docs/labels-TCIA-pediatric.json'
with open(file_path, 'r') as file:
    labels = json.load(file)  # Reads and parses the JSON content

In [3]:
label_list = []
for key,val in labels.items():
    print(key, ': ', val)
    label_list.append(val)

label_list = np.sort(label_list)

right adrenal gland :  8
left adrenal gland :  9
bladder :  15
duodenum :  13
esophagus :  11
gallbladder :  10
heart :  115
right kidney :  5
left kidney :  14
liver :  1
pancreas :  4
prostate :  118
small bowel :  19
spleen :  3
stomach :  12


In [4]:
print(label_list)

[  1   3   4   5   8   9  10  11  12  13  14  15  19 115 118]


## Mapping TS labels to vista

In [5]:
file_path = 'vista3D-TS-map.json'  # specify the output file path

if os.path.exists(file_path):
    with open(file_path,'r') as json_file:
        label_TCIA_map = json.load(json_file)
else: 
    label_TCIA_map = {}
    for key,val in labels.items():
        label_TCIA_map[key] =  input(f'Enter TCIA segmentation name for: {key}') + '.nii.gz'

    with open(file_path, 'w') as json_file:
        json.dump(label_TCIA_map, json_file, indent=4)  # indent=4 for pretty-printing

In [21]:
import SimpleITK as sitk
import numpy as np
import tqdm
def calculate_mask_volume(mask,pixelvolume,label=1):
    counts = np.count_nonzero(mask == label)
    return counts*pixelvolume

In [22]:
scan_dir = '/processed/Public/TCIA/'
label_dir = '/processed/Public/TCIA/predictions/TotalSegmentator'
out_dir = '/processed/Public/TCIA/predictions/TotalSegmentator_mapped'

## Create labels for vista 3d comparison

In [23]:
scans = set([f for f in os.listdir(scan_dir) if f.endswith('.nii.gz')])
masks = set(([f for f in os.listdir(out_dir) if f.endswith('.nii.gz')]))

missing_labels = scans - masks
print('no of labels: ', len(masks))
print('no of scans: ', len(scans))
#print(missing_labels)


no of labels:  0
no of scans:  359


## Create labels for vista3d evaluation

In [25]:
exam_ids = [f.split('.')[0]  for f in os.listdir(scan_dir) if f.endswith('.nii.gz')]

dict_list = []

for eid in tqdm.tqdm(exam_ids):
    mask_out = os.path.join(out_dir,eid+'.nii.gz')
    dict_sequence = {}
    scan_file = os.path.join(scan_dir,eid+'.nii.gz')
    img = sitk.ReadImage(scan_file) 
    size = img.GetSize()
    rows = size[0]
    cols = size[1]
    slices = size[2]
    spacing = img.GetSpacing()
    pixell = spacing[0]
    pixelw = spacing[1]
    thickness = spacing[2]
    pixel_area = pixell*pixelw
    pixel_volume = pixel_area*thickness
    img_arr = sitk.GetArrayViewFromImage(img)
    pixel_low = np.min(img_arr)
    pixel_high = np.max(img_arr)
    pixel_mean = np.mean(img_arr)
    pixel_median = np.median(img_arr)

    #print(eid)
    dict_sequence['patid'] = eid
    dict_sequence['rows']= rows
    dict_sequence['cols']= cols
    dict_sequence['slices'] = slices
    dict_sequence['pixel_volume'] =pixel_volume
    dict_sequence['pixel_area'] =pixel_area
    dict_sequence['slice_thickness'] =thickness
    dict_sequence['intensity_low'] = pixel_low
    dict_sequence['intensity_high'] = pixel_high
    dict_sequence['intensity_mean'] = pixel_mean
    dict_sequence['intensity_median'] = pixel_median
    
    output_arr = None
    folder_path = os.path.join(label_dir,eid)
    for key,val in label_TCIA_map.items():
        mask_file = os.path.join(folder_path,val)
        segment_name = val.split('.')[0]
        vol_key = segment_name + '_volume'
        if os.path.exists(mask_file):
            mask = sitk.ReadImage(mask_file)
            mask_arr = sitk.GetArrayFromImage(mask) 
            mask_arr[mask_arr > 0] = 1
            dict_sequence[vol_key]= np.round(calculate_mask_volume(mask_arr,pixel_volume,label=1)/1000) # in ml
            mask_arr = mask_arr * labels[key]
            #print('vol_key: ', vol_key, ' volume: ', dict_sequence[vol_key])
            if output_arr is None:
                output_arr = mask_arr
            else:
                # Use pixel-wise maximum to merge without summing labels
                output_arr = np.maximum(output_arr, mask_arr)
        else:
            dict_sequence[vol_key] = 0
    
    # print('Liver and spleen volume calculated')
    if output_arr is not None:
        mask_sitk = sitk.GetImageFromArray(output_arr)
        mask_sitk.CopyInformation(img)
        sitk.WriteImage(mask_sitk, mask_out)
    
    dict_list.append(dict_sequence)

100% 359/359 [25:29<00:00,  4.26s/it]


In [117]:
dict_list[0].keys()

dict_keys(['patid', 'rows', 'cols', 'slices', 'pixel_volume', 'pixel_area', 'slice_thickness', 'intensity_low', 'intensity_high', 'intensity_mean', 'intensity_median', 'Adrenal-Right_volume', 'Adrenal-Left_volume', 'Bladder_volume', 'Duodenum_volume', 'Esophagus_volume', 'Gall-Bladder_volume', 'Heart_volume', 'Kidney-Right_volume', 'Kidney-Left_volume', 'Liver_volume', 'Pancreas_volume', 'Prostate_volume', 'Small-Intestine_volume', 'Spleen_volume', 'Stomach_volume'])

In [118]:
import pandas as pd
df_summary = pd.DataFrame(dict_list)

41


In [120]:
list(df_summary)

['patid',
 'rows',
 'cols',
 'slices',
 'pixel_volume',
 'pixel_area',
 'slice_thickness',
 'intensity_low',
 'intensity_high',
 'intensity_mean',
 'intensity_median',
 'Adrenal-Right_volume',
 'Adrenal-Left_volume',
 'Bladder_volume',
 'Duodenum_volume',
 'Esophagus_volume',
 'Gall-Bladder_volume',
 'Heart_volume',
 'Kidney-Right_volume',
 'Kidney-Left_volume',
 'Liver_volume',
 'Pancreas_volume',
 'Prostate_volume',
 'Small-Intestine_volume',
 'Spleen_volume',
 'Stomach_volume']

In [119]:
#display(df_summary)
#df_summary.to_csv(os.path.join('TCIA_ped_imagestats.csv'),index=False)

Unnamed: 0,patid,rows,cols,slices,pixel_volume,pixel_area,slice_thickness,intensity_low,intensity_high,intensity_mean,...,Gall-Bladder_volume,Heart_volume,Kidney-Right_volume,Kidney-Left_volume,Liver_volume,Pancreas_volume,Prostate_volume,Small-Intestine_volume,Spleen_volume,Stomach_volume
0,07-14-2009-NA-CT-24459,512,512,179,0.369263,0.184631,2.000,-1000,1487,-426.563187,...,41.0,218.0,103.0,106.0,880.0,35.0,2.0,589.0,155.0,96.0
1,08-04-2004-NA-CT-80302,512,512,825,0.244141,0.390625,0.625,-1000,3071,-521.458677,...,11.0,378.0,140.0,135.0,1514.0,59.0,3.0,662.0,243.0,230.0
2,08-25-2004-NA-CT-14591,512,512,335,0.988770,0.494385,2.000,-1000,3115,-581.706953,...,48.0,522.0,227.0,227.0,2066.0,186.0,13.0,1624.0,284.0,118.0
3,11-22-2005-NA-CT-47368,512,512,737,0.137329,0.219727,0.625,-1000,3071,-524.165073,...,16.0,277.0,88.0,95.0,799.0,39.0,0.0,758.0,186.0,178.0
4,10-11-2008-NA-CT-72580,512,512,181,0.369263,0.184631,2.000,-1000,3071,-553.647913,...,12.0,144.0,45.0,35.0,401.0,7.0,1.0,352.0,31.0,150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,12-31-2007-NA-CT-34710,512,512,114,0.369263,0.184631,2.000,-1000,1273,-471.685191,...,10.0,36.0,44.0,46.0,463.0,21.0,0.0,302.0,54.0,54.0
355,09-16-2006-NA-CT-78668,512,512,320,0.781250,0.390625,2.000,-1000,3134,-524.867735,...,47.0,526.0,156.0,185.0,1877.0,55.0,0.0,1081.0,248.0,249.0
356,07-15-2008-NA-CT-42067,512,512,300,0.830841,0.415421,2.000,-1000,3071,-529.742713,...,22.0,472.0,168.0,155.0,2293.0,55.0,4.0,431.0,315.0,105.0
357,09-19-2008-NA-CT-87707,512,512,203,0.439453,0.219727,2.000,-1000,3071,-550.595664,...,12.0,201.0,0.0,86.0,614.0,24.0,0.0,409.0,97.0,169.0
