In [45]:
import pandas as pd
import os

def read_results(csv_path):
    """Load one results CSV and annotate it with metadata parsed from its path.

    The function relies on this directory layout (path components counted
    from the end):

        <date>_phantom<id>_<x>_<ct>/<dir>/<network>_<..>_<rep>/results_<tissue>.csv
        <date>_phantom<id>_<x>_<ct>/<dir>/<network>_<..>_<rep>/dose*/results_<tissue>.csv

    Parameters
    ----------
    csv_path : str
        Path of the CSV file. The file name must contain an underscore;
        the token after the first underscore (without extension) becomes
        the ``tissue`` value.

    Returns
    -------
    pandas.DataFrame
        The CSV content plus the string columns ``tissue``, ``network``,
        ``rep``, ``phantom``, ``ct``, ``date`` and the boolean column ``tta``.

    Raises
    ------
    IndexError
        If the path has fewer components, or the phantom folder fewer
        ``_``-separated tokens, than the layout above requires.
    """
    df = pd.read_csv(csv_path)

    tissue = os.path.basename(csv_path).split('_')[1].split('.')[0]
    df['tissue'] = tissue

    # Normalise the separator so the positional look-ups also work for paths
    # built with an os.sep other than '/' (no-op on POSIX).
    path_parts = csv_path.replace(os.sep, '/').split('/')

    # Results may sit one level deeper, inside a "dose*" sub-folder; in that
    # case every folder of interest is one component further up.
    offset = 1 if path_parts[-2].startswith('dose') else 0

    network_rep = path_parts[-2 - offset].split('_')
    network = "_".join(network_rep[:-2])
    df['network'] = network
    # Debugging aid kept from the original: report files of this network name.
    if network == '3dres_fullres_LiTS_151':
        print(csv_path)
    df['rep'] = network_rep[-1]
    # The flag is the 4th token from the end ("..._tta_enabled_<x>_<rep>").
    # Guard the look-up so short folder names yield False instead of raising.
    df['tta'] = len(network_rep) >= 4 and network_rep[-4] == 'tta'

    date_phantom_ct = path_parts[-4 - offset].split('_')
    df['phantom'] = date_phantom_ct[1].replace('phantom', '')
    df['ct'] = date_phantom_ct[3]
    df['date'] = date_phantom_ct[0]

    return df



In [46]:
# import pandas as pd
# import os 

# def combine_photon_results(results_folder, type):
#     results = []
#     for root, dirs, files in os.walk(results_folder):
#         for file in files:
#             if file.startswith(type):
#                 df_temp = pd.read_csv(os.path.join(root, file))
#                 results.append(df_temp)
    
#     results_df = pd.concat(results)
#     results_df = results_df.to_csv(f'{results_folder}/{type[0:-1]}.csv', index=False)

# combine_photon_results("/home/uli/data/Insync/uligenske@gmail.com/Google Drive/arbeit/deep_learning/segmentation/datasets/phantom_datasets/230915_phantom241_043/results/outout_241_photon/output_241_photon_2d_tta_enabled_rep1", 
#                        "results_lesions_")


                

In [47]:
# read results
# Walk every phantom-241 / phantom-247 dataset folder and load each per-tissue
# results CSV through read_results().

path_all_phantom_data = '/home/uli/data/Insync/uligenske@gmail.com/Google Drive/arbeit/deep_learning/segmentation/datasets/phantom_datasets'
all_phantom_dirs = os.listdir(path_all_phantom_data)
# Keep only directories whose name has at least four '_'-separated tokens;
# read_results() indexes tokens 0, 1 and 3 of that name.
all_phantom_dirs = [d for d in all_phantom_dirs if os.path.isdir(os.path.join(path_all_phantom_data, d)) and len(d.split('_')) >= 4]
phantom_dirs_241_247 = [d for d in all_phantom_dirs if d.split('_')[1].endswith(('241', '247'))]
phantom_dirs_241_247_path = [os.path.join(path_all_phantom_data, d) for d in phantom_dirs_241_247 ]

# Collect the frames and concatenate once: concatenating inside the loop
# re-copies all previously loaded rows for every file.
result_frames = []
for path in phantom_dirs_241_247_path:
    for root, dirs, files in os.walk(path):
        for file in files:
            if file in ['results_lesion.csv', 'results_lesions.csv', 'results_liver.csv']:
                csv_path = os.path.join(root, file)
                result_frames.append(read_results(csv_path))

# pd.concat raises on an empty list; fall back to an empty frame as before.
df = pd.concat(result_frames) if result_frames else pd.DataFrame()

            

In [48]:
# Distinct acquisition dates (first token of the dataset folder name) that were loaded.
df.loc[:, "date"].unique()

array(['250618', '250727', '240306', '250515', '230915', '250604',
       '250321', '250404', '250411', '231205', '250322', '250328'],
      dtype=object)

In [49]:
# add analysis meta information
# Acquisition parameters are read from the '_'-separated tokens of
# `analysis_name`. The token positions differ between the "GE"/"CT4" rows and
# the "photon" rows, so the two groups are handled separately and re-joined.
# Rows with any other `ct` value are not part of either group and are dropped.
df_non_photon = df[df["ct"].isin(["GE", "CT4"])].copy()
df_non_photon.loc[:,'dose'] = df_non_photon['analysis_name'].apply(lambda x: x.split('_')[2])
df_non_photon.loc[:,'kernel'] = df_non_photon['analysis_name'].apply(lambda x: x.split('_')[3])
df_non_photon.loc[:,'reco'] = df_non_photon['analysis_name'].apply(lambda x: x.split('_')[4])
# Token 6 is only used when the name has exactly 7, 12 or 13 tokens; otherwise None.
df_non_photon.loc[:,'rev_time'] = df_non_photon["analysis_name"].apply(lambda x: x.split("_")[6] if len(x.split("_")) in [7,12,13] else None)

# .copy() so the column assignments below write to an independent frame
# instead of a slice of `df` (matches the handling of df_non_photon above).
df_photon = df[df["ct"] == "photon"].copy()
df_photon.loc[:,'dose'] = df_photon['analysis_name'].apply(lambda x: x.split('_')[2])
df_photon.loc[:,'CTDIvol'] = df_photon['analysis_name'].apply(lambda x: x.split('_')[-3])
df_photon.loc[:,'reco'] = df_photon['analysis_name'].apply(lambda x: x.split('_')[-2])
# NOTE(review): 'kernel' and 'slice_thickness' both read the last token, so the
# two columns are identical. One of the indices is probably wrong; confirm
# against the photon naming scheme.
df_photon.loc[:,'kernel'] = df_photon['analysis_name'].apply(lambda x: x.split('_')[-1])
df_photon.loc[:,'slice_thickness'] = df_photon['analysis_name'].apply(lambda x: x.split('_')[-1])
df_photon.loc[:,'rev_time'] = df_photon["analysis_name"].apply(lambda x: x.split("_")[3])

df = pd.concat([df_photon, df_non_photon])
# reset_index() without drop=True keeps the previous index as an 'index' column.
df = df.reset_index()


In [50]:
# Drop results that belong to predictions made on empty volumes.
# Such predictions are recognised by their mask file being smaller than
# 50000 bytes; the mask file name without '.nii.gz' is the analysis name.
mask_path = "../../231205_phantom247_055_CT4/results/3d_fullres_LiTS+phantom_254_repetition_4"
masks = [
    fname
    for fname in os.listdir(mask_path)
    if os.path.getsize(os.path.join(mask_path, fname)) < 50000 and fname.endswith('.nii.gz')
]
if not masks:
    print("Warning: no masks to remove found!")
analysis_names_to_remove = [fname[:-len('.nii.gz')] for fname in masks]
is_empty_volume = df['analysis_name'].isin(analysis_names_to_remove)
df = df[~is_empty_volume]


In [51]:
# Convert pixel counts into volumes (pixel count * 0.586 * 0.586 * slice factor).
# No rows are filtered here; the columns only prepare a later size filter.
# Presumably 0.586 is the in-plane spacing and 0.8 / 0.625 the slice thickness
# in mm (TODO confirm).
# NOTE(review): the frame was restricted to ct in {"GE", "CT4", "photon"} when
# the analysis metadata was added, so the 'canon' factor (0.8) is never used.
# Confirm whether "CT4" was meant.
# Column arithmetic gives the same values as the former row-wise df.apply(),
# without calling a Python function once per row for each column.
slice_factor = df['ct'].apply(lambda ct_name: 0.8 if ct_name == 'canon' else 0.625)
df['volume_pred'] = df['px_numb_pred']*0.586*0.586*slice_factor
df['volume_gt'] = df['px_numb_gt']*0.586*0.586*slice_factor

In [52]:
import tqdm
# add CTDIvol values
# For "CT4" and "GE" rows of phantoms 241/247 the CTDIvol is looked up in the
# per-scanner tables; every other row keeps the CTDIvol value it already has.
import pandas as pd
base_path = "/home/uli/data/Insync/uligenske@gmail.com/Google Drive/arbeit/deep_learning/segmentation/datasets/phantom_datasets"
pbar = tqdm.tqdm(total=len(df))
df_CDTI_CT4 = pd.read_csv('meta/dicom_dose_data_mA_CTDIvol_CT4.csv')
df_CDTI_GE = pd.read_csv('meta/dicom_dose_data_mA_CTDIvol_GE.csv')
df["dose"] = df["dose"].astype(int)
df["rev_time"] = df["rev_time"].astype(float)

# Plain arrays so the loop can address rows by position.
phantom = df["phantom"].values
ct = df["ct"].values
dose = df["dose"].values
rev_time = df["rev_time"].values
# Existing CTDIvol values by position. `df["CTDIvol"][index]` is a label
# look-up, and the labels no longer match positions once rows were filtered
# out after the last reset_index().
existing_ctdi_vol = df["CTDIvol"].values if "CTDIvol" in df.columns else None
cdti_vol = []
try:
    for index in range(len(df)):
        pbar.update(1)
        # for CT match by tubecurrent and for GE match by tubecurrent and revolution time
        if phantom[index] in ["241","247"] and ct[index] == "CT4":
            dose_CTDIvols = df_CDTI_CT4["CTDIvol"][df_CDTI_CT4["mA"] == dose[index]].values
        elif phantom[index] in ["241","247"] and ct[index] == "GE":
            dose_CTDIvols = df_CDTI_GE["CTDIvol"][(df_CDTI_GE["mA"] == dose[index]) & (df_CDTI_GE["RevolutionTime"] == rev_time[index])].values
        else:
            if existing_ctdi_vol is None:
                # Same failure as before when the column does not exist.
                raise KeyError("CTDIvol")
            cdti_vol.append(existing_ctdi_vol[index])
            continue

        # The lookup must identify exactly one table entry.
        if len(dose_CTDIvols) > 1:
            raise ValueError("more than one dose found")
        elif len(dose_CTDIvols) == 0:
            raise ValueError("no dose found")

        cdti_vol.append(dose_CTDIvols[0])
finally:
    # Release the progress bar even when a lookup fails.
    pbar.close()

df["CTDIvol"] = cdti_vol
# NOTE(review): every ct other than "GE" is labelled "canon", including the
# "photon" rows. Confirm this is intended.
df["manufacturer"] = df["ct"].apply(lambda x: "ge" if x == "GE" else "canon")


  0%|          | 1/2906632 [05:34<270168:25:15, 334.62s/it]
100%|█████████▉| 2905976/2906632 [04:26<00:00, 7599.99it/s] 

In [53]:
# Ad-hoc inspection: revolution time at the position where the CTDIvol loop
# last left `index`. Relies on `rev_time` and `index` leaking from that cell.
rev_time[index]

0.5

In [54]:
# Rows that came from results_liver.csv files, written to their own CSV.
is_liver = df['tissue'] == 'liver'
df_liver = df.loc[is_liver]
df_liver.to_csv('results_csv/results_liver_paper.csv', index=False)

In [55]:
# Network names present in the liver results.
df_liver.loc[:, "network"].unique()

array(['3d_fullres_LiTS_151_res', '3d_fullres_LiTS_151',
       '3d_fullres_LiTS+phantom_254_res', '3d_fullres_LiTS+phantom_255',
       '3d_fullres_LiTS+phantom_256', '3d_fullres_LiTS+phantom_257_res',
       '3d_fullres_LiTS+phantom_247_res', '3d_fullres_LiTS+phantom_247',
       '3d_fullres_LiTS+phantom_250_res',
       '3d_fullres_LiTS+metaphantom20-100mA_252_tta_enabled',
       '3d_fullres_LiTS+phantom_257', '3d_fullres_tta_enabled',
       '3d_fullres_LiTS+phantom_254', '2d_tta_enabled',
       '2d_LiTS+phantom_254', '3d_fullres_LiTS+phantom_250',
       '3d_fullres_LiTS+phantom_259', '3d_fullres_LiTS+phantom_258',
       '3d_fullres_LiTS+phantom_249', '3d_fullres_LiTS+phantom_246'],
      dtype=object)

In [56]:
# Rows that came from results_lesions.csv files ('lesions', as opposed to the
# 'lesion' rows handled separately), written to their own CSV.
is_lesions = df['tissue'] == 'lesions'
df_lesions = df.loc[is_lesions]
df_lesions.to_csv('results_csv/results_lesions_paper.csv', index=False)

In [57]:
# Rows from results_lesion.csv files: derive a lesion id and produce one row
# per ground-truth file name ('~' separates several names in one cell).
# NOTE(review): lesion_id is the text before the first '_' of the unsplit
# cell, so all rows exploded from one cell share the id of its first name.
# Confirm that is intended.
df_lesion = df.loc[df['tissue'] == 'lesion'].copy()
df_lesion["lesion_id"] = df_lesion["filename_gt"].apply(lambda name: str(name).split('_')[0])
df_lesion["filename_gt"] = df_lesion["filename_gt"].str.split('~')
df_lesion = df_lesion.explode("filename_gt")

In [58]:
base_path = "/home/uli/data/Insync/uligenske@gmail.com/Google Drive/arbeit/deep_learning/segmentation/datasets/phantom_datasets"
# add metadata
# Maps phantom id -> [path of the spheres workbook, name of the sheet to read].
paths_metainfo_phantoms = {
#    '238': ['../phantom_datasets/230912_phantom238_055_unzipped_archive/meta/config/spheres.xlsx', "Version 3 (finale)"],
    241: [os.path.join(base_path,'230915_phantom241_043_CT4/meta/config/spheres.xlsx'), "Version 3 (finale)"],
    247: [os.path.join(base_path,'231205_phantom247_055_CT4/meta/config/spheres.xlsx'), "Version 3 (finale)"],
#    '248': ['../phantom_datasets/230915_phantom241_043_unzipped_archive/meta/config/spheres.xlsx', "Version 3 final design"],
#    'semi': ['../phantom_datasets/230401_semiantro_unzipped_archive/meta/design_vorlage/spheres.xlsx', "1080_8-8-8-12-12-8-8"],
#    '043old': ['../phantom_datasets/230915_phantom241_043_unzipped_archive/meta/config/spheres.xlsx', "Version 3 final design"]
}
# The loop variable is deliberately not called `phantom`, so it does not
# overwrite the `phantom` array created when the CTDIvol values were added.
meta_frames = []
for phantom_id, (xlsx_path, sheet_name) in paths_metainfo_phantoms.items():
    meta_infos = pd.read_excel(xlsx_path, sheet_name=sheet_name)
    # Stored as string so it has the same type as the 'phantom' column parsed
    # from the folder names.
    meta_infos['phantom'] = str(phantom_id)
    # The sheet's 'name' column is duplicated under the key used for merging.
    meta_infos['lesion_id'] = meta_infos['name']
    meta_frames.append(meta_infos)

# Concatenate once; fall back to an empty frame when no workbook is configured.
df_meta = pd.concat(meta_frames) if meta_frames else pd.DataFrame()



In [59]:
# Attach the workbook metadata to the lesion rows. The outer join keeps rows
# from either side that have no partner for the (lesion_id, phantom) pair.
df_lesion_meta = df_lesion.merge(
    right=df_meta,
    how='outer',
    on=['lesion_id', 'phantom'],
)

In [60]:
# Write the merged lesion + metadata table without the index column.
df_lesion_meta.to_csv(
    'results_csv/results_lesion_paper.csv',
    index=False,
)

100%|██████████| 2906632/2906632 [04:42<00:00, 7599.99it/s]

In [61]:
# Network names present in the merged lesion table.
df_lesion_meta.loc[:, "network"].unique()

array(['3d_fullres_LiTS_151_res', '3d_fullres_LiTS_151',
       '3d_fullres_LiTS+phantom_254_res', '3d_fullres_LiTS+phantom_249',
       '3d_fullres_LiTS+phantom_255', '3d_fullres_LiTS+phantom_256',
       '3d_fullres_LiTS+phantom_257_res',
       '3d_fullres_LiTS+phantom_247_res', '3d_fullres_LiTS+phantom_247',
       '3d_fullres_LiTS+phantom_250_res', '3d_fullres_LiTS+phantom_246',
       '3d_fullres_LiTS+metaphantom20-100mA_252_tta_enabled',
       '3d_fullres_LiTS+phantom_257', '3d_fullres_tta_enabled',
       '3d_fullres_LiTS+phantom_254', '3d_fullres_LiTS+phantom_250',
       '3d_fullres_LiTS+phantom_259', '3d_fullres_LiTS+phantom_258',
       '2d_tta_enabled', '2d_LiTS+phantom_254'], dtype=object)