In [2]:
import pandas as pd
import nibabel as nib
import numpy as np
import os
from dice import dice

from utils import organizeFolders, organizeFiles, openImage

In [3]:
# Root directory handed to organizeFolders(); the last path component of every
# folder it returns is later used as the patient ID.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
fp = "/data/Collin/Liver_Tumor_Segmentation_Methods/JL_cases"

# Separator used to pull the last component out of folder and file paths.
splitter = "/"

# Pandas dataframe to store everything.
#
# Shorthand meanings used in the column names:
#     - 'vol' = original image volume
#     - 'GT'  = ground truth segmentation
#     - 'MS'  = Manual Segmentation
#     - 'SC'  = Smart Click Segmentation
#     - 'LS'  = Level Set Segmentation
#
# NOTE(review): the processing cell starts with `df = pd.DataFrame()`, which
# replaces this frame, so the schema below is not what reaches the results.
# Also note that 'MS_LS_dice' uses '_' where the other dice columns use '-'.
df = pd.DataFrame(
    columns=(
        ['patient_id']                                             # Patient ID
        + ['vol_voxCnt', 'GT_voxCnt', 'MS_voxCnt', 'LS_voxCnt']    # Voxel Count
        + ['GT-MS_dice', 'GT-LS_dice', 'MS_LS_dice']               # Dice Score
    )
)

# Mapping of category shorthand -> file name stem (the file name with everything
# from '.nii' onwards removed). Disabled categories are kept as comments.
# 'LM' is presumably the liver mask (see the commented-out masking step in the
# processing cell) - TODO confirm.
file_map = dict([
    ('vol', 'dicom_vol'),
    ('LM', 'JL_pred_vol'),
    ('GT', 'GT_DR_liver_cyst_vol'),
    # ('MS', 'DR_liver_cyst_vol'),
    # ('SC', 'DR_liver_cyst_smart_click_vol'),
    # ('JLRM', 'JL_liver_roughMax_vol'),
    ('LS', 'LevelSet_CL_liver_cyst_vol'),
])

# Reverse lookup: file name stem -> category shorthand.
inv_file_map = dict(zip(file_map.values(), file_map.keys()))

In [4]:
from difflib import SequenceMatcher

def similar(a, b):
    """Return how alike two sequences are as a ratio between 0.0 and 1.0.

    The value is difflib's ``SequenceMatcher.ratio()`` computed with no junk
    filter: 1.0 means the sequences are identical, 0.0 means they share nothing.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

def findIndex(file_map, a, threshold=0.8):
    """Find the key of ``file_map`` that most closely resembles ``a``.

    Every key is scored against ``a`` with ``similar``; a key is only considered
    when its score is strictly greater than ``threshold``, and among those the
    highest-scoring key wins (the first one encountered on ties).

    Parameters
    ----------
    file_map : mapping
        Mapping whose keys are the candidate names. The values are not used.
    a : str
        Name to look up.
    threshold : float, optional
        Minimum similarity (exclusive) a key must exceed to be accepted.
        Defaults to 0.8.

    Returns
    -------
    tuple
        ``(best_key, best_score)``. When no key exceeds the threshold the result
        is ``(None, threshold)``.
    """
    best_choice = None
    best_score = threshold
    for k in file_map:
        score = similar(a, k)  # scored once per key
        if score > best_score:
            # Raise the bar so that only a strictly better key can replace this one.
            best_score = score
            best_choice = k

    return best_choice, best_score

In [5]:
# Build one summary record per patient folder and assemble the results table once
# at the end: DataFrame.append was removed in pandas 2.0, and growing a frame
# row-by-row is quadratic.
records = []

# Get folder paths
folder_dir = organizeFolders(fp)

# Suffixes of the per-category output columns.
# NOTE(review): the label says cm^3; confirm the unit of the value returned by
# openImage() before relying on it.
keyword = '_voxCnt'
keyword2 = '_volume(in cm^3)'

# Get file paths and info
for i in folder_dir:
    # Get file paths; the last path component is used as the patient ID
    file_dir = organizeFiles(i)
    patient_id = i.split(splitter)[-1]

    # Organize everything openImage() returns into dictionaries keyed by the
    # category shorthand of file_map
    np_files = {}      # category -> array
    np_files_vol = {}  # category -> second return value (presumably the volume of one voxel)

    for j in file_dir:
        np_arr, volume = openImage(j)
        stem = j.split(splitter)[-1].split('.nii')[0]

        category = inv_file_map.get(stem)
        if category is None:
            # No exact file-name match: fall back to the closest known name, but
            # never let a fuzzy match overwrite a category that is already loaded.
            index, _ = findIndex(inv_file_map, stem)
            if index is None or inv_file_map[index] in np_files:
                continue
            category = inv_file_map[index]

        np_files[category] = np_arr
        np_files_vol[category] = volume

    # Start getting information
    ## voxCnt and volume

    # Even the playing field (disabled): multiply the segmentations by the liver mask
    # try:
    #     liverMask, volume = openImage(os.path.join(i, file_map['LM']+'.nii.gz'))
    #     np_files['MS'] *= liverMask
    #     np_files['LS'] *= liverMask
    #     print(i)
    # except:
    #     pass

    # The ID is stored up front so that a folder without any recognised file
    # still yields an identifiable row.
    temp = {'patient_id': patient_id}
    for k, arr in np_files.items():
        if k == 'vol':
            # Original image: count every voxel of the (first three) dimensions
            temp[k + keyword] = round(arr.shape[0] * arr.shape[1] * arr.shape[2])
        else:
            # Other categories: the voxel count is the sum of the array
            temp[k + keyword] = round(arr.sum())
        temp[k + keyword2] = round(np_files_vol[k] * temp[k + keyword], 6)

    # Dice Score
    # temp['GT-MS_dice'] = dice(np_files['GT'], np_files['MS']) # Ground Truth - Manual Segmentation
    # temp['GT-SC_dice'] = dice(np_files['GT'], np_files['SC']) # Ground Truth - Smart Click Segmentation
    if 'GT' in np_files and 'LS' in np_files:
        try:
            temp['GT_LS_dice'] = dice(np_files['GT'], np_files['LS'])  # Ground Truth - Level Set Segmentation
        except Exception as exc:
            # Best effort: keep the patient's other measurements, but report the failure
            print('Could not compute GT_LS_dice for', patient_id, '-', repr(exc))
    # When either input is missing, the score is simply left out (NaN in the table).

    records.append(temp)

df = pd.DataFrame(records)
# Alphabetical column order reproduces the layout of the previously saved output
df = df.reindex(columns=sorted(df.columns))

DEBUG: list directories - ['WC-IRB1308014251_059', 'WC-IRB1308014251_003', 'WC-IRB1308014251_032', 'WC-IRB1308014251_025', 'WC-IRB1308014251_066_Visit2', 'WC-IRB1308014251_024_Visit2', 'WC-IRB1308014251_026', 'WC-IRB1308014251_034', 'WC-IRB1308014251_039_Visit1', 'WC-IRB1308014251_020', 'WC-IRB1308014251_066_Visit1', 'WC-IRB1308014251_058', 'WC-IRB1308014251_057', 'WC-IRB1308014251_046']


In [6]:
# Preview the first five rows of the results table
df.head(n=5)

Unnamed: 0,GT_LS_dice,GT_volume(in cm^3),GT_voxCnt,LM_volume(in cm^3),LM_voxCnt,LS_volume(in cm^3),LS_voxCnt,patient_id,vol_volume(in cm^3),vol_voxCnt
0,0.422033,390.901613,462.0,108901.635743,128709.0,699.730809,827.0,WC-IRB1308014251_059,4938560.0,5836800.0
1,0.655462,3297.309375,4868.0,173955.421875,256820.0,1941.944531,2867.0,WC-IRB1308014251_003,4282980.0,6323200.0
2,0.44264,10255.053554,41489.0,155582.972092,629444.0,14833.233359,60011.0,WC-IRB1308014251_032,6479551.0,26214400.0
3,0.588372,12619.340117,11486.0,186278.643355,169549.0,27176.736649,24736.0,WC-IRB1308014251_025,2232078.0,2031616.0
4,0.77391,10690.706415,36043.0,115670.103583,389974.0,10072.868237,33960.0,WC-IRB1308014251_066_Visit2,3421197.0,11534336.0


In [7]:
# Display the full results table (one row per processed patient folder)
df

Unnamed: 0,GT_LS_dice,GT_volume(in cm^3),GT_voxCnt,LM_volume(in cm^3),LM_voxCnt,LS_volume(in cm^3),LS_voxCnt,patient_id,vol_volume(in cm^3),vol_voxCnt
0,0.422033,390.901613,462.0,108901.635743,128709.0,699.730809,827.0,WC-IRB1308014251_059,4938560.0,5836800.0
1,0.655462,3297.309375,4868.0,173955.421875,256820.0,1941.944531,2867.0,WC-IRB1308014251_003,4282980.0,6323200.0
2,0.44264,10255.053554,41489.0,155582.972092,629444.0,14833.233359,60011.0,WC-IRB1308014251_032,6479551.0,26214400.0
3,0.588372,12619.340117,11486.0,186278.643355,169549.0,27176.736649,24736.0,WC-IRB1308014251_025,2232078.0,2031616.0
4,0.77391,10690.706415,36043.0,115670.103583,389974.0,10072.868237,33960.0,WC-IRB1308014251_066_Visit2,3421197.0,11534336.0
5,0.541524,13018.546875,19220.0,163961.214844,242065.0,8790.567188,12978.0,WC-IRB1308014251_024_Visit2,3944850.0,5824000.0
6,0.716041,7619.920312,9006.0,89050.521094,105249.0,9664.928906,11423.0,WC-IRB1308014251_026,4678560.0,5529600.0
7,0.616423,9327.899302,11026.0,122057.922684,144278.0,7341.511894,8678.0,WC-IRB1308014251_034,5457659.0,6451200.0
8,0.348431,4445.392377,2276.0,138949.762334,71141.0,12113.498911,6202.0,WC-IRB1308014251_039_Visit1,6912124.0,3538944.0
9,0.557677,11500.29375,10138.0,95099.19375,83834.0,4597.621875,4053.0,WC-IRB1308014251_020,6272640.0,5529600.0


In [8]:
# Save the results table to the working directory; with the default settings the
# row index is written as the first (unnamed) column.
df.to_csv(path_or_buf='dice_score.csv')