In [1]:
import pandas as pd
import nibabel as nib
import numpy as np
import os
from dice import dice

from utils import organizeFolders, organizeFiles, openImage

In [7]:
# Main Folder Dir
fp = '/data/Collin/Liver_Tumor_Segmentation_Methods/ISMRM_AutoLevelSet/'
splitter = '/'

# Pandas dataframe to store everything
"""
Shorthand meanings
    - 'vol' = original image volume
    - 'GT' = ground truth segmentation
    - 'MS' = Manual Segmentation
    - 'SC' = Smart Click Segmentation
    - 'LS' = Level Set Segmentation
"""

df = pd.DataFrame(columns=[
    # Patient ID
    'patient_id',

    # Voxel Count
    'vol_voxCnt',
    'GT_voxCnt',
    'MS_voxCnt',
    'LS_voxCnt',

    # Dice Score
    'GT-MS_dice',
    'GT-LS_dice',
    'MS_LS_dice',
])

# Maping file names to respective categories
file_map = {
    'vol' : 'dicom_vol',
    'LM' : 'pred_vol',
    'GT' : 'GT_liver_cyst_vol',
    'MS' : 'DR_liver_cyst_vol',
    'LS' : 'CL_liver_cyst_level_set_vol'}

inv_file_map = {v: k for k, v in file_map.items()}

In [8]:
from difflib import SequenceMatcher

def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

def findIndex(file_map, a):
    best_choice = None
    best_score = 0.8
    for k, v in file_map.items():
        if similar(a, k) > best_score:
            best_choice = similar(a, k)
            best_choice = k

    return best_choice, best_score

In [13]:
df = pd.DataFrame()

# Get folder paths
folder_dir = organizeFolders(fp)

# Get file paths and info
for i in folder_dir:
    # Get File Paths
    file_dir = organizeFiles(i)
    patient_id = i.split(splitter)[-1]

    # Organize all numpy arrays into dictionary following the file_map
    np_files = {}
    np_files_vol = {}

    for j in file_dir:
        np_arr, volume = openImage(j)
        try:
            np_files[ inv_file_map[j.split(splitter)[-1].split('.nii')[0]] ] = np_arr
            np_files_vol[ inv_file_map[j.split(splitter)[-1].split('.nii')[0]] ] = volume
        except:
            print('Error: ', j)

            index, score = findIndex(inv_file_map, j.split(splitter)[-1].split('.nii')[0])
            if index is not None and inv_file_map[index] not in np_files.keys():
                print('Found replacement for', j, 'with', index, 'with score of', score)
                np_files[ inv_file_map[index] ] = np_arr
                np_files_vol[ inv_file_map[index] ] = volume

            pass


    print(patient_id, '---', np_files_vol)

    # Start geting information
    ## voxCnt and volume

    temp = {}
    keyword = '_voxCnt'
    keyword2 = '_volume(in cm^3)'
    for k in np_files.keys():
        temp['patient_id'] = patient_id
        if k == 'vol':
            temp[k+keyword] = round(np_files[k].shape[0] * np_files[k].shape[1] * np_files[k].shape[2])
            temp[k+keyword2] = round(np_files_vol[k] * temp[k+keyword], 6)
        else:
            temp[k+keyword] = round(np_files[k].sum())
            temp[k+keyword2] = round(np_files_vol[k] * temp[k+keyword], 6)

    # Dice Score
    try:
#         temp['GT-MS_dice'] = dice(np_files['GT'], np_files['MS']) # Ground Truth - Manual Segmentation
#         temp['GT-SC_dice'] = dice(np_files['GT'], np_files['SC']) # Ground Truth - Smart Click Segmentation
        temp['MS_SC_dice'] = dice(np_files['MS'], np_files['LS']) # Manual Segmentation - Smart Click Segmentation
    except:
        pass


    df = df.append(pd.Series(temp, name=patient_id), ignore_index=True)

DEBUG: list directories - ['WC-IRB1308014251_071', 'WC-IRB1308014251_059', 'WC-IRB1308014251_042', 'WC-IRB1308014251_004', 'WC-IRB1308014251_055', 'WC-IRB1308014251_001', 'WC-IRB1308014251_016', 'WC-IRB1308014251_039_Visit2', 'WC-IRB1308014251_003', 'WC-IRB1308014251_061', 'WC-IRB1308014251_040', 'WC-IRB1308014251_006', 'WC-IRB1308014251_032', 'WC-IRB1308014251_025', 'WC-IRB1308014251_066_Visit2', 'WC-IRB1308014251_015', 'WC-IRB1308014251_011', 'WC-IRB1308014251_031', 'WC-IRB1308014251_030', 'WC-IRB1308014251_018', 'WC-IRB1308014251_024_Visit2', 'WC-IRB1308014251_026', 'WC-IRB1308014251_082_Visit2', 'WC-IRB1308014251_034', 'WC-IRB1308014251_051', 'WC-IRB1308014251_005', 'WC-IRB1308014251_017', 'WC-IRB1308014251_039_Visit1', 'WC-IRB1308014251_020', 'WC-IRB1308014251_033', 'WC-IRB1308014251_066_Visit1', 'WC-IRB1308014251_058', 'WC-IRB1308014251_057', 'WC-IRB1308014251_007', 'WC-IRB1308014251_046', 'WC-IRB1308014251_083_Visit2', 'WC-IRB1308014251_035', 'WC-IRB1308014251_044', 'WC-IRB13080

In [14]:
df

Unnamed: 0,LS_volume(in cm^3),LS_voxCnt,patient_id,vol_volume(in cm^3),vol_voxCnt,MS_SC_dice,MS_volume(in cm^3),MS_voxCnt
0,236861.013281,349691.0,WC-IRB1308014251_071,4508400.0,6656000.0,,,
1,37422.483644,44229.0,WC-IRB1308014251_059,4938560.0,5836800.0,,,
2,57423.75,61252.0,WC-IRB1308014251_042,6192000.0,6604800.0,,,
3,9103.672981,4661.0,WC-IRB1308014251_004,9472164.0,4849664.0,,,
4,,,,,,,,
5,38553.52001,126316.0,WC-IRB1308014251_001,7520963.0,24641536.0,,,
6,3603.513281,4259.0,WC-IRB1308014251_016,5198400.0,6144000.0,,,
7,38875.44375,51194.0,WC-IRB1308014251_039_Visit2,5054400.0,6656000.0,,,
8,2433.01875,3592.0,WC-IRB1308014251_003,4282980.0,6323200.0,0.540089,3970.589062,5862.0
9,,,,,,,,


In [7]:
df

Unnamed: 0,patient_id,vol_voxCnt,vol_volume(in cm^3),SC_voxCnt,SC_volume(in cm^3),MS_voxCnt,MS_volume(in cm^3),GT_voxCnt,GT_volume(in cm^3),MS_SC_dice
0,1,4456448.0,8273996.9,71072.0,131954.755823,74369.0,138076.081098,74556.0,138423.271825,0.976465


In [33]:
df.to_csv('dice_score.csv')