In [6]:
import pandas as pd
import nibabel as nib
import numpy as np
import os
from dice import dice

from utils import organizeFolders, organizeFiles, openImage

In [17]:
# Main folder: organizeFolders(fp) is later used to list the case folders inside it
fp = r'D:\Research\Extra December Journal Submittion Cases'
# Separator used when splitting folder/file paths into their components
splitter = '\\'

# Shorthand meanings used in column names and file_map keys
#     - 'vol' = original image volume
#     - 'GT'  = ground truth segmentation
#     - 'MS'  = Manual Segmentation
#     - 'SC'  = Smart Click Segmentation
#     - 'LS'  = Level Set Segmentation

# Empty pandas dataframe to store everything:
# patient id first, then voxel counts, then Dice scores
df = pd.DataFrame(columns=(
    ['patient_id']
    + [tag + '_voxCnt' for tag in ('vol', 'GT', 'MS', 'LS')]
    + ['GT-MS_dice', 'GT-LS_dice', 'MS_LS_dice']
))

# Mapping from category shorthand to the file name (without extension) that
# holds it. Disabled categories are kept as comments for reference.
file_map = dict([
    ('vol', 'dicom_vol'),
    ('LM', 'JL_pred_vol'),
    # ('GT', 'GT_DR_liver_cyst_vol'),
    ('rGT', 'revisedGT_DR_liver_cyst_vol'),
    # ('MS', 'DR_liver_cyst_vol'),
    # ('SC', 'DR_liver_cyst_smart_click_vol'),
    # ('JLRM', 'JL_liver_roughMax_vol'),
    ('LS', 'LevelSet_CL_liver_cyst_vol'),
    ('rLS', 'revisedLevelSet_CL_liver_cyst_vol'),
])

# Reverse lookup: file name -> category shorthand
inv_file_map = dict(zip(file_map.values(), file_map.keys()))

In [18]:
from difflib import SequenceMatcher

def similar(a, b):
    """Return the difflib similarity ratio between sequences ``a`` and ``b``.

    The value is ``SequenceMatcher(None, a, b).ratio()``: a float in [0, 1],
    where 1.0 means the two sequences are identical.
    """
    return SequenceMatcher(None, a, b).ratio()

def findIndex(file_map, a, threshold=0.8):
    """Find the key of ``file_map`` that is most similar to ``a``.

    Parameters
    ----------
    file_map : dict
        Mapping whose keys are the candidate strings; the values are ignored.
    a : str
        String to match against the candidate keys.
    threshold : float, optional
        Minimum similarity a key must strictly exceed to be accepted
        (defaults to 0.8).

    Returns
    -------
    tuple
        ``(best_choice, best_score)``: the best-matching key and its
        similarity ratio, or ``(None, threshold)`` when no key scores above
        the threshold.
    """
    best_choice = None
    best_score = threshold
    for candidate in file_map:
        score = similar(a, candidate)
        # Raise the bar after every accepted candidate so the key with the
        # highest ratio wins, not merely the last one above the threshold.
        if score > best_score:
            best_choice, best_score = candidate, score

    return best_choice, best_score

In [21]:
# Build the per-patient summary table `df`: one row per case folder holding
# voxel counts, volumes and Dice scores for every category that could be loaded.

# Get folder paths
folder_dir = organizeFolders(fp)

# Column-name suffixes for the two measurements stored per category
keyword = '_voxCnt'
keyword2 = '_volume(in cm^3)'

# Dice comparisons to report: output column -> (first category, second category).
# Category names are the keys of file_map ('rGT', 'LS', 'rLS').
dice_pairs = {
    'GT_LS_dice': ('rGT', 'LS'),
    'GT_rLS_dice': ('rGT', 'rLS'),
    'LS_rLS_dice': ('LS', 'rLS'),
}

# Rows are collected in a list and turned into a DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
records = []

# Get file paths and info
for folder in folder_dir:
    # Get File Paths
    file_dir = organizeFiles(folder)
    patient_id = folder.split(splitter)[-1]

    # Organize all numpy arrays into dictionaries keyed by file_map category
    np_files = {}      # category -> array returned by openImage
    np_files_vol = {}  # category -> second value returned by openImage
                       # (presumably the volume of one voxel in cm^3 -- TODO confirm)

    for file_path in file_dir:
        np_arr, volume = openImage(file_path)
        # File name without directory and without the '.nii...' extension
        stem = file_path.split(splitter)[-1].split('.nii')[0]

        category = inv_file_map.get(stem)
        if category is None:
            # No exact file-name match: fall back to the closest known file
            # name, but never overwrite a category that is already loaded.
            index, score = findIndex(inv_file_map, stem)
            if index is None or inv_file_map[index] in np_files:
                continue
            category = inv_file_map[index]

        np_files[category] = np_arr
        np_files_vol[category] = volume

    # "Even the playing field" (disabled experiment): multiply the
    # segmentations by the liver mask before measuring them.
    #     try:
    #         liverMask, volume = openImage(os.path.join(folder, file_map['LM']+'.nii.gz'))
    #         np_files['MS'] *= liverMask
    #         np_files['LS'] *= liverMask
    #     except:
    #         pass

    # Start getting information
    ## voxCnt and volume
    # patient_id is set up front so the row is identifiable even when no
    # file of this case could be mapped to a category.
    temp = {'patient_id': patient_id}
    for k, arr in np_files.items():
        if k == 'vol':
            # Image volume: count every voxel of the (first three) dimensions
            vox_count = arr.shape[0] * arr.shape[1] * arr.shape[2]
        else:
            # Other categories: sum of the array values (voxel count for a 0/1 mask)
            vox_count = round(arr.sum())
        temp[k + keyword] = vox_count
        temp[k + keyword2] = round(np_files_vol[k] * vox_count, 6)

    # Dice Score
    # Each pair is attempted independently so that one missing or invalid
    # category does not discard the scores of the remaining pairs.
    for column, (first_key, second_key) in dice_pairs.items():
        try:
            temp[column] = dice(np_files[first_key], np_files[second_key])
        except Exception as err:
            print("Error: ", patient_id, column, repr(err))

    records.append(temp)

df = pd.DataFrame(records)

DEBUG: list directories - ['WC-IRB1308014251_039_Visit1', 'WC-IRB1308014251_032', 'WC-IRB1308014251_030', 'WC-IRB1308014251_003']


In [20]:
# Preview the first rows of the per-patient summary table.
# NOTE(review): the saved output of this cell carries execution count In [20],
# i.e. it was produced before the In [21] run of the loop cell above, and its
# rLS/Dice values differ from the later `df` display -- re-run after the loop.
df.head()

Unnamed: 0,patient_id,vol_voxCnt,vol_volume(in cm^3),rGT_voxCnt,rGT_volume(in cm^3),LM_voxCnt,LM_volume(in cm^3),LS_voxCnt,LS_volume(in cm^3),rLS_voxCnt,rLS_volume(in cm^3),GT_LS_dice,GT_rLS_dice,LS_rLS_dice
0,WC-IRB1308014251_039_Visit1,3538944.0,69121.242,2371.0,46.309426,71141.0,1389.497623,6202.0,121.134989,2936.0,57.34478,0.362067,0.544187,0.623988
1,WC-IRB1308014251_032,26214400.0,64795.50625,40967.0,101.260281,629444.0,1555.829721,42337.0,104.646582,39584.0,97.841847,0.6919,0.69474,0.957825
2,WC-IRB1308014251_030,5529600.0,46785.6,5117.0,43.294617,140551.0,1189.193227,3754.0,31.762359,3684.0,31.170094,0.6085,0.617657,0.979833
3,WC-IRB1308014251_003,6323200.0,42829.8,4868.0,32.973094,256820.0,1739.554219,2867.0,19.419445,3578.0,24.235359,0.655462,0.720104,0.870132


In [22]:
# Display the complete summary table (one row per patient folder).
df

Unnamed: 0,patient_id,vol_voxCnt,vol_volume(in cm^3),rGT_voxCnt,rGT_volume(in cm^3),LM_voxCnt,LM_volume(in cm^3),LS_voxCnt,LS_volume(in cm^3),rLS_voxCnt,rLS_volume(in cm^3),GT_LS_dice,GT_rLS_dice,LS_rLS_dice
0,WC-IRB1308014251_039_Visit1,3538944.0,69121.242,2371.0,46.309426,71141.0,1389.497623,6202.0,121.134989,2799.0,54.668951,0.362067,0.582205,0.571048
1,WC-IRB1308014251_032,26214400.0,64795.50625,40967.0,101.260281,629444.0,1555.829721,42337.0,104.646582,39771.0,98.304065,0.6919,0.697367,0.954889
2,WC-IRB1308014251_030,5529600.0,46785.6,5117.0,43.294617,140551.0,1189.193227,3754.0,31.762359,3776.0,31.9485,0.6085,0.66929,0.894024
3,WC-IRB1308014251_003,6323200.0,42829.8,4868.0,32.973094,256820.0,1739.554219,2867.0,19.419445,4193.0,28.401023,0.655462,0.785123,0.771671


In [23]:
# Save the summary table as CSV in the current working directory.
# With the default arguments pandas also writes the row index as the first
# (unnamed) column of the file.
df.to_csv('dice_score2.csv')