In [15]:
import os
import sys
from json_minify import json_minify # to allow comments in json files
import json
import time
import ants
import glob
import shutil
import pandas as pd
import numpy as np
# import pydicom as pyd
# import dicom2nifti
from directory_tree import display_tree  # Nice tool to display directory trees (https://pypi.org/project/directory-tree/)

from datetime import timedelta

In [16]:
def list_folder_content(path, show_hidden=False):
    """
    Return the names of the entries found directly inside PATH (as given by os.listdir).
    Entries whose name starts with a dot are left out unless SHOW_HIDDEN is True.
    """
    entries = os.listdir(path)
    if not show_hidden:
        entries = [entry for entry in entries if not entry.startswith('.')]
    return entries

def display_folder_list(file_list):
    """Print every item of FILE_LIST on its own line, formatted as '[index] - item'."""
    numbered = [f'[{position}] - {entry}' for position, entry in enumerate(file_list)]
    print('\n'.join(numbered))

def get_path_to_process(full_path):
    """
    Print the directory tree of FULL_PATH (two levels deep) and return the path to
    the sub-folder that will be processed.

    When FULL_PATH holds more than one (non-hidden) entry, the user is asked to pick
    one by index; pressing Enter keeps the first entry.
    """
    print('Folder content:')
    print(display_tree(full_path, header=True, string_rep=True, show_hidden=False, max_depth=2))
    candidates = list_folder_content(full_path)
    # Ideally there is a single sub-folder inside the PreTreatment folder.
    # With several of them the user has to choose; the first one is the default.
    chosen = 0
    if len(candidates) > 1:
        display_folder_list(candidates)
        prompt = f'Select the folder with the dataset_to_process to process (0-{len(candidates)-1} or just press Enter to proceed with sub-folder {candidates[chosen]}):'
        answer = input(prompt)
        if answer:
            chosen = int(answer)
    selected = candidates[chosen]
    path2data = os.path.join(full_path, selected)
    print(f'Will process {selected}')
    return path2data

def check_time_points(path_to_check, nmax = 6):
    """
    Check that PATH_TO_CHECK holds exactly NMAX (non-hidden) entries.

    Prints a short verdict followed by the directory tree (one level deep).
    Returns the list of entries when the count matches NMAX; returns None when
    it does not, or when PATH_TO_CHECK is None.
    """
    if path_to_check is None:
        return None

    entries = list_folder_content(path_to_check)
    count_matches = len(entries) == nmax
    if count_matches:
        print('Folder seems Ok:')
    else:
        print(f'Error! Check path {path_to_check} is the correct one:')
    display_tree(path_to_check, max_depth=1)
    return entries if count_matches else None

def add_prefix_to_filename(full_path, prefix=None):
    """
    Return FULL_PATH with PREFIX and an underscore prepended to its file name.

    The last component of FULL_PATH is assumed to be the file name (with extension),
    e.g. ('data/vol.nii.gz', 'reg') -> 'data/reg_vol.nii.gz'.

    When PREFIX is None or empty there is nothing to add, so FULL_PATH is returned
    unchanged (previously the falsy PREFIX itself was returned, which left the
    caller without a usable path).
    """
    if not prefix:
        return full_path
    file_path, file_name_ext = os.path.split(full_path)
    updated_filename = '_'.join([prefix, file_name_ext])
    return os.path.join(file_path, updated_filename)

def mutual_info_metric(volume1, volume2):
    """
    Thin wrapper that returns ants.image_mutual_information(volume1, volume2).

    See https://antspy.readthedocs.io/en/latest/registration.html#ants.image_mutual_information
    For more complex/versatile image comparisons, see https://antspy.readthedocs.io/en/latest/utils.html#ants.image_similarity
    """
    return ants.image_mutual_information(volume1, volume2)

def get_metrics(volume1, volume2, metric='mi', debug=False, sampling_pc=1.0):
    """
    Compute one similarity metric between VOLUME1 and VOLUME2 using ANTsPy.

    METRIC selects the calculation:
        'mi'            -> ants.image_mutual_information (SAMPLING_PC is not used)
        'mean_sq'       -> ants.image_similarity, metric_type='MeanSquares'
        'dist_corr'     -> ants.image_similarity, metric_type='Correlation'
        'mattes_mi'     -> ants.image_similarity, metric_type='MattesMutualInformation'
        'joint_hist_mi' -> ants.image_similarity, metric_type='JointHistogramMutualInformation'
    Any other value yields None.

    SAMPLING_PC is forwarded as `sampling_percentage` to ants.image_similarity.
    With DEBUG=True the elapsed time of the calculation is printed.

    See https://antspy.readthedocs.io/en/latest/utils.html#ants.image_similarity
    and https://antspy.readthedocs.io/en/latest/registration.html#ants.image_mutual_information
    """
    # Short metric keys -> `metric_type` names understood by ants.image_similarity
    similarity_types = {
        'mean_sq': 'MeanSquares',
        'dist_corr': 'Correlation',
        'mattes_mi': 'MattesMutualInformation',
        'joint_hist_mi': 'JointHistogramMutualInformation',
    }
    separator = '§' * 100

    if debug:
        start_time = time.perf_counter()
        print(separator)
        print(f'§§§§§§§§§§§§ GET_METRICS: Timing the calculations of metric {metric}...')

    if metric == 'mi':
        val = ants.image_mutual_information(volume1, volume2)
    elif metric in similarity_types:
        val = ants.image_similarity(volume1, volume2,
                                    metric_type=similarity_types[metric],
                                    sampling_percentage=sampling_pc)
    else:
        val = None

    if debug:
        elp_time = time.perf_counter() - start_time
        print(f'§§§§§§§§§§§§ GET_METRICS: Elapsed time: {elp_time:0.2f}[s]')
        print(separator)

    return val

def getenv():
    """
    Return the current user's home directory.

    The location is read from the HOMEPATH environment variable on Windows
    (sys.platform == 'win32') and from HOME on every other platform. If that
    variable is not set, os.path.expanduser('~') is used as a fallback, so the
    result can always be passed to os.path.join.

    Requires sys and os modules:
    import sys
    import os
    Possible values for sys.platform are (https://docs.python.org/3/library/sys.html & https://stackoverflow.com/questions/446209/possible-values-from-sys-platform)
    ┍━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━┑
    │  System             │ Value               │
    ┝━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━┥
    │ Linux               │ linux or linux2 (*) │
    │ Windows             │ win32               │
    │ Windows/Cygwin      │ cygwin              │
    │ Windows/MSYS2       │ msys                │
    │ Mac OS X            │ darwin              │
    │ OS/2                │ os2                 │
    │ OS/2 EMX            │ os2emx              │
    │ RiscOS              │ riscos              │
    │ AtheOS              │ atheos              │
    │ FreeBSD 7           │ freebsd7            │
    │ FreeBSD 8           │ freebsd8            │
    │ FreeBSD N           │ freebsdN            │
    │ OpenBSD 6           │ openbsd6            │
    │ AIX                 │ aix (**)            │
    ┕━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━┙
    """
    # Only native Windows uses HOMEPATH. Defaulting to HOME everywhere else avoids
    # leaving env_home unbound on platforms other than darwin/linux (e.g. freebsdN, cygwin).
    env_home = 'HOMEPATH' if sys.platform == 'win32' else 'HOME'
    HOMEPATH = os.getenv(env_home)
    if not HOMEPATH:
        # Variable missing or empty: let the standard library resolve the home folder
        HOMEPATH = os.path.expanduser('~')

    return HOMEPATH

def check_path_exist(path, file=False):
    """
    Report (print) and return whether PATH exists.

    FILE=True means PATH is expected to be a file (checked with os.path.isfile);
    FILE=False (default) means PATH is expected to be a folder (checked with os.path.isdir).
    """
    checker = os.path.isfile if file else os.path.isdir
    is_path = checker(path)

    status = 'OK:' if is_path else 'ERROR:'
    target = 'file' if file else 'folder'
    negation = '' if is_path else ' NOT'
    print(f'{status} Path to {target} {path} does{negation} exist')

    return is_path

In [17]:
# Root folders: everything lives under <home>/Data/fMRIBreastData
HOMEPATH = getenv()
SRCPATH = os.path.join(HOMEPATH, 'Data', 'fMRIBreastData')

# Names of the sub-folders used by the notebook
DCMSRCFLDR = 'StudyData'  # NOTE(review): not referenced by the cells visible here; presumably the DICOM source folder - confirm
TESTSFLDR = 'tests'       # folder holding one sub-folder per test (Test000, Test001, ...)
REFTEST = 'Test000'       # test that contains the raw (unregistered) data
OUTPUTFLDR = 'results'    # folder where the summary CSV files are written
IMGSRCFLDR = 'datasets'   # sub-folder of each test that holds the image data

input_path = os.path.join(SRCPATH, TESTSFLDR)
save_path = os.path.join(SRCPATH, OUTPUTFLDR)
# Create the output folder if it does not exist yet (no error if it is already there)
os.makedirs(save_path, exist_ok=True)

In [18]:
DEBUGMODE = True # True: print timing information for the image loading, conversion and metric calculations
BATCHMODE = True # True: process every test found in INPUT_PATH (and every patient in each test); False: interactively pick a single test and patient

In [19]:
# Display the folder structure found under INPUT_PATH (tests > datasets > patients > visits)
display_tree(input_path)

tests/
├── Test000/
│   ├── datasets/
│   │   ├── CR-ANON68760/
│   │   │   ├── CR-Post-Treatment-20230120/
│   │   │   │   ├── CR-Post-Treatment-20230120.nii.gz
│   │   │   │   └── landmarks/
│   │   │   │       ├── CR-Post-Treatment-20230120-Slices077-115-Frame002.jpg
│   │   │   │       ├── RoiSet-Frame001.zip
│   │   │   │       └── RoiSet-Frame002.zip
│   │   │   └── CR-Pre-Treatment-20221212/
│   │   │       ├── CR-Pre-Treatment-20221212.nii.gz
│   │   │       └── landmarks/
│   │   │           ├── CR-Pre-Treatment-20221212-Slices-071-105-106-Frame002.jpg
│   │   │           ├── RoiSet-Frame001.zip
│   │   │           └── RoiSet-Frame002.zip
│   │   ├── DC-ANON97378/
│   │   │   ├── DC-Post-Treatment-20230726/
│   │   │   │   ├── DC-Post-Treatment-20230726.nii.gz
│   │   │   │   └── landmarks/
│   │   │   │       ├── DC-Post-Treatment-20230726-Slices069-Frame002.jpg
│   │   │   │       ├── RoiSet-Frame001.zip
│   │   │   │       └── RoiSet-Frame002.zip
│   │   │   └── DC-Pre-Trea

In [20]:
# Test000 contains just the raw (unregistered) data. Every other test is described
# in its own description.json file.
tests_available = sorted(list_folder_content(input_path))
if BATCHMODE:
    # Batch mode: every test folder is processed
    tests_to_process = tests_available
    print('Will process all test data in the folder:')
    print('\n'.join(tests_to_process))
else:
    # Interactive mode: ask for a single test folder (index 0 is the default)
    default_idx_test = 0
    print('Select a folder from the tests available:')
    display_folder_list(tests_available)
    test_idx = input(f'Pick a valid index to select a Test folder {tuple(range(len(tests_available)))} or type "x" to use the default:')
    if test_idx != 'x':
        test_idx = int(test_idx)
    else:
        print(f'Selecting index {default_idx_test} (default value)')
        test_idx = default_idx_test
    tests_to_process = [tests_available[test_idx]]
    print(f'Will process test {tests_to_process[0]}')


Will process all test data in the folder:
Test000
Test001
Test002
Test003
Test004
Test005
Test006
Test007
Test008
Test009
Test010


In [21]:
# The image data of a test lives in its IMGSRCFLDR sub-folder. In interactive (non-batch)
# mode a single patient is picked here and all of their visits will be processed.
if not BATCHMODE:
    path_to_patients = os.path.join(input_path, tests_to_process[0], IMGSRCFLDR)
    patient_list = sorted(list_folder_content(path_to_patients))
    default_idx_patient = 0
    print('Select a a patient from the list:')
    display_folder_list(patient_list)
    patient_idx = input(f'Pick a valid index to select a Patient {tuple(range(len(patient_list)))} or type "x" to use the default:')
    if patient_idx != 'x':
        patient_idx = int(patient_idx)
    else:
        print(f'Selecting index {default_idx_patient} (default value)')
        patient_idx = default_idx_patient
    patients_to_process = [patient_list[patient_idx]]
    print(f'Will process Patient {patients_to_process[0]}')
# In Batch Mode nothing is selected here: the patient list is meant to be built from the
# content of each test folder, i.e. inside the processing loop.

Will process the patient data on each Test folder


In [22]:
# Results table set-up: metric keys (as understood by get_metrics) mapped to the
# column titles used in the summary table
statistics = dict(
    mi='Mutual Information',
    mean_sq='Mean Squares',
    dist_corr='Correlation',
    mattes_mi='Mattes Mutual Information',
    joint_hist_mi='Joint Histogram Mutual Information',
)
# Value passed to get_metrics as sampling_pc
sampling_fraction = 0.5

# Column names: identification columns first, then one column per metric
hdr_row = ['TestNro', 'PatientID', 'PatientVisit', 'FixedVolume', 'Registered2Itself', 'TimePoint',
           *statistics.values()]

# One list per (test, patient, visit, time point) gets appended here
stats_rows = list()


In [25]:
# Loop over the test(s) and patient(s). Outside Batch Mode both lists hold a single
# item, so the same loops apply.
print(f'Time at the start: {time.ctime()}')
init_time = time.perf_counter()
debug_banner = '*' * 100  # separator line printed around each DEBUGMODE timing report
# Start from an empty table so that re-running this cell does not duplicate the rows
# collected by a previous (possibly interrupted) run
stats_rows = []
for test_to_process in tests_to_process:
    # Load the description file of the test; the context manager closes the file after reading
    with open(os.path.join(input_path, test_to_process, 'description.json')) as description_file:
        description = json.loads(description_file.read())
    # Identify the fixed volume (remember that the index is volume-1)
    reference_volume = description['Registration Details']['reference volume']
    is_reg_to_itself = description['Registration Details']['register fixed']
    ref_vol_idx = reference_volume - 1
    path_to_dataset = os.path.join(input_path, test_to_process, IMGSRCFLDR)
    if BATCHMODE:
        # In Batch Mode every patient found in this test folder is processed.
        # The loop below iterates over patients_to_process, so it has to be set here
        # (it is only defined by the patient-selection cell when NOT in Batch Mode).
        patient_list = sorted(list_folder_content(path_to_dataset))
        patients_to_process = patient_list

    for patient_to_process in patients_to_process:
        folders_in_patient = os.path.join(path_to_dataset, patient_to_process)
        visits = list_folder_content(folders_in_patient)
        for visit in visits:
            path_to_nii = os.path.join(folders_in_patient, visit)
            nii_folder_name = '.'.join([visit, 'nii','gz'])
            nii_full_path =  os.path.join(path_to_nii, nii_folder_name)
            # Does the 4D volume exist?
            exist_nii = os.path.isfile(nii_full_path)
            if not exist_nii:
                print(f'File {nii_folder_name} in {path_to_nii} does not exist, nothing done')
                continue

            print(f'Processing file {nii_full_path}  ({test_to_process}, {nii_folder_name}), please wait...')
            # Load the volume:
            if DEBUGMODE:
                print(debug_banner)
                print('Timing IMAGE_READ (vol_to_proc)...')
                start_time = time.perf_counter()
            vol_to_process = ants.image_read(nii_full_path)
            # Drop the last (time) component to get the geometry of a single 3D volume
            spacing, origin = [vol_to_process.spacing[:-1], vol_to_process.origin[:-1]]
            if DEBUGMODE:
                end_time = time.perf_counter()
                elp_time = end_time - start_time
                print(f'\tElapsed time: {elp_time:0.2f}[s]')
                print(debug_banner)

            # To ensure a comparable comparison across tests, load the reference volume from the
            # unregistered data (REFTEST). The path is rebuilt from its components rather than with
            # str.replace on the full path, which would also alter any other occurrence of the test name.
            ref_nii_full_path = os.path.join(input_path, REFTEST, IMGSRCFLDR,
                                             patient_to_process, visit, nii_folder_name)
            print(f"path to fixed volume: {ref_nii_full_path}")
            if DEBUGMODE:
                print(debug_banner)
                print('Timing IMAGE_READ (fixed_volume)...')
                start_time = time.perf_counter()
            fixed_volume = ants.image_read(ref_nii_full_path)
            if DEBUGMODE:
                end_time = time.perf_counter()
                elp_time = end_time - start_time
                print(f'\tElapsed time: {elp_time:0.2f}[s]')
                print(debug_banner)

            # The reference always comes from the unregistered data, not from vol_to_process
            ref_vol = fixed_volume[:, :, :, ref_vol_idx]
            if DEBUGMODE:
                print(debug_banner)
                print('Timing Image Conversion FROM_NUMPY (fixed_volume)...')
                start_time = time.perf_counter()
            ref_3d = ants.from_numpy(ref_vol,
                                     spacing=spacing,
                                     origin=origin)
            if DEBUGMODE:
                end_time = time.perf_counter()
                elp_time = end_time - start_time
                print(f'\tElapsed time: {elp_time:0.2f}[s]')
                print(debug_banner)

            # Compare every time point of the volume against the reference volume
            for idx_time in range(vol_to_process.shape[-1]):
                if DEBUGMODE:
                    print(debug_banner)
                    print('Timing Image Conversion FROM_NUMPY (moving_volume)...')
                    start_time = time.perf_counter()
                mov_3d = ants.from_numpy(vol_to_process[:,:,:,idx_time],
                                         spacing=spacing,
                                         origin=origin)
                print(f'Comparison between t[{ref_vol_idx+1}] & t[{idx_time+1}]:')
                row_to_append = [test_to_process, patient_to_process, visit, reference_volume, is_reg_to_itself, idx_time+1]
                if DEBUGMODE:
                    end_time = time.perf_counter()
                    elp_time = end_time - start_time
                    print(f'\tElapsed time: {elp_time:0.2f}[s]')
                    print(debug_banner)
                if DEBUGMODE:
                    print(debug_banner)
                    print('Timing Stats calculation ...')
                    start_time = time.perf_counter()
                # One value per metric, appended in the same order as the columns of hdr_row
                for statsID in statistics:
                    statsVal = get_metrics(ref_3d, mov_3d, metric=statsID, debug=DEBUGMODE, sampling_pc=sampling_fraction)
                    row_to_append.append(statsVal)
                if idx_time == ref_vol_idx:
                    print('(As positive control, the metric(s) are also calculated between the fixed volume and itself)')
                if DEBUGMODE:
                    end_time = time.perf_counter()
                    elp_time = end_time - start_time
                    print(f'\tElapsed time: {elp_time:0.2f}[s]')
                    print(row_to_append)
                    print(debug_banner)

                stats_rows.append(row_to_append)

            print('Just taking a break...')
            time.sleep(1)
            print('ok, ready to go...')

final_time = time.perf_counter()
print('Creating the dataframe and saving the results...')
df = pd.DataFrame(columns=hdr_row, data=stats_rows)
# Show the first rows as a quick sanity check (a bare `df.head` mid-cell displays nothing)
print(df.head())
if BATCHMODE:
    csv_filename = f'summary_results_metric_sampling_{sampling_fraction}.csv'
else:
    csv_filename = f'summary_results_{tests_to_process[0]}_{patients_to_process[0]}.csv'
df.to_csv(os.path.join(save_path, csv_filename), index_label='IdxKey')
elapsed_time = final_time - init_time
print(f'Elapsed time to summarise the datasets: {elapsed_time:0.2f}[s] ({timedelta(seconds=elapsed_time)})')
print(f'Time at the end: {time.ctime()}')
print('All done, check the results. Bye!')

Time at the start: Fri Mar 22 23:22:13 2024
Processing file /Users/joseulloa/Data/fMRIBreastData/tests/Test000/datasets/CR-ANON68760/CR-Pre-Treatment-20221212/CR-Pre-Treatment-20221212.nii.gz  (Test000, CR-Pre-Treatment-20221212.nii.gz), please wait...
****************************************************************************************************
Timing IMAGE_READ (vol_to_proc)...
	Elapsed time: 3.50[s]
****************************************************************************************************
path to fixed volume: /Users/joseulloa/Data/fMRIBreastData/tests/Test000/datasets/CR-ANON68760/CR-Pre-Treatment-20221212/CR-Pre-Treatment-20221212.nii.gz
****************************************************************************************************
Timing IMAGE_READ (fixed_volume)...
	Elapsed time: 2.84[s]
****************************************************************************************************
************************************************************************

In [24]:
# Stand-alone copy of the save-and-report statements that end the processing cell.
# It relies on df, final_time and init_time being already defined by that cell.
# NOTE(review): the elapsed time is only meaningful when init_time and final_time come from
# the same completed run; otherwise (e.g. after an interrupted re-run) it can be negative and
# the saved df may be stale. Consider removing this cell - TODO confirm it is still needed.
if BATCHMODE:
    csv_filename = f'summary_results_metric_sampling_{sampling_fraction}.csv'
else:
    csv_filename = f'summary_results_{tests_to_process[0]}_{patients_to_process[0]}.csv'
# Write the summary table to SAVE_PATH, labelling the index column 'IdxKey'
df.to_csv(os.path.join(save_path, csv_filename), index_label='IdxKey')
elapsed_time = final_time - init_time
print(f'Elapsed time to summarise the datasets: {elapsed_time:0.2f}[s] ({timedelta(seconds=elapsed_time)})')
print(f'Time at the end: {time.ctime()}')
print('All done, check the results. Bye!')

Elapsed time to summarise the datasets: -115.75[s] (-1 day, 23:58:04.249210)
Time at the end: Fri Mar 22 22:59:48 2024
All done, check the results. Bye!
