# *Initial* **Setup**

## **Library** *Settings*

In [None]:
# Library Download
%pip install --upgrade pip
%pip install numpy
%pip install argparse
%pip install pandas
%pip install torch
%pip install matplot
%pip install plotly
%pip install itk
%pip install itkwidgets
%pip install h5py
%pip install h5pyViewer
%pip install nilearn
%pip install dipy
%pip install openpyxl
%pip install tabulate

In [1]:
# Library Import
import os
import sys
import pickle
import psutil
import itertools
import numpy as np
import argparse
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import itk
import itkwidgets
import h5py
import nilearn
import dipy
import warnings

In [2]:
# Functionality Import
from pathlib import Path
from typing import List, Literal, Optional, Callable, Dict, Literal, Optional, Union, Tuple
from torch.utils.data import DataLoader, Dataset, TensorDataset
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from nilearn.image import smooth_img, load_img
from nilearn.masking import apply_mask, unmask
#from dipy.reconst.shm import cart2sphere, real_sh_descoteaux_from_index, sph_harm_ind_list
from ipywidgets import interactive, IntSlider
#from PIL import Image
from tabulate import tabulate
warnings.filterwarnings('ignore')

## **Control** *Station*

In [6]:
# Parser Initialization
parser = argparse.ArgumentParser(
    description = "MUDIVisualizer")

# Filepath Arguments
# (string defaults are converted by argparse through `type`, so every value below ends up as a Path)
path = parser.add_argument_group('Required Filepaths')
path.add_argument('--param_filepath', type = Path, default = '../Raw Data/parameters_new.xlsx',
                    help = 'Filepath for Excel File containing MUDI Dataset Parameters')
path.add_argument('--mask_folderpath', type = Path, default = '../Patient Mask',
                    help = 'Folderpath for Folder containing MUDI Dataset Patient Mask Files')
path.add_argument('--patient_folderpath', type = Path, default = '../Patient Data',
                    help = 'Folderpath for Folder containing MUDI Dataset Patient Data Files')
path.add_argument('--info_filepath', type = Path, default = '../Raw Data/header1_.csv',
                    help = 'Filepath for CSV File containing MUDI Dataset Patient Information')

# Shuffle Arguments (read as parse.hShuffle / parse.vShuffle by hMUDI.split)
shuffling = parser.add_argument_group('Shuffle Settings')
shuffling.add_argument('--hShuffle', action = 'store_true',
                    help = 'Shuffle Parameters during the Horizontal / Parameter Splitting')
shuffling.add_argument('--vShuffle', action = 'store_true',
                    help = 'Shuffle Samples inside the DataLoaders')

# NOTE(review): the hMUDI example cells also read parse.data_filepath, parse.patient_filepath
# and parse.y_filepath, which are not declared here -- confirm where they are meant to come from.


# ----------------------------------------------------------------------------------------------------------------------------


# Empty argument list: inside a notebook the kernel's own sys.argv must not be parsed
parse = parser.parse_args([])
parse.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# *Data* **Access**

## *Raw* -> *Patient* **Data Conversion**

In [None]:
# Dataset Initialization Example
# Builds the horizontal reader from the filepaths stored in the parsed arguments.
# NOTE(review): hMUDI is only defined in a later cell, and the parser cell visible in this
# notebook does not declare data_filepath / patient_filepath / y_filepath -- confirm both
# are available before running this cell on a fresh kernel.
data = hMUDI(
    dataset_filepath = parse.data_filepath,
    param_filepath = parse.param_filepath,
    patient_filepath = parse.patient_filepath,
    y_filepath = parse.y_filepath,
)

In [None]:
# Patient Loop
# (the last row of patientInfo is the one hMUDI labels 'Total', hence the "- 1")
for p in range(data.patientInfo.shape[0] - 1):

    # Patient Data Access from Full Dataset
    pN = data.patientInfo['Patient'].iloc[p]
    print(f"Accessing Data for Patient {pN}...")
    pData = data.data[:].iloc[np.where(data.y == pN)[0]].T      # rows labelled with this patient, transposed
    # `assert(cond, msg)` would assert a non-empty tuple (always true); condition and message must be separate
    assert pData.shape[1] == data.patientInfo['Voxels'].iloc[p], "Patient Data Dimensions not Correct!"

    # Patient Data File Saving
    # NOTE(review): written relative to the working directory, while the parser's
    # patient_folderpath default is '../Patient Data' -- confirm the intended location.
    pData.to_csv(f"Patient Data/p{pN}.csv")
    del pN, pData

## **Horizontal** / **Voxel** *Reader*

In [None]:
# Horizontal MUDI Dataset Initialization Class
class hMUDI(Dataset):
    """Horizontal / voxel-wise reader for the MUDI dataset.

    Loads the full value table from an HDF5 file together with the parameter
    sheet, the per-patient voxel counts and the per-row patient labels.
    `split()` then cuts the table by patient (rows) and by parameter (columns)
    and wraps every resulting piece in a DataLoader.
    """

    # Constructor Function
    def __init__(
        self,
        dataset_filepath: Path,     # Path for File containing Dataset Voxel Values
        param_filepath: Path,       # Path for File containing all 1344 Parameter Settings Combination
        patient_filepath: Path,     # Path for File Containing Number of Voxels per Patients
        y_filepath: Path,           # Path for File Containing Voxels per Patient
    ):
        """Read the parameter, patient and label files and load the dataset values.

        Raises AssertionError when the dataset file is larger than the memory
        currently reported as available.
        """

        # Class Variable Logging
        super().__init__()          # zero-argument form; `super(hMUDI)` alone is an unbound proxy
        self.dataset_filepath = dataset_filepath
        self.param_filepath = param_filepath
        self.patient_filepath = patient_filepath
        self.y_filepath = y_filepath

        # Required Memory Space
        file_size = os.path.getsize(dataset_filepath)
        available_memory = psutil.virtual_memory().available
        assert(available_memory >= file_size
        ), f"ERROR: Dataset requires {file_size}b, but only {available_memory}b is available!"

        # Dataset's Parameter & Patient Info Access
        self.params = pd.read_excel(self.param_filepath)
        self.patientInfo = pd.read_csv(self.patient_filepath)
        # Label the last row on the frame itself: the chained `df[col].iloc[-1] = ...`
        # form may write to a temporary copy and leave the frame untouched.
        self.patientInfo['Patient'] = self.patientInfo['Patient'].astype(object)
        patient_column = self.patientInfo.columns.get_loc('Patient')
        self.patientInfo.iloc[-1, patient_column] = 'Total'
        self.y = pd.read_csv(self.y_filepath)
        self.y = pd.DataFrame(self.y['1'].values, columns = ['y'])
        self.num_patients = self.patientInfo.shape[0] - 1       # Total Number of Patients in Dataset ('Total' row excluded)
        self.num_params = self.params.shape[0]                  # Total Number of Parameters in Dataset

        # Dataset's Value Access
        if 'data' not in dir(self):
            print("Now Downloading Data...")
            with h5py.File(dataset_filepath, 'r') as dataset:
                self.data = pd.DataFrame(dataset.get('data1'))
                #self.data.columns = (self.params.T).values.tolist()


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////


    # Hold-out Size Conversion Function
    @staticmethod
    def _holdout_size(holdout_params, percentage):
        """Translate a hold-out amount into a `test_size` for train_test_split.

        train_test_split reads an int as an absolute count and a float as a
        fraction, so a percentage has to be divided by 100 first.
        """
        return holdout_params / 100 if percentage else holdout_params

    # Shuffle Setting Resolution Function
    @staticmethod
    def _shuffle_flag(explicit, parser_attr):
        """Return `explicit` when given, else the notebook-level `parse.<parser_attr>` (False if absent)."""
        if explicit is not None:
            return bool(explicit)
        return bool(getattr(globals().get('parse'), parser_attr, False))

    # Horizontal / Parameter Splitting Function
    def _split_params(self, X, y, holdout_params, percentage, shuffle):
        """Split the columns of `X` into kept / held-out parameters and re-attach the labels `y`."""
        X_kept, X_held, params_kept, params_held = train_test_split(
                    X.T, self.params,
                    test_size = self._holdout_size(holdout_params, percentage),
                    shuffle = shuffle, random_state = 42)
        return (pd.concat([X_kept.T, y], axis = 1),
                pd.concat([X_held.T, y], axis = 1),
                params_kept, params_held)

    # DataLoader Building Function
    def _loader(self, frame, shuffle):
        """Wrap the values of `frame` in a float32 DataLoader using the stored batch size."""
        return DataLoader(torch.tensor(frame.values, dtype = torch.float32),
                          batch_size = self.batch_size, shuffle = shuffle)


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////


    # Dataset Splitting Function
    def split(
        self,
        percentage: bool = False,       # Control Variable for the Format of Parameter Limits
        train_patients: int = 4,        # No. Patients used in Training
        train_params: int = 500,        # No. / Percentage of Parameters used in Training
        val_patients: int = 1,          # No. Patients used in Validation
        val_params: int = 10,           # No. / Percentage of Parameters used in Validation
        batch_size: int = 500,          # Sample Batch Size Value
        param_shuffle: Optional[bool] = None,       # Parameter Shuffling (None: falls back to parse.hShuffle, else False)
        sample_shuffle: Optional[bool] = None,      # DataLoader Shuffling (None: falls back to parse.vShuffle, else False)
    ):
        """Split the dataset by patient and by parameter and build the DataLoaders.

        Rows are divided in file order into training, validation and (when any
        patients remain) test sets; each set is then divided column-wise into
        "training" and "validation" parameters. The results are stored on the
        instance as X_*, params_* and dl_* attributes, and a summary table is
        printed. Raises AssertionError on unsupported input values.
        """

        # General Assertions for Input Values
        print("Proceeding to Data Splitting...\n")
        assert( 0 < batch_size <= self.num_params                       # Limits for Batch Size
        ), f"ERROR: Requested Batch Size not Supported!"
        assert( 0 < train_patients <= self.num_patients                 # Limits for No. of Training Patients
        ), f"ERROR: Training Patient Number not Supported!"
        assert( 0 < val_patients <= self.num_patients                   # Limits for No. of Validation Patients
        ), f"ERROR: Validation Patient Number not Supported!"
        assert( train_patients + val_patients <= self.num_patients      # Limits for No. of Selected Patients
        ), f"ERROR: Total Patient Number not Supported!"
        assert( 0 < train_params <= self.num_params                     # Limits for No. of Training Parameters
        ), f"ERROR: Training Parameter Number not Supported!"
        assert( 0 < val_params <= self.num_params                       # Limits for No. of Validation Parameters
        ), f"ERROR: Validation Parameter Number not Supported!"

        # Shuffle Settings
        param_shuffle = self._shuffle_flag(param_shuffle, 'hShuffle')
        sample_shuffle = self._shuffle_flag(sample_shuffle, 'vShuffle')

        # Split Datasets' Vertical Shape Initialization
        voxels = self.patientInfo['Voxels']
        self.train_patients = train_patients
        self.train_samples = sum(voxels.iloc[0 : train_patients])
        self.val_patients = val_patients
        self.val_samples = sum(voxels.iloc[train_patients : train_patients + val_patients])
        self.test_patients = self.num_patients - (train_patients + val_patients)
        self.test_samples = sum(voxels.iloc[train_patients + val_patients : -1])      # -1 leaves out the 'Total' row
        self.batch_size = batch_size

        # ----------------------------------------------------------------------------------------------------------------------------

        # Split Datasets' Horizontal Shape Initialization
        if(percentage):

            # Assertions for Parameter Input Values (Percentage Value)
            assert( 0 < train_params <= 100                 # Percentage Limits for Training Parameters
            ), f"ERROR: Training Parameter Number not Supported!"
            assert(0 < val_params <= 100                    # Percentage Limits for Validation Parameters
            ), f"ERROR: Validation Parameter Number not Supported!"
            whole = 100
        else:
            whole = self.num_params                         # numeric limits were already asserted above

        # Class Variable Logging (percentages or counts, depending on `percentage`)
        self.trainTrain_params = train_params                       # Training Set's Training Parameters
        self.trainVal_params = whole - train_params                 # Training Set's Validation Parameters
        self.valTrain_params = val_params                           # Validation Set's Training Parameters
        self.valVal_params = whole - val_params                     # Validation Set's Validation Parameters
        if( self.test_patients != 0):
            self.testTrain_params = val_params                      # Test Set's Training Parameters
            self.testVal_params = whole - self.testTrain_params     # Test Set's Validation Parameters

        # ----------------------------------------------------------------------------------------------------------------------------

        # Dataset Vertical / Sample Splitting (Without Test Set)
        X_train, X_val, y_train, y_val = train_test_split(
            self.data, self.y,
            test_size = self.val_samples + self.test_samples,
            shuffle = False, random_state = 42)

        # Dataset Vertical / Sample Splitting (With Test Set)
        if(self.test_patients != 0):
            X_val, X_test, y_val, y_test = train_test_split(
                X_val, y_val,
                test_size = self.test_samples,
                shuffle = False, random_state = 42)

        # ----------------------------------------------------------------------------------------------------------------------------

        # Dataset Horizontal / Parameter Splitting | Training Set
        (self.X_trainTrain, self.X_trainVal,
         self.params_trainTrain, self.params_trainVal) = self._split_params(
            X_train, y_train, self.trainVal_params, percentage, param_shuffle)

        # Dataset Horizontal / Parameter Splitting | Validation Set
        (self.X_valTrain, self.X_valVal,
         self.params_valTrain, self.params_valVal) = self._split_params(
            X_val, y_val, self.valVal_params, percentage, param_shuffle)

        # Dataset Horizontal / Parameter Splitting | Testing Set
        if( self.test_patients != 0):
            (self.X_testTrain, self.X_testVal,
             self.params_testTrain, self.params_testVal) = self._split_params(
                X_test, y_test, self.testVal_params, percentage, param_shuffle)

        # ----------------------------------------------------------------------------------------------------------------------------

        # DataLoader Building
        self.dl_trainTrain = self._loader(self.X_trainTrain, sample_shuffle)
        self.dl_trainVal = self._loader(self.X_trainVal, sample_shuffle)
        self.dl_valTrain = self._loader(self.X_valTrain, sample_shuffle)
        self.dl_valVal = self._loader(self.X_valVal, sample_shuffle)
        if (self.test_patients != 0):
            self.dl_testTrain = self._loader(self.X_testTrain, sample_shuffle)
            self.dl_testVal = self._loader(self.X_testVal, sample_shuffle)

        # Split Datasets' Contents Report
        def describe(amount):
            # One table cell: "<count> (<percentage>%)" in either input format
            if(percentage):
                return f"{(amount / 100) * self.num_params} ({np.round(amount, 2)}%)"
            return f"{amount} ({np.round((amount / self.num_params) * 100, 2)}%)"

        # The Test Set row reuses the Validation Set settings (testTrain_params is set from val_params)
        print(tabulate([[self.train_patients, self.train_samples, describe(self.trainTrain_params), describe(self.trainVal_params)],
                        [self.val_patients, self.val_samples, describe(self.valTrain_params), describe(self.valVal_params)],
                        [self.test_patients, self.test_samples, describe(self.valTrain_params), describe(self.valVal_params)],
                        [self.num_patients, (self.train_samples + self.val_samples + self.test_samples), "", ""]],
                        headers = ['No. Patients', 'No. Voxels', 'Training Parameters', 'Validation Parameters'],
                        showindex = ['Training Set', 'Validation Set', 'Test Set', 'Total'], tablefmt = 'fancy_grid'))


In [None]:
# Dataset Initialization Example
# (reader built from the filepaths stored in the parsed arguments, then split with default values)
data = hMUDI(
    dataset_filepath = parse.data_filepath,
    param_filepath = parse.param_filepath,
    patient_filepath = parse.patient_filepath,
    y_filepath = parse.y_filepath,
)
data.split()

# DataLoader Usage Example (Training Parameters for Training Set)
# The loader must yield one batch per started block of `batch_size` training samples
expected_batches = int(np.ceil(data.train_samples / data.batch_size))
assert(len(data.dl_trainTrain) == expected_batches)
for i, batch in enumerate(data.dl_trainTrain):
    n_voxels, n_settings = batch.shape[0], batch.shape[1]
    print(f"Batch #{i + 1}: {n_voxels} Voxels / Samples | {n_settings} Settings / Parameters")


## **1D Vertical** / **Image** *Reader*

In [None]:
# Vertical 1D MUDI Dataset Initialization Class
class v1DMUDI(Dataset):
    """Vertical 1D / image-wise reader for the MUDI dataset.

    Reads one CSV file per patient from a folder, reduces each patient table
    with PCA and splits its rows against the parameter table into "training"
    and "validation" parts. `split()` stacks those parts into a training set
    and a test set, stored as dictionaries of DataFrames.
    """

    # Constructor / Initialization Function
    def __init__(
        self,
        patient_folderpath: Path,       # Path for Folder Containing Patient Data Files
        param_filepath: Path,           # Path for File containing all 1344 Parameter Settings Combination
        info_filepath: Path,            # Path for List of Patients and Corresponding Image Size inside Full Dataset
    ):
        """Read the parameter table and the patient list; no patient data is loaded yet."""

        # Parameter Value Access
        super().__init__()          # zero-argument form; `super(v1DMUDI)` alone is an unbound proxy
        self.params = pd.read_excel(param_filepath)             # List of Dataset's Parameters
        self.num_params = self.params.shape[0]                  # Total Number of Parameters in Dataset

        # Patient Information Access
        self.patient_folderpath = patient_folderpath
        self.patient_info = pd.read_csv(info_filepath)          # List of Patients and Corresponding IDs & Image Sizes inside Full Dataset
        self.patient_info = self.patient_info[:-1]              # Eliminating the Last Row containing Useless Information from the Patient Information
        self.num_patients = self.patient_info.shape[0]          # Number of Patients inside Full Dataset


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # 1D Image Pre-Processing Function
    def pre_process(
        self,
        img: pd.DataFrame,
    ):
        """Reduce a 2D patient table to `self.pre_shape` columns with `self.pca`.

        Relies on `self.pre_shape` and `self.pca`, which are set by `split()`.
        The PCA is re-fitted on every call.
        """

        # Input Variable Assertions
        assert(img.ndim == 2), "ERROR: Input Image Shape not Supported! (2D Arrays only)"
        assert(self.pre_shape < img.shape[1]), "ERROR: Convolution Layer Size must be smaller than Original Image's no. of Voxels!"

        # Dimensionality Reduction
        img = np.array(img.values)
        img_final = self.pca.fit_transform(img)
        return pd.DataFrame(img_final)

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Patient Data Access & Splitting Function
    def split_patient(
        self,
        patient_number: int,                # Number for the Patient File being Read and Acquired (in Order)
        train_params: int = 500,            # Number / Percentage of Parameters to be used in the Training Section of the Patient
        percentage: bool = False,           # Control Variable for the Usage of Percentage Values in train_params
        sample_shuffle: bool = False,       # Ability to Shuffle the Samples inside both Training and Validation Datasets
    ):
        """Load one patient file, pre-process it and split it by parameter.

        Returns (pX_train, pX_val, py_train, py_val), where the label tables
        are rows of the parameter table with an added 'Patient' column.
        Raises AssertionError for an unknown patient, a missing file, too
        little available memory or an unsupported `train_params`.
        """

        # Patient Data Access (including all Requirements)
        assert(0 <= patient_number < self.num_patients), f"ERROR: Input Patient not Found!"         # Assertion for the Existence of the Requested Patient
        patient_id = self.patient_info['Patient'].iloc[patient_number]                              # Patient ID contained within the Patient List
        patient_filepath = Path(f"{self.patient_folderpath}/p{patient_id}.csv")                     # Patient Filepath from detailed Folder
        assert(patient_filepath.exists()                                                            # Assertion for the Existence of Patient File in said Folder
        ), f"Filepath for Patient {patient_id} is not in the Dataset!"
        file_size = os.path.getsize(patient_filepath)                                               # Memory Space occupied by Patient File
        available_memory = psutil.virtual_memory().available                                        # Memory Space Available for Computation
        assert(available_memory >= file_size                                                        # Assertion for the Existence of Available Memory Space
        ), f"ERROR: Dataset requires {file_size}b, but only {available_memory}b is available!"
        pX = pd.read_csv(patient_filepath); del pX['Unnamed: 0']                                    # Full Patient Data (saved index column dropped)

        # ----------------------------------------------------------------------------------------------------------------------------

        # Computation of Training & Validation Parameter Numbers (Percentage Input)
        if(percentage):
            assert(0 < train_params <= 100                              # Percentage Limits for Number of Training Parameters
            ), f"ERROR: Training Parameter Number not Supported!"
            train_params = train_params / 100                           # Fraction Value for Training Parameters
            val_params = 1 - train_params                               # Fraction Value for Validation Parameters

        # Computation of Training & Validation Parameter Numbers (Numerical Input)
        else:
            assert(0 < train_params <= self.num_params                  # Numerical Limits for Number of Training Parameters
            ), f"ERROR: Training Parameter Number not Supported!"
            val_params = self.num_params - train_params                 # Numerical Value for Validation Parameters

        # ----------------------------------------------------------------------------------------------------------------------------

        # Patient Dataset Splitting into Training & Validation Sets
        # Labels are built on a copy: writing the 'Patient' column into self.params itself
        # would permanently alter the shared parameter table on every call.
        py = self.params.copy(); py['Patient'] = patient_id         # Patient Data Label Handling
        pX = self.pre_process(pX)                                   # Patient Data Preprocessing
        pX_train, pX_val, py_train, py_val = train_test_split(  pX, py,
                                                                test_size = val_params,
                                                                shuffle = sample_shuffle,
                                                                random_state = 42)
        return pX_train, pX_val, py_train, py_val


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Patient Set Building Function
    def _build_set(self, patient_numbers, train_params, percentage, sample_shuffle, set_label):
        """Stack the per-patient splits of `patient_numbers` into one dictionary of DataFrames."""

        # Label width: parameter columns plus the 'Patient' column (counted once if already present)
        num_labels = self.params.shape[1] + int('Patient' not in self.params.columns)

        # Empty seeds keep the expected shapes even when no patient is added
        X_train = [np.empty([0, self.pre_shape])]; X_val = [np.empty([0, self.pre_shape])]
        y_train = [np.empty([0, num_labels])]; y_val = [np.empty([0, num_labels])]
        for p in patient_numbers:

            # Patient Data Access & Treatment
            print(f"Adding Patient {self.patient_info['Patient'].iloc[p]}'s Data to the {set_label} Set...")
            pX_train, pX_val, py_train, py_val = self.split_patient(patient_number = p,
                                                                    train_params = train_params,
                                                                    percentage = percentage,
                                                                    sample_shuffle = sample_shuffle)
            X_train.append(pX_train); X_val.append(pX_val)
            y_train.append(py_train); y_val.append(py_val)

        # Single concatenation per table instead of one re-allocation per patient
        return {'X_train': pd.DataFrame(np.concatenate(X_train, axis = 0)),
                'X_val': pd.DataFrame(np.concatenate(X_val, axis = 0)),
                'y_train': pd.DataFrame(np.concatenate(y_train, axis = 0)),
                'y_val': pd.DataFrame(np.concatenate(y_val, axis = 0))}

    # Dataset Splitting Function
    def split(
        self,
        test_patients: int = 1,             # Number of Patients to be used in the Test Set
        train_params: int = 500,            # Number / Percentage of Parameters for the Training of the Training Set
        test_params: int = 20,              # Number / Percentage of Parameters for the Training of the Test Set
        pre_shape: int = 1200,              # Intermediate Dataset Shape as of Pre-Processing
        percentage: bool = False,           # Control Variable for the Usage of Percentage Values in train_params
        patient_shuffle: bool = False,      # Ability to Shuffle the Patients that compose both Training / Validation and Test Datasets
        sample_shuffle: bool = False,       # Ability to Shuffle the Samples inside both Training / Validation and Test Datasets
    ):
        """Build `self.train_set` and `self.test_set` and print a summary table.

        The last `test_patients` entries of the (optionally shuffled) patient
        list form the test set, all others the training set. Raises
        AssertionError on unsupported input values.
        """

        # Patient Number Variable Logging
        assert(0 < test_patients <= self.num_patients               # Limits for Number of Test Set Patients
        ), f"ERROR: Test Patient Number not Supported!"
        self.train_patients = self.num_patients - test_patients     # Number of Patients to be used in the Training & Validation Sets
        self.test_patients = test_patients                          # Number of Patients to be used in the Test Sets
        self.pre_shape = pre_shape

        # Pre-Processing Dimensionality Reduction + Patient Shuffling Feature
        self.pca = PCA(n_components = self.pre_shape)
        if(patient_shuffle):
            self.patient_info = self.patient_info.iloc[np.random.permutation(len(self.patient_info))]

        # ----------------------------------------------------------------------------------------------------------------------------

        # Computation of Training & Validation Parameter Numbers (Percentage Input)
        if(percentage):
            assert(0 < train_params <= 100                              # Percentage Limits for Number of Training Set's Parameters
            ), f"ERROR: Training Set's Parameter Number not Supported!"
            assert(0 < test_params <= 100                               # Percentage Limits for Number of Test Set's Parameters
            ), f"ERROR: Test Set's Parameter Number not Supported!"
            whole = 100

        # Computation of Training & Validation Parameter Numbers (Numerical Input)
        else:
            assert(0 < train_params <= self.num_params                  # Numerical Limits for Number of Training Set's Parameters
            ), f"ERROR: Training Set's Parameter Number not Supported!"
            assert(0 < test_params <= self.num_params                   # Numerical Limits for Number of Test Set's Parameters
            ), f"ERROR: Test Set's Parameter Number not Supported!"
            whole = self.num_params

        self.trainTrain_params = train_params                       # Value for Training Set's Training Parameters
        self.trainVal_params = whole - train_params                 # Value for Training Set's Validation Parameters
        self.testTrain_params = test_params                         # Value for Test Set's Training Parameters
        self.testVal_params = whole - test_params                   # Value for Test Set's Validation Parameters

        # ----------------------------------------------------------------------------------------------------------------------------

        # Training & Validation Sets Building
        self.train_set = self._build_set(range(self.train_patients),
                                         self.trainTrain_params, percentage, sample_shuffle, 'Training')

        # Test Set Building
        self.test_set = self._build_set(range(self.train_patients, self.train_patients + self.test_patients),
                                        self.testTrain_params, percentage, sample_shuffle, 'Test')

        # ----------------------------------------------------------------------------------------------------------------------------

        # Split Datasets' Content Report
        def describe(amount):
            # One table cell: "<count> (<percentage>%)" in either input format
            if(percentage):
                return f"{(amount / 100) * self.num_params} ({amount}%)"
            return f"{amount} ({np.round((amount / self.num_params) * 100, 2)}%)"

        print(tabulate([[self.train_patients, describe(self.trainTrain_params), describe(self.trainVal_params)],
                        [self.test_patients, describe(self.testTrain_params), describe(self.testVal_params)]],
                        headers = ['No. Patients', 'Training Parameters', 'Validation Parameters'],
                        showindex = ['Training Set', 'Test Set'], tablefmt = 'fancy_grid'))


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Dataset Saving Function
    def save(
        self,
        path: Path,
        version: int = 0,
    ):
        """Pickle this object into `path` under a version-specific file name."""
        # `with` closes the file; the former bare `f.close` never called the method
        with open(f'{path}/Vertical 1D MUDI (Version {version})', 'wb') as f:
            pickle.dump(self, f)

    # ----------------------------------------------------------------------------------------------------------------------------

    # Dataset Loading Function
    @staticmethod
    def load(
        path: Path,
        version: int = 0,
    ):
        """Load and return the object pickled by `save` for the given version.

        Declared static so that both `v1DMUDI.load(...)` and `instance.load(...)` work.
        """
        # SECURITY: unpickling executes arbitrary code; only load files that you created yourself
        with open(f'{path}/Vertical 1D MUDI (Version {version})', 'rb') as f:
            mudi = pickle.load(f)
        return mudi

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

In [None]:
# Dataset Initialization Example
# (vertical 1D reader built from the parser's folder / file paths, then split with default values)
mudi = v1DMUDI(
    patient_folderpath = parse.patient_folderpath,
    param_filepath = parse.param_filepath,
    info_filepath = parse.info_filepath,
)
mudi.split()

In [None]:
# Dataset Loading
# NOTE(review): main_folderpath is not defined in the cells visible here -- confirm where it is set.
saved_data_folderpath = Path(f"{main_folderpath}Saved Data")
mudi = v1DMUDI.load(saved_data_folderpath, version = 0)

## **3D Vertical** / **Image** *Reader*

In [91]:
# Vertical 3D MUDI Dataset Initialization Class
class v3DMUDI(Dataset):
    """
    Reader for the vertical 3D MUDI dataset: each patient is loaded as a 4D array
    whose first axis is paired, row by row, with the parameter table.

    Attributes set by the constructor:
        params, num_params              Parameter table read from `param_filepath` and its number of rows
        patient_info, num_patients      Patient table read from `info_filepath` (last row dropped) and its number of rows
        patient_folderpath              Folder holding the 'p<ID>.csv' patient files
        mask_folderpath                 Folder holding the 'p<ID>.nii' mask files
        pre_processing                  Pre-processing method name; None until `split` selects one
        slice                           When True, `split_patient` flattens volumes into 2D slices

    Attributes set by `split`:
        train_patients, test_patients, trainTrain_params, trainVal_params,
        testTrain_params, testVal_params, train_set, test_set
    """

    # Constructor / Initialization Function
    def __init__(
        self,
        patient_folderpath: Path,                   # Path for Folder Containing Patient Data Files
        mask_folderpath: Path,                      # Path for Folder Containing Mask Data Files
        param_filepath: Path,                       # Path for File containing all 1344 Parameter Settings Combination
        info_filepath: Path,                        # Path for List of Patients and Corresponding Image Size inside Full Dataset
    ):
        """Read the parameter and patient tables; no patient image is loaded here."""

        # Parameter Value Access
        super().__init__()                                      # `super(v3DMUDI).__init__()` did not initialise the base class
        self.params = pd.read_excel(param_filepath)             # List of Dataset's Parameters
        self.num_params = self.params.shape[0]                  # Total Number of Parameters in Dataset

        # Patient Information Access
        self.patient_folderpath = patient_folderpath
        self.mask_folderpath = mask_folderpath
        self.patient_info = pd.read_csv(info_filepath)          # List of Patients and Corresponding IDs & Image Sizes inside Full Dataset
        self.patient_info = self.patient_info[:-1]              # Eliminating the Last Row containing Useless Information from the Patient Information
        self.num_patients = self.patient_info.shape[0]          # Number of Patients inside Full Dataset

        # Pre-Processing State read by split_patient()
        self.pre_processing = None                              # No Pre-Processing until split() selects a Method
        self.slice = False                                      # 2D Conversion of the 3D Dataset is Disabled by Default


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    
    # Patient Data Access Function
    def get_patient(
        self,
        patient_number: int,                # Number for the Patient File being Read and Acquired (in Order)
    ):
        """
        Load one patient's CSV data, unmask it with the patient's NIfTI mask and
        return it as a 4D NumPy array with axes reordered to (3, 2, 0, 1).

        Raises AssertionError when the patient number is out of range, when the
        patient or mask file is missing, or when the available memory is smaller
        than the combined on-disk size of both files.
        """

        # Patient & Mask File Location
        assert(0 <= patient_number < self.num_patients), f"ERROR: Input Patient not Found!"         # Assertion for the Existence of the Requested Patient
        patient_id = self.patient_info['Patient'].iloc[patient_number]                              # Patient ID contained within the Patient List
        patient_filepath = Path(f"{self.patient_folderpath}/p{patient_id}.csv")                     # Patient Filepath from detailed Folder
        mask_filepath = Path(f"{self.mask_folderpath}/p{patient_id}.nii")                           # Mask Filepath from detailed Folder
        assert(patient_filepath.exists()                                                            # Assertion for the Existence of Patient File in said Folder
        ), f"Filepath for Patient {patient_id} is not in the Dataset!"
        assert(mask_filepath.exists()                                                               # Assertion for the Existence of Mask File in said Folder
        ), f"Filepath for Mask {patient_id} is not in the Dataset!"

        # Memory Availability Check (the On-Disk File Sizes are used as the Estimate)
        file_size = os.path.getsize(patient_filepath)                                               # Memory Space occupied by Patient File
        mask_size = os.path.getsize(mask_filepath)                                                  # Memory Space occupied by Mask File
        available_memory = psutil.virtual_memory().available                                        # Memory Space Available for Computation
        assert(available_memory >= (file_size + mask_size)                                          # Assertion for the Existence of Available Memory Space
        ), f"ERROR: Dataset requires {file_size + mask_size}b, but only {available_memory}b is available!"

        # Patient Data Reading & Unmasking
        pX = pd.read_csv(patient_filepath)                                                          # Full Patient Data
        del pX['Unnamed: 0']                                                                        # Removal of the Index Column stored in the File
        pMask = load_img(mask_filepath)                                                             # Patient Mask Data
        pX = unmask(pX, pMask).get_fdata()                                                          # Unmasking of Full Patient Data
        pX = np.transpose(pX, (3, 2, 0, 1))                                                         # Full Patient Data Reshaping
        return pX

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Pre-Processing Alternative Method: Interpolation
    def prep_interpolation(
        self,
        data: np.ndarray,
        final_shape: np.ndarray = np.array((85, 92, 56)),
    ):
        """
        Resample every volume in `data` (4D, first axis = samples) to `final_shape`
        by picking, for each output voxel, the input voxel at index
        int(output_index * input_size / output_size) along each axis.

        Returns a float64 array of shape (data.shape[0], *final_shape).
        """

        # Input Data Assertions (the three trailing axes are resampled, hence exactly 4 dimensions)
        final_shape = np.asarray(final_shape)
        assert(data.ndim == 4), "ERROR: Pre-Processing Input Data has the Wrong Dimmensions"
        assert(len(final_shape) == 3), "ERROR: Pre-Processing Output has the Wrong Dimmensions"

        # Source Index along each Axis for every Output Position
        ratio = np.divide(data.shape[1::], final_shape)
        x_index, y_index, z_index = [(np.arange(size) * step).astype(int)
                                        for size, step in zip(final_shape, ratio)]

        # Vectorised Gathering: same voxels as the former per-voxel Python loop, selected in a single indexing operation
        final_data = data[:, x_index[:, None, None], y_index[None, :, None], z_index[None, None, :]]
        return final_data.astype(np.float64, copy = False)
    
    # ----------------------------------------------------------------------------------------------------------------------------

    # Pre-Processing Alternative Method: Zero-Padding
    @staticmethod
    def prep_zeroPadding(
        data: np.ndarray,
        final_shape: np.ndarray = np.array((83, 92, 56)),
        #final_shape: np.array = np.array((90, 100, 60)),
    ):
        """
        Zero-pad the three trailing axes of `data` (4D, first axis untouched) up to
        `final_shape`. When the missing amount is odd, the extra element is added
        before the data (ceil before, floor after).

        Declared as a static method because it takes no `self`; this makes the
        `self.prep_zeroPadding(pX)` call in `split_patient` pass the volume as
        `data` instead of the instance.
        """

        # Input Data Assertions
        final_shape = np.asarray(final_shape)
        assert(data.ndim == 4), "ERROR: Pre-Processing Input Data has the Wrong Dimmensions"
        assert(len(final_shape) == 3), "ERROR: Pre-Processing Output has the Wrong Dimmensions"
        assert(np.all(final_shape >= data.shape[1::])
        ), "ERROR: Pre-Processed Output Data Shape < Original Input Data Shape"

        # Zero-Padding Implementation
        missing = np.subtract(final_shape, data.shape[1::])                                     # Elements to be Added along each Trailing Axis
        pad_width = [(0, 0)] + [(int(m + 1) // 2, int(m) // 2) for m in missing]               # (Before, After) Amounts; Sample Axis is not Padded
        final_data = np.pad(data, pad_width, 'constant')
        return final_data

    # ----------------------------------------------------------------------------------------------------------------------------

    # Pre-Processing Alternative Method: Convolutional Layer
    # (disabled draft, kept inside a string literal so that it is never executed)
    """
    class preProcess(nn.Module):

        # Constructor / Initialization Function
        def __init__(
            self,
            data: pd.DataFrame,
            pre_shape: int = 512,
        ):

            # Parameter Value Access
            super(preProcess).__init__()
            self.data = data.T
            assert(data.ndim == 4), "ERROR: Input Image Shape not Supported! (4D Arrays only)"
            assert(self.pre_shape < (data.shape[1] * data.shape[2] * data.shape[3])
            ), "ERROR: Convolution Layer Size must be smaller than Original Image's no. of Voxels!"

            # Convolutional Layer Structure
            print(self.data.shape)
            out = self.conv_layer(data.shape[0], )
            print(out.shape)

        # ----------------------------------------------------------------------------------------------------------------------------
        
        # Convolutional Layer 
        def conv_layer(
            self,
            in_channels: int,
            out_channels:int,
        ):

            return nn.Sequential(
                nn.Conv3d(  in_channels, out_channels,
                            kernel_size = (3, 3, 3),
                            padding = 0),
                nn.MaxPool3d((2, 2, 2)),
                nn.Dropout(p = 0.15), )
                #nn.LeakyReLU(),
                #nn.MaxPool3d((2, 2, 2)),)
    """
        
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # 2D Conversion Functionality
    def slice_(
        self,
        data: np.ndarray,
        interval: np.ndarray = [10, 45],
    ):
        """
        Keep the positions interval[0] .. interval[1] - 1 along axis 1 of `data`
        and merge axes 0 and 1, returning an array of shape
        (data.shape[0] * kept_positions, data.shape[2], data.shape[3]).
        """

        # Slice Selection & 3D to 2D Conversion
        assert(data.ndim >= 4), "ERROR: Pre-Processing Input Data has the Wrong Dimmensions"   # Four Axes are indexed below
        assert(len(interval) == 2), "ERROR: Slice Interval has the Wrong Dimmensions"
        selected = range(interval[0], interval[1])
        data = data[:, selected, :, :]
        data = data.reshape((data.shape[0] * data.shape[1], data.shape[2], data.shape[3]))
        return data

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Patient Data Splitting Function
    def split_patient(
        self,
        patient_number: int,                # Number for the Patient File being Read and Acquired (in Order)
        train_params: int = 500,            # Number / Percentage of Parameters to be used in the Training Section of the Patient
        percentage: bool = False,           # Control Variable for the Usage of Percentage Values in train_params
        sample_shuffle: bool = False,       # Ability to Shuffle the Samples inside both Training and Validation Datasets
    ):
        """
        Load one patient, apply the selected pre-processing and split the patient's
        data and labels into training and validation parts.

        Returns (pX_train, pX_val, py_train, py_val) as produced by `train_test_split`.
        Labels are a copy of the parameter table with an extra 'Patient' column.
        """

        # Computation of Training & Validation Parameter Numbers (Percentage Input)
        if(percentage):
            assert(0 < train_params <= 100                              # Percentage Limits for Number of Training Parameters
            ), f"ERROR: Training Parameter Number not Supported!"
            train_params = train_params / 100                           # Percentage Value for Training Parameters
            val_params = 1 - train_params                               # Percentage Value for Validation Parameters

        # Computation of Training & Validation Parameter Numbers (Numerical Input)
        else:
            assert(0 < train_params <= self.num_params                  # Numerical Limits for Number of Training Parameters
            ), f"ERROR: Training Parameter Number not Supported!"
            val_params = self.num_params - train_params                 # Numerical Value for Validation Parameters

        # ----------------------------------------------------------------------------------------------------------------------------

        # Patient Data Access & Pre-Processing
        pX = self.get_patient(patient_number)                                                   # Patient Data Access
        py = self.params.copy()                                                                 # Copy, so that the Shared Parameter Table is not Modified
        py['Patient'] = self.patient_info['Patient'].iloc[patient_number]                       # Patient Data Label Handling
        pre_processing = getattr(self, 'pre_processing', None)                                  # Fallback for Objects created without __init__ (e.g. older Pickles)
        if(pre_processing == 'Zero Padding'): pX = self.prep_zeroPadding(pX)                    # Zero Padding Pre-Processing
        elif(pre_processing == 'Interpolation'): pX = self.prep_interpolation(pX)               # Interpolation Pre-Processing
        #elif(self.pre_processing == 'CNN'): pX = self.prep_cnn(pX)                             # CNN Pre-Processing (not implemented: data is left untouched)

        # Patient Dataset Splitting into Training & Validation Sets
        # NOTE(review): slice_ multiplies the number of rows in pX, so pX no longer has one row per
        # label in py; confirm how labels should follow the slices before enabling self.slice
        if(getattr(self, 'slice', False)): pX = self.slice_(pX)
        pX_train, pX_val, py_train, py_val = train_test_split(  pX, py,
                                                                test_size = val_params,
                                                                shuffle = sample_shuffle,
                                                                random_state = 42)
        return pX_train, pX_val, py_train, py_val
        

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Set Building Helper Function
    def _build_set(
        self,
        patient_numbers,                    # Iterable of Patient Numbers (in Order) that compose the Set
        set_name: str,                      # Name of the Set shown in the Progress Messages
        train_params: int,                  # Number / Percentage of Parameters for the Training Part of each Patient
        percentage: bool,                   # Control Variable for the Usage of Percentage Values in train_params
        sample_shuffle: bool,               # Ability to Shuffle the Samples of each Patient
    ):
        """
        Split every listed patient and stack the results into a dict with keys
        'X_train', 'X_val', 'y_train' and 'y_val'.

        X entries are NumPy arrays (patient data has more than 2 dimensions, which a
        DataFrame cannot hold); y entries are DataFrames built from the stacked label values.
        """

        # Per-Patient Parts are Collected and Concatenated Once, which needs no pre-sized empty arrays
        keys = ('X_train', 'X_val', 'y_train', 'y_val')
        parts = {key: [] for key in keys}
        for p in patient_numbers:
            print(f"Adding Patient {self.patient_info['Patient'].iloc[p]}'s Data to the {set_name} Set...")
            patient_parts = self.split_patient( patient_number = p,
                                                train_params = train_params,
                                                percentage = percentage,
                                                sample_shuffle = sample_shuffle)
            for key, part in zip(keys, patient_parts): parts[key].append(part)

        # Set Assembly (empty containers when the Set has no Patients)
        built_set = dict.fromkeys(keys)
        for key in ('X_train', 'X_val'):
            built_set[key] = np.concatenate(parts[key], axis = 0) if parts[key] else np.empty((0,))
        for key in ('y_train', 'y_val'):
            built_set[key] = pd.DataFrame(np.concatenate(parts[key], axis = 0)) if parts[key] else pd.DataFrame()
        return built_set

    # ----------------------------------------------------------------------------------------------------------------------------

    # Report Cell Formatting Helper Function
    def _describe_params(
        self,
        amount,                             # Number / Percentage of Parameters being Reported
        percentage: bool,                   # Whether `amount` is a Percentage Value
    ):
        """Return the report text '<number of parameters> (<percentage>%)' for one table cell."""

        if(percentage): return f"{(amount / 100) * self.num_params} ({amount}%)"
        return f"{amount} ({np.round((amount / self.num_params) * 100, 2)}%)"

    # ----------------------------------------------------------------------------------------------------------------------------

    # Dataset Splitting Function
    def split(
        self,
        test_patients: int = 1,                     # Number of Patients to be used in the Test Set
        train_params: int = 500,                    # Number / Percentage of Parameters for the Training of the Training Set
        test_params: int = 20,                      # Number / Percentage of Parameters for the Training of the Test Set
        pre_processing: str = 'Zero Padding',       # Control Variable for the Choice of Pre-Processing Method
        #slice: bool = True,                        # Control Variable for the 2D Conversion of the 3D Dataset
        percentage: bool = False,                   # Control Variable for the Usage of Percentage Values in train_params
        patient_shuffle: bool = False,              # Ability to Shuffle the Patients that compose both Training / Validation and Test Datasets
        sample_shuffle: bool = False,               # Ability to Shuffle the Samples inside both Training / Validation and Test Datasets
    ):
        """
        Build `self.train_set` from the first (num_patients - test_patients) patients
        and `self.test_set` from the remaining ones, then print a summary table.

        Both sets are dicts with keys 'X_train', 'X_val', 'y_train' and 'y_val'
        (X as NumPy arrays, y as DataFrames).
        Raises AssertionError for unsupported patient / parameter numbers or methods.
        """

        # Patient Number Variable Logging
        assert(0 < test_patients <= self.num_patients               # Limits for Number of Test Set Patients
        ), f"ERROR: Test Patient Number not Supported!"
        assert(pre_processing in ('Zero Padding', 'Interpolation', 'CNN')
        ), "ERROR: Pre-Processing Method not Supported!"
        self.train_patients = self.num_patients - test_patients     # Number of Patients to be used in the Training & Validation Sets
        self.test_patients = test_patients                          # Number of Patients to be used in the Test Sets
        self.pre_processing = pre_processing                        # Chosen Pre-Processing Method

        # Patient Shuffling Feature
        if(patient_shuffle): self.patient_info = self.patient_info.iloc[np.random.permutation(len(self.patient_info))]

        # ----------------------------------------------------------------------------------------------------------------------------

        # Computation of Training & Validation Parameter Numbers (Percentage or Numerical Input)
        total = 100 if percentage else self.num_params              # Upper Limit: 100 for Percentages, Number of Parameters otherwise
        assert(0 < train_params <= total                            # Limits for Number of Training Set's Parameters
        ), f"ERROR: Training Set's Parameter Number not Supported!"
        assert(0 < test_params <= total                             # Limits for Number of Test Set's Parameters
        ), f"ERROR: Test Set's Parameter Number not Supported!"
        self.trainTrain_params = train_params                       # Value for Training Set's Training Parameters
        self.trainVal_params = total - train_params                 # Value for Training Set's Validation Parameters
        self.testTrain_params = test_params                         # Value for Test Set's Training Parameters
        self.testVal_params = total - test_params                   # Value for Test Set's Validation Parameters

        # ----------------------------------------------------------------------------------------------------------------------------

        # Training & Validation Sets Building
        self.train_set = self._build_set(   range(self.train_patients), 'Training',
                                            self.trainTrain_params, percentage, sample_shuffle)

        # Test Set Building
        self.test_set = self._build_set(    range(self.train_patients, self.train_patients + self.test_patients), 'Test',
                                            self.testTrain_params, percentage, sample_shuffle)

        # ----------------------------------------------------------------------------------------------------------------------------

        # Split Datasets' Content Report (both rows now share the same percentage formula)
        print(tabulate([[self.train_patients, self._describe_params(self.trainTrain_params, percentage), self._describe_params(self.trainVal_params, percentage)],
                        [self.test_patients, self._describe_params(self.testTrain_params, percentage), self._describe_params(self.testVal_params, percentage)]],
                        headers = ['No. Patients', 'Training Parameters', 'Validation Parameters'],
                        showindex = ['Training Set', 'Test Set'], tablefmt = 'fancy_grid'))

    
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Dataset Saving Function
    def save(
        self,
        path: Path,                         # Folder in which the Dataset File is Written
        version: int = 0,                   # Version Number embedded in the File Name
    ):
        """Pickle this object into `path` as 'Vertical 3D MUDI (Version <version>)'."""

        # The Context Manager closes the File even if Pickling fails (`f.close` was never called before)
        with open(f'{path}/Vertical 3D MUDI (Version {version})', 'wb') as f:
            pickle.dump(self, f)

    # ----------------------------------------------------------------------------------------------------------------------------

    # Dataset Loading Function
    @staticmethod
    def load(
        path: Path,                         # Folder containing the Saved Dataset File
        version: int = 0,                   # Version Number embedded in the File Name
    ):
        """Return the object pickled in `path` as 'Vertical 3D MUDI (Version <version>)'."""

        # NOTE: unpickling can execute arbitrary code, so only load files from a trusted source
        with open(f'{path}/Vertical 3D MUDI (Version {version})', 'rb') as f:
            return pickle.load(f)



In [85]:
# Example: build the 3D dataset from the parsed folder / file paths (the split is not run here)
mudi = v3DMUDI(
    patient_folderpath = parse.patient_folderpath,
    mask_folderpath = parse.mask_folderpath,
    param_filepath = parse.param_filepath,
    info_filepath = parse.info_filepath,
)
#mudi.split()

In [None]:
# Reload a previously saved 3D dataset (version 0) from the 'Saved Data' folder
mudi = v3DMUDI.load(
    Path(f"{main_folderpath}Saved Data"),
    version = 0,
)

In [86]:
# 3D Interactive Plotting Function
def plot(
    sample_number,
    slice_number,
):
    """
    Show one 2D image of the currently loaded patient.

    Reads the notebook-level variables `data` (indexed first by sample, then by
    slice) and `patient_number` (used only in the title), so both must be
    defined before this function is called.
    """

    # Patient Sample & Slice for Visualization
    img = data[sample_number]
    # 180 degree rotation of the transposed slice; np.rot90 is exact (no interpolation)
    # and removes the dependency on `rotate`, which the notebook's import cells do not provide
    img = np.rot90(img[slice_number].T, k = 2)
    plt.figure(figsize = (10, 20)); plt.imshow(img, cmap = 'gray'); plt.axis('off')
    plt.title(f"Patient #{patient_number} | Sample #{sample_number} | Slice #{slice_number}")

# ----------------------------------------------------------------------------------------------------------------------------

# Interactive Patient Data Visualization: one slider per leading axis of the loaded patient array
patient_number = 0
data = mudi.get_patient(patient_number)
sample_slider = IntSlider(
    value = 0, min = 0, max = data.shape[0] - 1,
    description = 'Sample', continuous_update = False)
slice_slider = IntSlider(
    value = 0, min = 0, max = data.shape[1] - 1,
    description = 'Slice', continuous_update = False)
interactive(plot, sample_number = sample_slider, slice_number = slice_slider)


interactive(children=(IntSlider(value=0, continuous_update=False, description='Sample', max=1343), IntSlider(v…