# *Initial* **Setup**

## **Library** *Settings*

In [None]:
# Library Download
%pip install --upgrade pip
%pip install numpy
%pip install argparse
%pip install pandas
%pip install scipy
%pip install torch
%pip install sklearn
%pip install keras==2.10
%pip install tensorflow
%pip install matplot
%pip install plotly
%pip install h5py
%pip install h5pyViewer
%pip install pytorch_lightning
%pip install dipy
%pip install openpyxl
%pip install tabulate

In [1]:
# Library Import
import os
import sys
import pickle
import psutil
import numpy as np
import argparse
import pandas as pd
import scipy
import torch
import torch.nn as nn
import pytorch_lightning as pl
import sklearn
import keras
import tensorflow as tf
#tf.compat.v1.enable_eager_execution(config=None, device_policy=None, execution_mode=None)
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import h5py
import dipy
import warnings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Functionality Import
from pathlib import Path
from typing import List, Literal, Optional, Callable, Dict, Literal, Optional, Union, Tuple
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from pytorch_lightning.utilities.cli import DATAMODULE_REGISTRY
from tensorflow.keras.layers import concatenate, Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
tf._api.v2.compat.v1.disable_v2_behavior()
from dipy.reconst.shm import cart2sphere, real_sh_descoteaux_from_index, sph_harm_ind_list
from PIL import Image
from tabulate import tabulate
warnings.filterwarnings('ignore')

Instructions for updating:
non-resource variables are not supported in the long term


## **Control** *Station*

In [3]:
# Parser Initialization
parser = argparse.ArgumentParser(
    description = "MUDIVisualizer")

# Filepath Arguments
path = parser.add_argument_group('Required Filepaths')
main_folderpath = '../../../Datasets/MUDI Dataset/'
path.add_argument('--param_filepath', type = Path, default = main_folderpath + 'Raw Data/parameters_new.xlsx',
                    help = 'Filepath for DHDF5 File containing MUDI Dataset Parameters')
path.add_argument('--patient_folderpath', type = Path, default = main_folderpath + 'Patient Data',
                    help = 'Filepath for DHDF5 File containing MUDI Dataset Patient Information')
path.add_argument('--info_filepath', type = Path, default = main_folderpath + 'Raw Data/header1_.csv',
                    help = 'Filepath for DHDF5 File containing MUDI Dataset Parameters')

# ----------------------------------------------------------------------------------------------------------------------------

# Control Arguments for Datasets' Organization
data = parser.add_argument_group("MUDI Dataset's Control Parameters")
data.add_argument('--batch_size', type = int, default = 500,
                    help = "Batch Size for DataLoaders")
data.add_argument('--vShuffle', type = bool, default = False,
                    help = 'Control Variable for Vertical / Voxel Shuffle in Dataset')
data.add_argument('--hShuffle', type = bool, default = False,
                    help = 'Control Variable for Horizontal / Parameter Shuffle in Dataset')

# ----------------------------------------------------------------------------------------------------------------------------

# Control Arguments for CVAE Model's Parameters
cvae = parser.add_argument_group("CVAE Control Parameters")
cvae.add_argument('--nEpoch', type = int, default = 50,
                    help = "Number of Epochs for Model Training")
cvae.add_argument('--latentK', type = int, default = 50,
                    help = "Latent Space Dimensionality")
cvae.add_argument('--alpha', type = float, default = 0.001,
                    help = "Optimizers' Learning Rate Value")

# CVAE Version II - Keras Arguments
cvae_keras = cvae.add_argument_group("Keras V2")
cvae_keras.add_argument('--nnDim', type = int, default = 512,
                        help = "Encoder's Neural Network Dimensionality")

# ----------------------------------------------------------------------------------------------------------------------------

parse = parser.parse_args("")
parse.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# **Data** *Access*

In [4]:
# Dataset Access Requirements
sys.path.append('../../../Datasets/MUDI Dataset/Dataset Reader')
from v1DMUDI import v1DMUDI

In [5]:
# Dataset Initialization & Saving Example
mudi = v1DMUDI(   parse.patient_folderpath,
                parse.param_filepath,
                parse.info_filepath)
mudi.split()                                                # Dataset Splitting
mudi.save(Path(f"{main_folderpath}Saved Data"))             # Dataset Saving

Adding Patient 11's Data to the Training Set...
Adding Patient 12's Data to the Training Set...
Adding Patient 13's Data to the Training Set...
Adding Patient 14's Data to the Training Set...
Adding Patient 15's Data to the Test Set...
╒══════════════╤════════════════╤═══════════════════════╤═════════════════════════╕
│              │   No. Patients │ Training Parameters   │ Validation Parameters   │
╞══════════════╪════════════════╪═══════════════════════╪═════════════════════════╡
│ Training Set │              4 │ 500 (37.2%)           │ 844 (62.8%)             │
├──────────────┼────────────────┼───────────────────────┼─────────────────────────┤
│ Test Set     │              1 │ 20 (0.01%)            │ 1324 (0.99%)            │
╘══════════════╧════════════════╧═══════════════════════╧═════════════════════════╛


In [23]:
# Dataset Loading
mudi = vMUDI.load(Path(f"{main_folderpath}Saved Data"), version = 0)

## *Pre-Processing* **Convolutional Layer**

In [None]:
# 1D Image Pre-Processing Function
    def pre_process(
        self,
        img: pd.DataFrame,
    ):

        # Input Variable Assertions
        assert(img.ndim == 2), "ERROR: Input Image Shape not Supported! (2D Arrays only)"
        assert(self.pre_shape < img.shape[1]), "ERROR: Convolution Layer Size must be smaller than Original Image's no. of Voxels!"

        # Convolutional ayer Creation (using Patient's No. Voxels)
        model = Sequential()
        model.add(Conv2D(   self.pre_shape, (1, 1),
                            padding = 'same',
                            activation = 'relu',
                            input_shape = (1, self.num_params, img.shape[1])))

        # Input Formatting & Pre-Processing
        self.img = img
        img = tf.convert_to_tensor(img.values, dtype = 'float32')           # Pandas DataFrame -> Tensorflow Tensor
        img = tf.reshape(img, (1, 1, self.num_params, img.shape[1]))        # Convolutional Input Tensor Reshapping 2D -> 4D
        img = tf.reshape(model(img), (self.num_params, self.pre_shape))     # Convolutional Output Tensor Reshapping 4D -> 2D
        self.img_final = img
        return img.eval(session=tf.compat.v1.Session())  

In [None]:
a = tf.constant([[1, 2], [3, 4]])
a.eval(session=tf.compat.v1.Session())  
#a.numpy()

In [None]:
#
class preProcess(nn.Module):
    
    # Constructor / Initialization Function
    def __init__(
        self,
        img_shape: int = 108300,
        final_shape: int = 100000,
    ):

        # Neural Convolution Building
        super(preProcess, self).__init__()
        self.final_shape = final_shape
        self.conv1 = nn.Conv2d(img_shape, self.final_shape, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.linear = nn.Dens
    
    #
    def forward(self, img_df: pd.DataFrame):

        # Input Variable Assertions
        assert(img_df.ndim == 2), "ERROR: Input Image Shape not Supported! (2D Arrays only)"
        assert(self.final_shape < img_df.shape[1]), "ERROR: Convolution Layer Size must be smaller than Original Image's no. of Voxels!"

        # Neural Network Application
        img_df = torch.tensor(img_df.values.astype(np.float32))
        for i in range(img_df.shape[0]):
            img = np.array(img_df[i]).reshape((img_df.shape[1], 1, 1, 1))
            img_df[i] = self.conv1(img)
        return img_df

In [None]:
model = preProcess()
out = model(mudi.img)
print(out)

## *Pre-Processing* **Dimensionality Reduction**

In [None]:
from sklearn.decomposition import PCA
pca = PCA()
pca.fit(mudi.img)
cumulative_sum = np.cumsum(pca.explained_variance_ratio_)
print(np.argmax(cumulative_sum >= 0.99) + 1)

# **Model** Build

## **Keras** *Model*

Since input values will have a *different number of voxels from patient to patient*, it is a requirement that the Encoder can have different input sizes, hence the use of **Global Average Pooling** for each of the batches. The first try was using 2D Pooling, and the intuition would say that this does work, since there is no need to have the encoder's input shape change from batch to batch, only when changing patients.

In [26]:
# Keras CVAE Model Implementation Class
class Keras_CVAE():

    # Constructor / Initialization Function
    def __init__(
        self,
        
        # Dataset Handling Requirements
        data: vMUDI,                                # Dataset containing Parameters, Training and Test Set, and Everything inbetween
        pre_shape: int = 1200,                    # Intermediate Dataset Shape as of Pre-Processing
        #
        
        # Model Creation & Optimization Requirements
        model_folderpath: Path = Path(""),          # Path for Model Saving Folder
        activ: str = 'relu',                        # Activation Function (Default: ReLU)
        conv_nn: int = 512,                         # Encoder Convolutional Neural Network Layer Shape
        lr: float = 0.001,                          # Learning Rate for Training Mode of both Datasets
        latent_k: int = 50,                         # Latent Space Dimensionality
        display: bool = False,                      # Control Variable for the Display of Error Values, etc
    ):

        # Class Requirement Variable Assertions
        #assert

        # Class Requirement Variable Logging
        super(Keras_CVAE).__init__()
        self.data = data; self.pre_shape = pre_shape
        self.model_folderpath = model_folderpath
        self.activ = activ; self.conv_nn = conv_nn
        self.lr = lr; self.latent_k = latent_k
        self.display = display; self.arch()

    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Model Architecture Building Function
    def arch(self,):

        print("Building CVAE Model...")

        # Data Dimensionality Definition
        X = Input(shape = (self.pre_shape, ))                           # Image Data Shape ---- [Batch Size (??), Intermediate Pre-Processed Size]
        y = Input(shape = ((self.data.params.shape[1], )))              # Image Label Shape --- [?, 7]
        input = concatenate([X, y], axis = 1)                           # Encoder Input Concatenation
        if self.display: print(f"    Encoder Input: {input.shape} [X: {X.shape} + y: {y.shape}]")

        # Encoder Architecture & Pipeline
        optim = Adam(lr = self.lr)                                      # Encoder Optimizer Function
        encoder = Dense(self.conv_nn, activation = self.activ)(input)   # Encoder Layer Definition
        mu = Dense(self.latent_k, activation  = 'linear')(encoder)      #
        sigma = Dense(self.latent_k, activation = 'linear')(encoder)    #

        # -------------------------------------------------------------

        # Latent Space Sampling Function
        def sample(args):
            mu, sigma = args
            eps = K.random_normal(  shape = (self.latent_k, ), 
                                    mean = 0.0, stddev = 1.0)
            return mu + K.exp(sigma / 2) * eps

        # -------------------------------------------------------------

        # Latent Space 
        z = Lambda( sample,                                              # Encoder Output /
                    output_shape = (self.latent_k, ))([mu, sigma])       # Latent Space Representation
        zc = concatenate([z, y], axis = 1)                               # Full Decoder Input / Latent Space Representation
        if self.display: print(f"    Encoder Output: {z.shape}"); print(f"    Latent Space Representation: {zc.shape}")

        # Decoder Architecture
        decoder1 = Dense(   self.latent_k + self.data.params.shape[1],  #
                            activation = self.activ)                    #
        decoder2 = Dense(   self.pre_shape,                             #
                            activation = 'sigmoid')                     #
        output = decoder2(decoder1(zc))                                 #

        # Decoder Architecture & Pipeline
        decoder_input = Input(shape = (self.latent_k + self.data.params.shape[1], ))        #
        decoder_output = decoder2(decoder1(decoder_input))                                  #
        if self.display: print(f"    Decoder Input: {decoder_input.shape}"); print(f"    Decoder Output: {decoder_output.shape}")

        # -------------------------------------------------------------

        # Loss Functions
        def vae_loss(y, p):
            recon = K.sum(K.binary_crossentropy(y, p), axis = -1)
            kl = 0.5 * K.sum(K.exp(sigma) + K.square(mu) - 1.0 - sigma, axis=-1)
            return recon + kl

        def KL_loss(y, p):
            return(0.5 * K.sum(K.exp(sigma) + K.square(mu) - 1.0 - sigma, axis = 1))

        def recon_loss(y, p):
            return K.sum(K.binary_crossentropy(y, p), axis = -1)
        
        # -------------------------------------------------------------

        # Full Model Pipeline
        self.encoder = Model([X, y], mu)                                # Encoder Model Compilation
        self.decoder = Model(decoder_input, decoder_output)             # Decoder Model Compilation
        self.model = Model([X, y], output)                              # CVAE Model Compilation
        self.model.compile( optimizer = optim,                          # CVAE Optimization Method & Metrics Definition
                            loss = vae_loss,
                            metrics = [KL_loss, recon_loss])


    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    # ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

    # Model Training Mode
    def train(
        self,
        dataset: str = 'train',                     # Dataset to Execute Training on (Training / Test Set)
        n_epochs: int = 500,                        # Number of Epochs for Training Mode of both Datasets
        batch_size: int = 500,                      # Batch Size for Training Mode of both Datasets
    ):

        # Dataset Choice Assertion
        self.train_epochs = n_epochs; self.batch_size = batch_size
        assert(dataset == 'train' or dataset == 'test'), "Dataset Chosen not Found!"
        if dataset == 'train':
            ds = self.data.train_set
            n_patients = self.data.train_patients
        else:
            ds = self.data.test_set
            n_patients = self.data.test_patients

        # Keras Training Functionality
        self.model_hist = self.model.fit(   [np.array(ds['X_train']), np.array(ds['y_train'])], np.array(ds['X_train']),                # Training Data
                                            batch_size = self.batch_size,                                                               # Batch Size
                                            epochs = self.train_epochs,                                                                 # Number of Epochs
                                            validation_data = [np.array(ds['X_val']), np.array(ds['y_val'])], np.array(ds['X_val']),    # Validation Data
                                            callbacks = [EarlyStopping(patience = 5)])                                                  # Inclusion of Early Stopping
        #if (~self.display): print()
        

SyntaxError: positional argument follows keyword argument (2512756107.py, line 133)

In [25]:
# Model Creation & Training
cvae = Keras_CVAE(data = mudi)
cvae.train('train')

Building CVAE Model...
    Encoder Input: (?, 1207) [X: (?, 1200) + y: (?, 7)]
    Encoder Output: (?, 50)
    Latent Space Representation: (?, 57)
    Decoder Input: (?, 57)
    Decoder Output: (?, 1200)
<class 'numpy.ndarray'>
Train on 2000 samples, validate on 3376 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Train on 2000 samples, validate on 3376 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Train on 2000 samples, validate on 3376 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Train on 2000 samples, validate on 3376 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
