In [10]:
import os
import pandas as pd
import nibabel as nib
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from skimage.measure import block_reduce
import numpy as np
from concurrent.futures import ProcessPoolExecutor
import time
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import tensorly as tl

#Debugging import
import importlib
var = 'TensorDecisionTreeRegressorP' #the published version of code
package = importlib.import_module(var)
for name, value in package.__dict__.items():
    if not name.startswith("__"):
        globals()[name] = value

from TensorDecisionTreeRegressorP import *

import os
import nibabel as nib
import numpy as np
import matplotlib as plt
import pandas as pd
from sklearn.model_selection import train_test_split


# File path to the CSV file
csv_file = '/Users/zc56/Documents/CommenDesktop/RICE/MyProject/Bayes_Tensor_Tree/3D-images/ADNIData.csv'
df = pd.read_csv(csv_file)

# Remove rows where ADAS11_bl is missing (NaN)
#df_cleaned = df.dropna(subset=['ADAS11_bl'])
df_cleaned = df.dropna(subset=['MMSE_bl'])

# Extract the 'ADAS11_bl' column as the y variable
#y_variable = df_cleaned['ADAS11_bl'].values
y_variable = df_cleaned['MMSE_bl'].values

# Split the dataframe based on the DX_bl column values
cn_group = df_cleaned[df_cleaned['DX_bl'] == 'CN']
ad_group = df_cleaned[df_cleaned['DX_bl'] == 'AD']
lmci_group = df_cleaned[df_cleaned['DX_bl'] == 'LMCI']

# Display the counts for each group after removing NA
print(f"CN group size: {cn_group.shape[0]}")
print(f"AD group size: {ad_group.shape[0]}")
print(f"LMCI group size: {lmci_group.shape[0]}")

# Directory containing the 3D images
directory = '/Users/zc56/Documents/CommenDesktop/RICE/MyProject/Bayes_Tensor_Tree/3D-images/3D-Images/bl'

# Initialize dictionaries to hold the images and y values for each group
cn_images, ad_images, lmci_images = [], [], []
cn_y, ad_y, lmci_y = [], [], []

# Function to load the images based on PTID matching and append y values
def load_images_and_y(group, image_list, y_list):
    for _, row in group.iterrows():
        ptid = row['PTID']
        # Find the corresponding file based on PTID
        filename = f'{ptid}.nii.gz'
        file_path = os.path.join(directory, filename)
        
        if os.path.exists(file_path):
            # Load the NIfTI file
            img = nib.load(file_path)
            data = img.get_fdata()
            
            # Append the 3D image data and y value to the respective lists
            image_list.append(data)
            y_list.append(row['ADAS11_bl'])
        else:
            print(f"File {filename} not found.")

# Load images and y values for each group
load_images_and_y(cn_group, cn_images, cn_y)
load_images_and_y(ad_group, ad_images, ad_y)
load_images_and_y(lmci_group, lmci_images, lmci_y)

# Convert lists of 3D images and y values to NumPy arrays
if cn_images:
    cn_tensor = np.stack(cn_images, axis=0)
    cn_y = np.array(cn_y)
    print(f"CN 4D tensor shape: {cn_tensor.shape}")
    print(f"CN y shape: {cn_y.shape}")
else:
    print("No CN images loaded.")

if ad_images:
    ad_tensor = np.stack(ad_images, axis=0)
    ad_y = np.array(ad_y)
    print(f"AD 4D tensor shape: {ad_tensor.shape}")
    print(f"AD y shape: {ad_y.shape}")
else:
    print("No AD images loaded.")

if lmci_images:
    lmci_tensor = np.stack(lmci_images, axis=0)
    lmci_y = np.array(lmci_y)
    print(f"LMCI 4D tensor shape: {lmci_tensor.shape}")
    print(f"LMCI y shape: {lmci_y.shape}")
else:
    print("No LMCI images loaded.")

CN group size: 229
AD group size: 188
LMCI group size: 401
CN 4D tensor shape: (229, 48, 48, 48)
CN y shape: (229,)
AD 4D tensor shape: (188, 48, 48, 48)
AD y shape: (188,)
LMCI 4D tensor shape: (401, 48, 48, 48)
LMCI y shape: (401,)


In [25]:
def _rank_k_approx_error(self, X,depth=None):
        if len(X)<=self.min_samples_split:
            return np.inf
        if self.lowrank_method=='cp':
            weights, factors = parafac(X, rank=self.split_rank, l2_reg = np.finfo(np.float32).eps)
            rank_k_approx = tl.cp_to_tensor((weights, factors))
            return tl.norm(X - rank_k_approx)
        if self.lowrank_method=='tucker':
            core, factors = tucker(X, rank=[X.shape[0],self.split_rank,self.split_rank])
            rank_k_approx = tl.tucker_to_tensor((core, factors)) 
            return tl.norm(X - rank_k_approx)
        if self.lowrank_method=='constrained_cp':
            weights, factors = constrained_parafac(X, rank=self.split_rank, l1_reg = 0.1) #To avoid super-sparse tensor
            rank_k_approx = tl.cp_to_tensor((weights, factors))
            return tl.norm(X - rank_k_approx)

       

def _rank_k_reg_error(X, y, lowrank_method = 'cp', CP_reg_rank = 3, Tucker_reg_rank = 3, verbose = 0):
    if lowrank_method=='cp':
        model = CPRegressor(weight_rank=CP_reg_rank, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        return tl.norm(y - y_predict)
    if lowrank_method=='tucker':
        model = TuckerRegressor(weight_ranks=[Tucker_reg_rank, Tucker_reg_rank, Tucker_reg_rank, Tucker_reg_rank], tol=10e-7, reg_W=0, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        return tl.norm(y - y_predict)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(lmci_tensor, lmci_y, test_size=0.2, random_state=42)
print(X_train.shape,y_train.shape)
X_coarsen_shape = (1,4,4,4)
X_coarsen_func = np.max
X_train_c = block_reduce(X_train,block_size=X_coarsen_shape, func=X_coarsen_func)
X_test_c = block_reduce(X_test,block_size=X_coarsen_shape, func=X_coarsen_func)
print(X_train_c.shape,X_test_c.shape)

(320, 48, 48, 48) (320,)
(320, 12, 12, 12) (81, 12, 12, 12)


In [13]:
_rank_k_reg_error(X_train_c, y_train, lowrank_method = 'cp', CP_reg_rank = 3)

53.64607606272227

In [14]:
def _rank_k_reg_error(X, y, lowrank_method = 'cp', CP_reg_rank = 3, Tucker_reg_rank = 3, verbose = 0):
    if lowrank_method=='cp':
        model = CPRegressor(weight_rank=CP_reg_rank, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        return tl.norm(y - y_predict)
    if lowrank_method=='tucker':
        model = TuckerRegressor(weight_ranks=[Tucker_reg_rank, Tucker_reg_rank, Tucker_reg_rank, Tucker_reg_rank], tol=10e-7, reg_W=0, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        return tl.norm(y - y_predict)
    
error = _rank_k_reg_error(X_train_c, y_train, lowrank_method = 'cp', CP_reg_rank = 3)
for i in range(1,X_train_c.shape[3]):
    errorl = _rank_k_reg_error(X_train_c[:,:,:,:i], y_train, lowrank_method = 'cp', CP_reg_rank = 3)
    errorr = _rank_k_reg_error(X_train_c[:,:,:,i:], y_train, lowrank_method = 'cp', CP_reg_rank = 3)
    total_error = errorl+errorr
    print(total_error<error)
    print("error: ", error)
    print("total_error: ", total_error)

False
error:  53.133044556333694
total_error:  147.05888552156313
False
error:  53.133044556333694
total_error:  133.55521257349582
False
error:  53.133044556333694
total_error:  123.36168897779186
False
error:  53.133044556333694
total_error:  120.32474724485284
False
error:  53.133044556333694
total_error:  120.15889976541014
False
error:  53.133044556333694
total_error:  122.09589452368151
False
error:  53.133044556333694
total_error:  122.6370915001132
False
error:  53.133044556333694
total_error:  122.28643022598055
False
error:  53.133044556333694
total_error:  125.87657895456282
False
error:  53.133044556333694
total_error:  132.27343747497952
False
error:  53.133044556333694
total_error:  139.27228202740653


In [26]:
error = _rank_k_reg_error(X_train_c, y_train, lowrank_method = 'tucker', CP_reg_rank = 3, Tucker_reg_rank=3)
for i in range(1,X_train_c.shape[3]-1):
    errorl = _rank_k_reg_error(X_train_c[:,:,:,:i], y_train, lowrank_method = 'tucker', CP_reg_rank = 3, Tucker_reg_rank=3)
    errorr = _rank_k_reg_error(X_train_c[:,:,:,i:], y_train, lowrank_method = 'tucker', CP_reg_rank = 3, Tucker_reg_rank=3)
    total_error = errorl+errorr
    print(total_error<error)
    print("error: ", error)
    print("total_error: ", total_error)

ValueError: shapes (144,9) and (27,3) not aligned: 9 (dim 1) != 27 (dim 0)

In [4]:
weights, factors = parafac(X_train_c, rank=3, l2_reg = np.finfo(np.float32).eps)
rank_k_approx = tl.cp_to_tensor((weights, factors))
error = tl.norm(X_train_c - rank_k_approx)
for i in range(1,X_train_c.shape[3]):
    weightsl, factorsl = parafac(X_train_c[:,:,:,:i], rank=3, l2_reg = np.finfo(np.float32).eps)
    weightsr, factorsr = parafac(X_train_c[:,:,:,i:], rank=3, l2_reg = np.finfo(np.float32).eps)
    rank_k_approxl = tl.cp_to_tensor((weightsl, factorsl))
    rank_k_approxr = tl.cp_to_tensor((weightsr, factorsr))
    errorl = tl.norm(X_train_c[:,:,:,:i] - rank_k_approxl)
    errorr = tl.norm(X_train_c[:,:,:,i:] - rank_k_approxr)
    total_error = errorl+errorr
    print(total_error<error)
    print("error: ", error)
    print("total_error: ", total_error)

False
error:  866.4934412132004
total_error:  891.8753613211642
False
error:  866.4934412132004
total_error:  956.0350379949093
False
error:  866.4934412132004
total_error:  1061.478829020447
False
error:  866.4934412132004
total_error:  1137.6366514027616
False
error:  866.4934412132004
total_error:  1178.7994753445068
False
error:  866.4934412132004
total_error:  1213.5266483204848
False
error:  866.4934412132004
total_error:  1189.4389944973584
False
error:  866.4934412132004
total_error:  1148.1195374253034
False
error:  866.4934412132004
total_error:  1073.584214092944
False
error:  866.4934412132004
total_error:  972.8809334949001
False
error:  866.4934412132004
total_error:  901.8098472158539


In [6]:
weights, factors = parafac(X_train_c, rank=3, l2_reg = np.finfo(np.float32).eps)
rank_k_approx = tl.cp_to_tensor((weights, factors))
error = tl.norm(X_train_c - rank_k_approx)
for i in range(1,X_train_c.shape[3]):
    weightsl, factorsl = parafac(X_train_c[:,:,:i,:], rank=3, l2_reg = np.finfo(np.float32).eps)
    weightsr, factorsr = parafac(X_train_c[:,:,i:,:], rank=3, l2_reg = np.finfo(np.float32).eps)
    rank_k_approxl = tl.cp_to_tensor((weightsl, factorsl))
    rank_k_approxr = tl.cp_to_tensor((weightsr, factorsr))
    errorl = tl.norm(X_train_c[:,:,:i,:] - rank_k_approxl)
    errorr = tl.norm(X_train_c[:,:,i:,:] - rank_k_approxr)
    total_error = errorl+errorr
    print(total_error<error)

False
False
False
False
False
False
False
False
False
False
False


In [7]:
weights, factors = parafac(X_train_c, rank=3, l2_reg = np.finfo(np.float32).eps)
rank_k_approx = tl.cp_to_tensor((weights, factors))
error = tl.norm(X_train_c - rank_k_approx)
for i in range(1,X_train_c.shape[3]):
    weightsl, factorsl = parafac(X_train_c[:,:i,:,:], rank=3, l2_reg = np.finfo(np.float32).eps)
    weightsr, factorsr = parafac(X_train_c[:,i:,:,:], rank=3, l2_reg = np.finfo(np.float32).eps)
    rank_k_approxl = tl.cp_to_tensor((weightsl, factorsl))
    rank_k_approxr = tl.cp_to_tensor((weightsr, factorsr))
    errorl = tl.norm(X_train_c[:,:i,:,:] - rank_k_approxl)
    errorr = tl.norm(X_train_c[:,i:,:,:] - rank_k_approxr)
    total_error = errorl+errorr
    print(total_error<error)

False
False
False
False
False
False
False
False
False
False
False


In [28]:
def _rank_k_reg_error(X, y, lowrank_method='cp', CP_reg_rank=3, Tucker_reg_rank=3, verbose=0):
    # Check tensor shape before fitting
    print(f"Tensor shape before fitting: {X.shape}")

    # Adjust Tucker ranks to ensure they do not exceed tensor dimensions
    if lowrank_method == 'tucker':
        ranks = [
            min(X.shape[1], Tucker_reg_rank),  # For mode 1
            min(X.shape[2], Tucker_reg_rank),  # For mode 2
            min(X.shape[3], Tucker_reg_rank)   # For mode 3
        ]
        print(f"Adjusted Tucker ranks: {ranks}")

        model = TuckerRegressor(weight_ranks=ranks, tol=10e-7, reg_W=0, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        # Compute and return error
        error = tl.norm(y - y_predict)
        print(f"Tucker Regression error: {error}")
        return error

    elif lowrank_method == 'cp':
        model = CPRegressor(weight_rank=CP_reg_rank, verbose=verbose)
        model.fit(X, y)
        y_predict = model.predict(X)
        # Compute and return error
        error = tl.norm(y - y_predict)
        print(f"CP Regression error: {error}")
        return error

    else:
        raise ValueError(f"Unknown lowrank_method: {lowrank_method}")

# Main loop to split the tensor and compute errors
error = _rank_k_reg_error(X_train_c, y_train, lowrank_method='tucker', CP_reg_rank=3, Tucker_reg_rank=3)

for i in range(1, X_train_c.shape[3]-1):
    print(f"Processing split at index {i}")

    # Compute error for left slice
    X_left = X_train_c[:, :, :, :i]
    print(f"Left tensor slice shape: {X_left.shape}")
    errorl = _rank_k_reg_error(X_left, y_train, lowrank_method='tucker', CP_reg_rank=3, Tucker_reg_rank=3)

    # Compute error for right slice
    X_right = X_train_c[:, :, :, i:]
    print(f"Right tensor slice shape: {X_right.shape}")
    errorr = _rank_k_reg_error(X_right, y_train, lowrank_method='tucker', CP_reg_rank=3, Tucker_reg_rank=3)

    total_error = errorl + errorr
    print(f"Total error for split at index {i}: {total_error}")
    print(f"Is total_error < error? {total_error < error}")
    print("Current total error:", total_error)
    print("Original error:", error)



Tensor shape before fitting: (320, 12, 12, 12)
Adjusted Tucker ranks: [3, 3, 3]
Tucker Regression error: 54.416356656488006
Processing split at index 1
Left tensor slice shape: (320, 12, 12, 1)
Tensor shape before fitting: (320, 12, 12, 1)
Adjusted Tucker ranks: [3, 3, 1]


LinAlgError: Singular matrix