# Imports

In [35]:
import numpy as np
import sys
import pickle
import os
import pandas as pd

## Helper Functions

In [2]:
%store -r analysis_hf
sys.path.insert(0, analysis_hf)
import covariance as c
import visualization as viz

# Set Directory Paths

In [3]:
# Directory path to load formatted Thickness At Path Data
%store -r thickAtPath_dataDir
# Directory path to save Calculated Analysis Data
%store -r thickAtPath_CalData
# Directory path to save Figures
%store -r thickAtPath_Fig

# Loading

## Volume Values at Pathology Regions

### Loading Volume Values at Pathology Regions - W Score

In [4]:
# Load the W-score "volume at pathology regions" data for each group from
# the pickles stored under thickAtPath_CalData.
# The `with` statement closes each file on exit, so no explicit f.close()
# is needed afterwards.
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this project's own pipeline.

# HCVolumeAtPath_w
with open(os.path.join(thickAtPath_CalData, 'HCVolumeAtPath_w.pkl'), 'rb') as f:
    HCVolumeAtPath_w = pickle.load(f)

# TAUVolumeAtPath_w
with open(os.path.join(thickAtPath_CalData, 'TAUVolumeAtPath_w.pkl'), 'rb') as f:
    TAUVolumeAtPath_w = pickle.load(f)

# TDPVolumeAtPath_w
with open(os.path.join(thickAtPath_CalData, 'TDPVolumeAtPath_w.pkl'), 'rb') as f:
    TDPVolumeAtPath_w = pickle.load(f)

# Parameter/Variable Setting

## P-value Threshold List

In [5]:
# Single p-value threshold, passed as `pthresh` to calCovMatDict (which
# forwards it to c.covCalSigXY). The list form is kept disabled below.
# pthresh_list = [0.05]
pthresh = 0.05

## Covariance Matrix Threshold (to remove noise)

In [6]:
# Covariance threshold forwarded to c.covCal / c.covCalSigXY / c.covCalSigXYRaw.
# Presumably values below it are treated as noise (per the section title) —
# confirm against the `covariance` helper module.
cov_thresh = 0.1

## Helper Function - Calculating Cov Mat

In [7]:
def calCovMatDict(dataHC, dataTAU, dataTDP, pthresh, cov_thresh):
    """Collect per-group covariance matrices and pairwise comparisons in one dict.

    Parameters
    ----------
    dataHC, dataTAU, dataTDP
        Data for the HC, TAU and TDP groups; passed unchanged to the helpers
        in the `covariance` module (imported as ``c``).
    pthresh
        P-value threshold forwarded to ``c.covCalSigXY``.
    cov_thresh
        Covariance threshold forwarded to every helper call.

    Returns
    -------
    dict
        Keys, in insertion order:
        ``"HC"``, ``"TAU"``, ``"TDP"`` (results of ``c.covCal`` on each group
        against itself), then the six ``c.covCalSigXY`` outputs
        (``"TAU_gt_TDP"``, ``"TDP_gt_TAU"``, ``"TAU_gt_HC"``, ``"TAU_lt_HC"``,
        ``"TDP_gt_HC"``, ``"TDP_lt_HC"``), then the six ``c.covCalSigXYRaw``
        outputs under the same names suffixed with ``"_raw"``.
    """
    group_data = {"HC": dataHC, "TAU": dataTAU, "TDP": dataTDP}

    # Within-group matrices first; the comparisons below reuse them.
    result = {
        name: c.covCal(group_data[name], group_data[name], cov_thresh)
        for name in ("HC", "TAU", "TDP")
    }

    # (x group, y group, key for 1st output, key for 2nd output,
    #  raw key for 1st output, raw key for 2nd output)
    comparisons = (
        ("TAU", "TDP", "TAU_gt_TDP", "TDP_gt_TAU", "TAU_gt_TDP_raw", "TDP_gt_TAU_raw"),
        ("TAU", "HC", "TAU_gt_HC", "TAU_lt_HC", "TAU_gt_HC_raw", "TAU_lt_HC_raw"),
        ("TDP", "HC", "TDP_gt_HC", "TDP_lt_HC", "TDP_gt_HC_raw", "TDP_lt_HC_raw"),
    )

    # Thresholded (pthresh) comparisons.
    for x_name, y_name, first_key, second_key, _, _ in comparisons:
        first, second = c.covCalSigXY(
            group_data[x_name], group_data[y_name],
            result[x_name], result[y_name],
            pthresh, cov_thresh,
        )
        result[first_key] = first
        result[second_key] = second

    # Raw comparisons (no pthresh argument).
    for x_name, y_name, _, _, first_key, second_key in comparisons:
        first, second = c.covCalSigXYRaw(
            group_data[x_name], group_data[y_name],
            result[x_name], result[y_name],
            cov_thresh,
        )
        result[first_key] = first
        result[second_key] = second

    return result

# Calculate Covariance Matrices (Whole)

## Volume at Path Data - W Score

In [8]:
# Whole-sample results: every subject of each group is included. The
# leave-one-out cells further down reuse the "TAU" and "TDP" entries of this dict.
cov_volAtPath_w_dict = calCovMatDict(HCVolumeAtPath_w, TAUVolumeAtPath_w, TDPVolumeAtPath_w, pthresh, cov_thresh)

In [12]:
cov_volAtPath_w_dict["TAU_gt_TDP_raw"]

array([[       nan, 0.52545084, 0.72943184, ..., 0.79960581,        nan,
        0.76808254],
       [0.52545084,        nan, 0.42262678, ...,        nan,        nan,
               nan],
       [0.72943184, 0.42262678,        nan, ...,        nan,        nan,
               nan],
       ...,
       [0.79960581,        nan,        nan, ...,        nan, 0.73483698,
        0.269005  ],
       [       nan,        nan,        nan, ..., 0.73483698,        nan,
               nan],
       [0.76808254,        nan,        nan, ..., 0.269005  ,        nan,
               nan]])

# Calculate Covariance Matrices (Bagging)

## TAU > TDP

In [23]:
%%time
# One TAU > TDP raw comparison matrix per left-out TAU row.
# Although called "bagging" in this notebook, each resample is the full TAU
# data with exactly one row removed (leave-one-out); nothing is drawn at random.
bagged_TAU_gt_TDP_raw = []

for row in range(TAUVolumeAtPath_w.shape[0]):
    # TAU W-score data with row `row` removed (presumably one subject per row — confirm)
    tauW_bag = np.delete(TAUVolumeAtPath_w, row, axis=0)
    
    # Covariance matrix recomputed on the reduced TAU data
    covMatTAU_bag = c.covCal(tauW_bag, tauW_bag, cov_thresh)
    
    # TAU > TDP raw comparison: reduced TAU data vs. the whole TDP data and the
    # whole-sample TDP covariance matrix. Only the first output is kept.
    cmpCovTAU_gt_TDP_raw_bag, _ = c.covCalSigXYRaw(tauW_bag, TDPVolumeAtPath_w, covMatTAU_bag, 
                                                   cov_volAtPath_w_dict["TDP"], cov_thresh)
    
    bagged_TAU_gt_TDP_raw.append(cmpCovTAU_gt_TDP_raw_bag)

CPU times: user 20.9 s, sys: 53.9 ms, total: 21 s
Wall time: 20.9 s


In [24]:
len(bagged_TAU_gt_TDP_raw)

26

## TDP > TAU

In [29]:
%%time
# One TDP > TAU raw comparison matrix per left-out TDP row.
# Although called "bagging" in this notebook, each resample is the full TDP
# data with exactly one row removed (leave-one-out); nothing is drawn at random.
bagged_TDP_gt_TAU_raw = []

for row in range(TDPVolumeAtPath_w.shape[0]):
    # TDP W-score data with row `row` removed (presumably one subject per row — confirm)
    tdpW_bag = np.delete(TDPVolumeAtPath_w, row, axis=0)
    
    # Covariance matrix recomputed on the reduced TDP data
    covMatTDP_bag = c.covCal(tdpW_bag, tdpW_bag, cov_thresh)
    
    # TDP > TAU raw comparison: the whole TAU data and whole-sample TAU covariance
    # matrix vs. the reduced TDP data. Only the second output is kept.
    _, cmpCovTDP_gt_TAU_raw_bag = c.covCalSigXYRaw(TAUVolumeAtPath_w, tdpW_bag, cov_volAtPath_w_dict["TAU"], 
                                                   covMatTDP_bag, cov_thresh)

    bagged_TDP_gt_TAU_raw.append(cmpCovTDP_gt_TAU_raw_bag)

CPU times: user 25.7 s, sys: 101 ms, total: 25.8 s
Wall time: 25.8 s


In [30]:
len(bagged_TDP_gt_TAU_raw)

30

### Save

In [31]:
# Persist both lists of leave-one-out comparison matrices under thickAtPath_CalData.
# The `with` statement closes each file on exit, so no explicit f.close()
# is needed afterwards.

# bagged_TAU_gt_TDP_raw
with open(os.path.join(thickAtPath_CalData, 'bagged_TAU_gt_TDP_raw.pkl'), 'wb') as f:
    pickle.dump(bagged_TAU_gt_TDP_raw, f)

# bagged_TDP_gt_TAU_raw
with open(os.path.join(thickAtPath_CalData, 'bagged_TDP_gt_TAU_raw.pkl'), 'wb') as f:
    pickle.dump(bagged_TDP_gt_TAU_raw, f)

# Calculate Mean and Std of Bagging

## TAU > TDP raw

In [56]:
# Convert the list of bagged TAU>TDP raw to a 3D NumPy array
# Stack the list of leave-one-out TAU>TDP raw matrices into one array;
# axis 0 indexes the resamples (one per left-out TAU row).
bagged_TAU_gt_TDP_raw_3d = np.array(bagged_TAU_gt_TDP_raw)

# Element-wise mean and standard deviation across resamples (axis=0).
# The nan* variants skip NaN entries, so a cell that is NaN in only some
# resamples still gets statistics from the remaining ones. A cell that is NaN
# in every resample stays NaN and makes NumPy emit a RuntimeWarning.
TAU_gt_TDP_mean_values = np.nanmean(bagged_TAU_gt_TDP_raw_3d, axis=0)
TAU_gt_TDP_std_dev_values = np.nanstd(bagged_TAU_gt_TDP_raw_3d, axis=0, ddof=1)  # ddof=1 -> sample standard deviation

  TAU_gt_TDP_mean_values = np.nanmean(bagged_TAU_gt_TDP_raw_3d, axis=0)


In [57]:
pd.DataFrame(cov_volAtPath_w_dict["TAU_gt_TDP_raw"]).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.525451,0.729432,0.389319,0.092801,,0.170728,0.613926,,0.318436,...,,,0.368125,0.555597,,0.362267,0.478136,0.799606,,0.768083


In [58]:
pd.DataFrame(TAU_gt_TDP_mean_values).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.509965,0.716924,0.391083,0.096675,,0.176238,0.609199,,0.321963,...,,,0.363801,0.554403,0.703457,0.364887,0.478333,0.794079,0.892376,0.763348


In [59]:
pd.DataFrame(TAU_gt_TDP_std_dev_values).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.032813,0.034922,0.053609,0.019851,,0.039971,0.066807,,0.056795,...,,,0.053361,0.064181,0.031544,0.055495,0.059994,0.043046,0.022695,0.040502


## TDP > TAU raw

In [65]:
# Convert the list of bagged TDP>TAU raw to a 3D NumPy array
# Stack the list of leave-one-out TDP>TAU raw matrices into one array;
# axis 0 indexes the resamples (one per left-out TDP row).
bagged_TDP_gt_TAU_raw_3d = np.array(bagged_TDP_gt_TAU_raw)

# Element-wise mean and standard deviation across resamples (axis=0).
# The nan* variants skip NaN entries, so a cell that is NaN in only some
# resamples still gets statistics from the remaining ones. A cell that is NaN
# in every resample stays NaN and makes NumPy emit a RuntimeWarning.
# NOTE(review): some cells are backed by very few resamples (e.g. [0][5] is
# non-NaN in only 2 of the 30 matrices printed further down), so their
# mean/std rest on 2 values — consider requiring a minimum non-NaN count.
TDP_gt_TAU_mean_values = np.nanmean(bagged_TDP_gt_TAU_raw_3d, axis=0)
TDP_gt_TAU_std_dev_values = np.nanstd(bagged_TDP_gt_TAU_raw_3d, axis=0, ddof=1)  # ddof=1 -> sample standard deviation

  TDP_gt_TAU_mean_values = np.nanmean(bagged_TDP_gt_TAU_raw_3d, axis=0)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


In [66]:
pd.DataFrame(cov_volAtPath_w_dict["TDP_gt_TAU_raw"]).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.474549,0.270568,0.610681,0.907199,,0.829272,0.386074,,0.681564,...,,,0.631875,0.444403,,0.637733,0.521864,0.200394,,0.231917


In [67]:
pd.DataFrame(TDP_gt_TAU_mean_values).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.470325,0.273899,0.608787,0.901894,0.936009,0.815622,0.386968,,0.678368,...,0.410789,,0.595303,0.444821,,0.63472,0.520607,0.202802,,0.234299


In [68]:
pd.DataFrame(TDP_gt_TAU_std_dev_values).iloc[0:1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,,0.046285,0.04362,0.041456,0.032599,0.014116,0.045311,0.036499,,0.048721,...,0.099852,,0.041442,0.047333,,0.05191,0.046457,0.027541,,0.03112


In [71]:
# Print entry [0][5] of every leave-one-out TDP>TAU raw matrix, to see in
# which resamples that cell is non-NaN.
# Iterate over the actual list length instead of a hard-coded 30, so the
# check stays valid if the number of TDP rows changes.
for i in range(len(bagged_TDP_gt_TAU_raw)):
    print(bagged_TDP_gt_TAU_raw[i][0][5])

nan
nan
nan
0.9459903095282075
nan
nan
nan
nan
0.9260272738093326
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


Why are there non-NaN values (e.g. row 0, column 5 of TDP > TAU raw) in positions where the whole-sample covariance matrix is NaN?
-->