# Imports

In [1]:
import numpy as np
import os
import sys
import pickle
import scipy
import scipy.stats as stats
import pandas as pd
import seaborn as sns
import matplotlib as plt
import statsmodels.api
import statsmodels as sm

## Helper Functions

In [3]:
# Restore `analysis_hf` from IPython's %store database
# (presumably the helper-module directory saved by an earlier notebook — TODO confirm).
%store -r analysis_hf
# Put that directory first on the import path so the imports below can resolve from it.
sys.path.insert(0, analysis_hf)

# Project-local helper modules (presumably located in `analysis_hf` — verify).
import nodeMapping as nm
import normalize as norm

# Set Directory Paths

In [4]:
# Each `%store -r` restores a variable previously saved with `%store`.

# Directory path to load formatted Thickness Data
%store -r thick_dataDir
# Directory path to save Calculated Analysis Data
%store -r thick_CalData
# Directory path to save the Generated Figures
%store -r thickness_Fig

In [5]:
# NOTE(review): every line in this cell is commented out, so none of the paths
# below are restored by this cell.

# # Directory path to load preconstructed Atlas Data
# %store -r baseDir

# # Directory path to load formatted Pathology Data
# %store -r path_dataDir
# # Directory path to save Calculated Analysis Data - Pathology
# %store -r path_CalData

# # Directory path to load formatted Thickness At Path Data
# %store -r thickAtPath_dataDir
# # Directory path to save Calculated Analysis Data
# %store -r thickAtPath_CalData
# # Directory path to save Figures
# %store -r thickAtPath_Fig

# Loading

## Loading Volume W Score (TAU, TDP) - 400 Regions

In [6]:
# Load the precomputed volume W-score arrays for the TAU and TDP groups from
# the calculated-data directory. The `with` statement closes each file on exit,
# so no explicit close() call is needed afterwards.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load files that were produced by this project's own pipeline.

# TAU_vol_w
with open(os.path.join(thick_CalData, 'TAU_vol_w.pkl'), 'rb') as f:
    TAU_vol_w = pickle.load(f)

# TDP_vol_w
with open(os.path.join(thick_CalData, 'TDP_vol_w.pkl'), 'rb') as f:
    TDP_vol_w = pickle.load(f)

# T-test between TAU vs TDP

In [7]:
# Number of regions = number of columns of the W-score array (N = 400 for this data)
N = TAU_vol_w.shape[1]

In [8]:
# Display the region count as a sanity check
N

400

In [9]:
# Shape of the TAU array: axis 0 holds the samples, axis 1 the regions (axis 1 defines N)
TAU_vol_w.shape

(26, 400)

In [10]:
# Shape of the TDP array: the region axis (axis 1) must match TAU for the per-region tests
TDP_vol_w.shape

(30, 400)

## T-test: the TAU and TDP groups have different sample sizes (26 vs. 30) and may have unequal variances, so we use Welch's t-test

In [11]:
# Per-region Welch's t-tests (equal_var=False) comparing TAU against TDP.
# Two one-sided alternatives are evaluated for every region:
#   alternative='greater' -> mean(TAU) > mean(TDP)
#   alternative='less'    -> mean(TAU) < mean(TDP), i.e. TDP > TAU
TAU_gt_TDP_Ttest_stat_list, TAU_gt_TDP_Ttest_pval_list = [], []
TDP_gt_TAU_Ttest_stat_list, TDP_gt_TAU_Ttest_pval_list = [], []

for region in range(N):
    # Column `region` of each array holds that region's values across samples.
    tau_col = TAU_vol_w[:, region]
    tdp_col = TDP_vol_w[:, region]

    # nan_policy='omit' makes the test ignore NaN entries.
    greater_stat, greater_pval = stats.ttest_ind(
        a=tau_col, b=tdp_col, equal_var=False,
        alternative='greater', nan_policy='omit',
    )
    less_stat, less_pval = stats.ttest_ind(
        a=tau_col, b=tdp_col, equal_var=False,
        alternative='less', nan_policy='omit',
    )

    TAU_gt_TDP_Ttest_stat_list.append(greater_stat)
    TAU_gt_TDP_Ttest_pval_list.append(greater_pval)

    TDP_gt_TAU_Ttest_stat_list.append(less_stat)
    TDP_gt_TAU_Ttest_pval_list.append(less_pval)

## Multiple Comparison Correction / alpha = 0.05 / Method: Benjamini/Hochberg (non-negative)

### Corrected for TAU > TDP

In [12]:
# Benjamini/Hochberg FDR correction (alpha = 0.05) of the one-sided TAU > TDP p-values.
# The result is unpacked into the reject flags, the corrected p-values, and the
# Sidak / Bonferroni corrected alphas.
TAU_gt_TDP_fdr = sm.stats.multitest.multipletests(
    TAU_gt_TDP_Ttest_pval_list,
    alpha=0.05,
    method='fdr_bh',
    is_sorted=False,
    returnsorted=False,
)
(TAU_gt_TDP_reject,
 TAU_gt_TDP_pvals_corrected,
 alphacSidak,
 alphacBonf) = TAU_gt_TDP_fdr

### Corrected for TDP > TAU

In [13]:
# Benjamini/Hochberg FDR correction (alpha = 0.05) of the one-sided TDP > TAU p-values.
# The result is unpacked into the reject flags, the corrected p-values, and the
# Sidak / Bonferroni corrected alphas.
TDP_gt_TAU_reject, TDP_gt_TAU_pvals_corrected, alphacSidak, alphacBonf = sm.stats.multitest.multipletests(
    TDP_gt_TAU_Ttest_pval_list,
    alpha=0.05,
    method='fdr_bh',
    is_sorted=False,
    returnsorted=False,
)

# Backward-compatible alias: these corrected p-values were originally bound to the
# misspelled name `TDP_gt_TDP_pvals_corrected`, which other cells still read.
TDP_gt_TDP_pvals_corrected = TDP_gt_TAU_pvals_corrected

## TAU > TDP

In [15]:
# Pandas DataFrame
TAU_gt_TDP_df = pd.DataFrame(np.concatenate((np.array(TAU_gt_TDP_pvals_corrected).reshape((-1, 1)), 
                                             np.array(TAU_gt_TDP_reject).reshape((-1, 1))), axis=1), 
                                             columns=['corrected pval', 'Result'])

TAU_gt_TDP_df['Result'] = TAU_gt_TDP_df['Result'].astype('bool')

In [16]:
# Display the TAU > TDP result table (the rendered output elides the middle rows)
TAU_gt_TDP_df

Unnamed: 0,corrected pval,Result
0,0.999607,False
1,0.999607,False
2,0.999607,False
3,0.999607,False
4,0.999607,False
...,...,...
395,0.999607,False
396,0.999607,False
397,0.999607,False
398,0.999607,False


## TDP > TAU

In [17]:
# Pandas DataFrame
TDP_gt_TAU_df = pd.DataFrame(np.concatenate((np.array(TDP_gt_TDP_pvals_corrected).reshape((-1, 1)), 
                                             np.array(TDP_gt_TAU_reject).reshape((-1, 1))), axis=1), 
                                             columns=['corrected pval', 'Result'])

TDP_gt_TAU_df['Result'] = TDP_gt_TAU_df['Result'].astype('bool')

In [18]:
# Display the TDP > TAU result table (the rendered output elides the middle rows)
TDP_gt_TAU_df

Unnamed: 0,corrected pval,Result
0,0.382904,False
1,0.314857,False
2,0.923878,False
3,0.479054,False
4,0.953624,False
...,...,...
395,0.728061,False
396,0.371417,False
397,0.395798,False
398,0.594183,False


# Save T-test results as CSV

In [19]:
# Write both result tables as CSV, keeping the row index (the region index) as the
# first column. Paths are built with os.path.join, matching the loading cell.
# NOTE(review): the files are written to the figure directory (`thickness_Fig`),
# not the calculated-data directory — confirm this is the intended location.
TAU_gt_TDP_df.to_csv(os.path.join(thickness_Fig, 'TAU_gt_TDP_Ttest(Vol_WScore)_400.csv'), index=True)

TDP_gt_TAU_df.to_csv(os.path.join(thickness_Fig, 'TDP_gt_TAU_Ttest(Vol_WScore)_400.csv'), index=True)