In [1]:
#general import
import os
import numpy as np
import pandas as pd

#nn import
from torch import device, load

#stats import
from scipy.stats import ttest_rel, wilcoxon
from statsmodels.stats import multitest
from statistics import median

#brain visualization import
from nilearn import regions, maskers, datasets, surface, plotting, image
from nilearn.plotting import plot_roi, plot_stat_map
from nilearn.maskers import NiftiMasker
import matplotlib.pyplot as plt

#package import (for class) --> TO CHANGE
import sys
sys.path.append('/home/maelle/GitHub_repositories/cNeuromod_encoding_2020')  
import models

In [2]:
# Notebook configuration: file-system locations and analysis constants.
# NOTE(review): every path below is an absolute path on one machine; the notebook
# cannot be re-run elsewhere without editing them.
out_path = '/home/maelle/Results/figures/surface'
r2_test_model_path = '/home/maelle/Results/best_models'
# Directory scanned by get_specific_file_path(); the matching files are read with np.load.
r2_by_run_path = '/home/maelle/Results/best_models/predict_S4_runs'
MIST_path = '/home/maelle/DataBase/fMRI_parcellations/MIST_parcellation/Parcellations/MIST_ROI.nii.gz'
voxel_mask = '/home/maelle/GitHub_repositories/cNeuromod_encoding_2020/parcellation/STG_middle.nii.gz'
mist_roi_labels = '/home/maelle/DataBase/fMRI_parcellations/MIST_parcellation/Parcel_Information/MIST_ROI.csv'

# Parcel information table (';'-separated). Its 'label' column is used throughout the
# notebook to name the columns of the loaded score arrays.
label_df = pd.read_csv(mist_roi_labels, sep=';')
print(label_df.columns)

convs = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'conv6', 'conv7']
subs = ['sub-01', 'sub-02', 'sub-03', 'sub-04', 'sub-05', 'sub-06']
# (scale name, expected number of regions); the name is also used as a file-name filter.
scales = [('MIST_ROI', 210), ('auditory_Voxels', 556)] 
conv = 'conv4'
baseline = 'no_ft'
# Substring identifying the null-model files in r2_by_run_path.
null_model = 'no_init'
diff_name = '{}-{}'.format(conv, baseline)


Index(['roi', 'label', 'name', 'size', 'symmetry', 'laterality', 'x', 'y', 'z',
       'neighbour', 'parent', 'overlap'],
      dtype='object')


In [3]:
def sum_conditions(conditions) : 
    """Return True unless at least one item of `conditions` compares equal to False.

    The comparison is deliberately `== False` (not truthiness): `0` counts as a
    failed condition, whereas `None` or an empty string do not. An empty
    iterable yields True.
    """
    return not any(condition == False for condition in conditions)

In [4]:
def get_specific_file_path(path, *cond):
    """Return the path of the file in `path` whose name satisfies every condition.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).
    *cond : tuple
        ``(substring, must_be_present)`` pairs. When ``must_be_present`` is truthy
        the file name must contain ``substring``; otherwise it must not contain it.

    Returns
    -------
    str or None
        ``os.path.join(path, name)`` of the matching file, or None when no file
        matches. When several files match, the last one in sorted name order is
        returned.
    """
    specific_file_path = None
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "last match wins" rule reproducible across machines and runs.
    for file in sorted(os.listdir(path)):
        matches = all((obj in file) if boolean else (obj not in file) for (obj, boolean) in cond)
        if matches:
            specific_file_path = os.path.join(path, file)
    return specific_file_path

In [5]:
def n_best_rois(data, n, columns_label = label_df['label']):
    """Return the labels of the `n` columns of `data` with the highest column mean.

    `data` is wrapped in a DataFrame whose columns are named by `columns_label`;
    the labels are returned as a list, ordered from highest to lowest mean.
    """
    column_means = pd.DataFrame(data, columns=columns_label).mean()
    ranked = column_means.sort_values(ascending=False)
    return list(ranked.head(n).index)

In [6]:
def df_runs_from_list(data, list_roi, sub, columns_label = label_df['label']):
    """Return the values of the columns listed in `list_roi` as a long-format DataFrame.

    `data` is labelled with `columns_label`, melted into ('roi', 'r2') rows that
    keep the original row index, filtered to the labels in `list_roi`, and
    tagged with a constant 'subject' column equal to `sub`.
    """
    long_df = pd.DataFrame(data, columns=columns_label).melt(
        var_name = 'roi', value_name='r2', ignore_index=False)
    wanted = long_df['roi'].isin(list_roi)
    selection = long_df.loc[wanted].copy()
    selection['subject'] = sub
    return selection

In [7]:
def best_rois_in_MIST_array(data_array, nb_occ=3):
    """List the columns that hold the row-wise maximum at least `nb_occ` times.

    Returns a list of ``(column index, number of rows won, label)`` tuples, the
    label being read positionally from ``label_df['label']``.
    """
    winners, counts = np.unique(np.argmax(data_array, axis=1), return_counts=True)
    return [
        (winner, count, label_df['label'].iloc[winner])
        for winner, count in zip(winners, counts)
        if count >= nb_occ
    ]

In [8]:
def stats_between_pop(pop1, pop2, test = ttest_rel, threshold = 0.05, difference=False):
    """Apply `test` to each pair of rows of `pop1` and `pop2`.

    Parameters
    ----------
    pop1, pop2 : iterables of array-like
        Iterated in parallel with zip(); one test is run per pair.
    test : callable
        Called as ``test(a, b)`` and expected to return ``(statistic, pvalue)``.
    threshold : float
        A pair is reported as significant when ``pvalue <= threshold``.
    difference : bool
        When True, the test receives ``(row1 - row2, None)`` instead of the two rows.

    Returns
    -------
    tuple(list, list)
        ``(all_pairs, significant_pairs)``, each a list of ``(pair index, pvalue)``.
    """
    every_pair, significant_pairs = [], []
    for index, samples in enumerate(zip(pop1, pop2)):
        first, second = samples
        if difference :
            # one-sample form: the test gets the paired differences only
            first, second = first - second, None
        _, p = test(first, second)
        every_pair.append((index, p))
        if p <= threshold :
            significant_pairs.append((index, p))
    return (every_pair, significant_pairs)

In [9]:
#difference baseline vs null model__________________________________________________________________________________
# For every scale and subject: Wilcoxon test (baseline - null) on each column of the
# loaded arrays, then two FDR counts (statsmodels with method='n', and a manual BH line).
# The corrected p-values are gathered in null_baseline_df.

#fig, ax = plt.subplots(2, 6, figsize=(30,10))
#fig.tight_layout()

null_baseline_columns = ['sub', 'scale', 'i', 'roi_label', 'p_value', 'sign']
# One frame per (scale, subject); concatenated once after the loops instead of
# growing a DataFrame from an empty one at every iteration.
null_baseline_frames = []

for row, (scale, size) in enumerate(scales):    
    print('significative regions between null and baseline in {} :'.format(scale))
    for col, sub in enumerate(subs):
        #load data-----------------------------------------------------------------------------------------------
        run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
        run_null_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(null_model, True))

        run_baseline_data = np.load(run_baseline_file_path) 
        run_null_data = np.load(run_null_file_path)
        
        #Wilcoxon-------------------------------------------------------------------------------------------------
        # Arrays are transposed so that one test is run per column of the loaded data.
        (all_region, sign_region) = stats_between_pop(run_baseline_data.T, run_null_data.T, test = wilcoxon, 
                                    threshold=0.05, difference = True)
        
        #FDR------------------------------------------------------------------------------------------------------
        p_values = [p_val for i, p_val in all_region]
        #meth1 / by : method='n' selects the correction valid under negative correlation
        rejected, p_vals_corr = multitest.fdrcorrection(p_values, method='n')
        sign_fdr = [p_val for p_val, sign in zip(p_vals_corr, rejected) if sign]
        #meth2 - manual / bh
        sorted_p_values = np.sort(p_values)
        # Use the real number of tests rather than the hard-coded `size`, so the
        # comparison below cannot fail on a shape mismatch.
        N = len(sorted_p_values)
        rank = np.arange(1, N+1)
        q = 0.05
        #ax[row, col].plot(rank, sorted_p_values, 'b.', label='$p(i)$')
        #ax[row, col].plot(rank, q * rank / N, 'r', label='$q i / N$')
        
        below = sorted_p_values < (q * rank / N) # True where p(i)<qi/N
        # Highest 0-based rank under the BH line; -1 when no p-value is under it
        # (np.max on the empty selection would raise).
        max_below = np.max(np.where(below)[0]) if below.any() else -1
        
        #collect everything in a nice df---------------------------------------------------------------------------
        baseline_sub = pd.DataFrame()
        baseline_sub['i']=[i for i, p_val in all_region]
        baseline_sub['roi_label']=label_df['label'] if scale == 'MIST_ROI' else None
        baseline_sub['p_value']=p_vals_corr
        baseline_sub['sign']=rejected
        baseline_sub['sub']=sub
        baseline_sub['scale']=scale
        null_baseline_frames.append(baseline_sub)
        print('{} : actual size : {}, significative after wilcoxon : {}, after fdr - bh : {}, by : {}'.format(sub, size, 
                len(sign_region), max_below+1, len(sign_fdr)))

if null_baseline_frames:
    null_baseline_df = pd.concat(null_baseline_frames, axis='rows')[null_baseline_columns]
else:
    null_baseline_df = pd.DataFrame(columns=null_baseline_columns)

significative regions between null and baseline in MIST_ROI :
sub-01 : actual size : 210, significative after wilcoxon : 131, after fdr - bh : 120, by : 102
sub-02 : actual size : 210, significative after wilcoxon : 128, after fdr - bh : 124, by : 102
sub-03 : actual size : 210, significative after wilcoxon : 141, after fdr - bh : 134, by : 113
sub-04 : actual size : 210, significative after wilcoxon : 149, after fdr - bh : 145, by : 126
sub-05 : actual size : 210, significative after wilcoxon : 120, after fdr - bh : 108, by : 89
sub-06 : actual size : 210, significative after wilcoxon : 150, after fdr - bh : 144, by : 124
significative regions between null and baseline in auditory_Voxels :
sub-01 : actual size : 556, significative after wilcoxon : 546, after fdr - bh : 546, by : 532
sub-02 : actual size : 556, significative after wilcoxon : 532, after fdr - bh : 532, by : 525
sub-03 : actual size : 556, significative after wilcoxon : 547, after fdr - bh : 547, by : 542
sub-04 : actual

In [10]:
# Print the rows of null_baseline_df (one per subject) for four *_STgyr_* labels.
for roi in ('R_STgyr_m', 'L_STgyr_m', 'R_STgyr_p', 'L_STgyr_p'):
    roi_stat = null_baseline_df[null_baseline_df['roi_label'].eq(roi)]
    print(roi_stat)

        sub     scale    i  roi_label       p_value  sign
153  sub-01  MIST_ROI  153  R_STgyr_m  9.054811e-07  True
153  sub-02  MIST_ROI  153  R_STgyr_m  4.871111e-07  True
153  sub-03  MIST_ROI  153  R_STgyr_m  2.971604e-07  True
153  sub-04  MIST_ROI  153  R_STgyr_m  2.746569e-07  True
153  sub-05  MIST_ROI  153  R_STgyr_m  1.879160e-05  True
153  sub-06  MIST_ROI  153  R_STgyr_m  1.209259e-06  True
        sub     scale    i  roi_label       p_value  sign
152  sub-01  MIST_ROI  152  L_STgyr_m  2.524008e-06  True
152  sub-02  MIST_ROI  152  L_STgyr_m  4.871111e-07  True
152  sub-03  MIST_ROI  152  L_STgyr_m  2.971604e-07  True
152  sub-04  MIST_ROI  152  L_STgyr_m  2.746569e-07  True
152  sub-05  MIST_ROI  152  L_STgyr_m  3.468166e-06  True
152  sub-06  MIST_ROI  152  L_STgyr_m  1.209259e-06  True
        sub     scale    i  roi_label       p_value  sign
205  sub-01  MIST_ROI  205  R_STgyr_p  2.524008e-06  True
205  sub-02  MIST_ROI  205  R_STgyr_p  4.871111e-07  True
205  sub-03  M

In [11]:
#difference between first ranking rois and the following ROIs ?_____________________________________________________
# For each subject, compare the scores of `roi` (Wilcoxon, paired over rows of the
# loaded array) with the ROI ranked just after it among the n best, and with the
# row-wise mean over all columns.
rois = ['R_STgyr_m', 'L_STgyr_m', 'L_STgyr_p', 'R_STgyr_p']  # NOTE(review): not used in this cell
roi = 'R_VVISnet_l'
scale = 'MIST_ROI'
n=40

# Loop-invariant: position of `roi` in the label table (assumed to match the column order of the data).
roi_index = label_df.loc[label_df['label']==roi].index.values[0]

for sub in subs : 
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path)
    
    best_rois = n_best_rois(run_baseline_data, n)
    # `roi` may be outside the n best ROIs of a subject, or be the last of them;
    # report it instead of failing on .index() / the out-of-range lookup.
    if roi not in best_rois or best_rois.index(roi) + 1 >= len(best_rois):
        print('{} has no following roi among the {} best rois of {} : skipped'.format(roi, n, sub))
        print('')
        continue
    roi_i = best_rois.index(roi)
    next_roi = best_rois[roi_i+1]
    
    next_roi_index = label_df.loc[label_df['label']==next_roi].index.values[0]
    
    roi_data = run_baseline_data.T[roi_index]
    next_roi_data = run_baseline_data.T[next_roi_index]
    mean_data = run_baseline_data.mean(axis=1)
    
    stat, pvalue = wilcoxon(roi_data, next_roi_data)
    stat_m, pvalue_m = wilcoxon(roi_data, mean_data)
    
    # Counts are read from the data instead of being hard-coded (48 / 210):
    # NOTE(review): the per-subject run count may differ (see the sub-05 handling
    # in the between-subject cell) - confirm.
    print('mean r² in {} runs for {} : {}'.format(len(roi_data), roi, roi_data.mean()))
    print('pvalue between {} and the following roi {} in {} : {}'.format(roi, next_roi, sub, pvalue))
    print('pvalue between {} and the mean of {} ROI in {} : {}'.format(roi, run_baseline_data.shape[1], sub, pvalue_m))
    print('')

mean r² in 48 runs for R_VVISnet_l : 0.10562860825474017
pvalue between R_VVISnet_l and the following roi L_PVISnet_l in sub-01 : 0.4849130252357077
pvalue between R_VVISnet_l and the mean of 210 ROI in sub-01 : 2.5572595311710004e-09

mean r² in 48 runs for R_VVISnet_l : 0.09560815178359706
pvalue between R_VVISnet_l and the following roi R_DVIS_vl in sub-02 : 0.6490856243865604
pvalue between R_VVISnet_l and the mean of 210 ROI in sub-02 : 6.262036192682993e-09

mean r² in 48 runs for R_VVISnet_l : 0.1175624973224953
pvalue between R_VVISnet_l and the following roi L_VVISnet_l in sub-03 : 0.2401469317245789
pvalue between R_VVISnet_l and the mean of 210 ROI in sub-03 : 3.3101626873069728e-09

mean r² in 48 runs for R_VVISnet_l : 0.07366997051569261
pvalue between R_VVISnet_l and the following roi L_TP in sub-04 : 0.0015558180213111259
pvalue between R_VVISnet_l and the mean of 210 ROI in sub-04 : 2.704876776658351e-07

mean r² in 48 runs for R_VVISnet_l : 0.06832989554338008
pvalue b

In [12]:
#difference between roi/voxels between each subject_________________________________________________________________
# Pairwise Wilcoxon tests between subjects on the scores of a single ROI.
roi = 'R_STgyr_m'

for scale, size in [('MIST_ROI', 210)]:
    # Loop-invariant: position of `roi` in the label table.
    roi_index = label_df.loc[label_df['label']==roi].index.values[0]

    all_subs_roi_data = {}
    for sub in subs:
        run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
        run_baseline_data = np.load(run_baseline_file_path).T
        all_subs_roi_data[sub] = run_baseline_data[roi_index]
    
    for sub in subs : 
        print('{} stats for {}'.format(scale, sub)) 
        for sub_2 in subs : 
            if sub_2 == sub :
                continue
            # Truncate both series to their common length for every pair, not only
            # when 'sub-05' is involved; identical to the previous behaviour whenever
            # the other subjects have equal lengths.
            # NOTE(review): pairing by position assumes the first `size` entries of
            # two subjects correspond to the same runs - confirm.
            size = min(len(all_subs_roi_data[sub]), len(all_subs_roi_data[sub_2]))
            stat, pvalue = wilcoxon(all_subs_roi_data[sub][:size], all_subs_roi_data[sub_2][:size])
            print('wilcoxon p value for {} between {} and {} : {}'.format (roi, sub, sub_2, pvalue))

MIST_ROI stats for sub-01
wilcoxon p value for R_STgyr_m between sub-01 and sub-02 : 0.0015558180213111259
wilcoxon p value for R_STgyr_m between sub-01 and sub-03 : 5.3511465107094877e-08
wilcoxon p value for R_STgyr_m between sub-01 and sub-04 : 9.101991527555763e-07
wilcoxon p value for R_STgyr_m between sub-01 and sub-05 : 3.602888286739981e-06
wilcoxon p value for R_STgyr_m between sub-01 and sub-06 : 0.03341892830561937
MIST_ROI stats for sub-02
wilcoxon p value for R_STgyr_m between sub-02 and sub-01 : 0.0015558180213111259
wilcoxon p value for R_STgyr_m between sub-02 and sub-03 : 6.221974335467492e-07
wilcoxon p value for R_STgyr_m between sub-02 and sub-04 : 1.4333775135982142e-05
wilcoxon p value for R_STgyr_m between sub-02 and sub-05 : 2.342251730854226e-07
wilcoxon p value for R_STgyr_m between sub-02 and sub-06 : 0.5049791130786143
MIST_ROI stats for sub-03
wilcoxon p value for R_STgyr_m between sub-03 and sub-01 : 5.3511465107094877e-08
wilcoxon p value for R_STgyr_m be

In [13]:
#highest r² score for each subject
# Prints, per subject, the mean (min, max) of the maxima taken along axis 0 of the
# transposed score array.
scale = 'auditory_Voxels'
for sub in subs:
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path).T
    # maxima along axis 0, computed once and summarised three ways
    axis0_maxima = run_baseline_data.max(axis=0)
    max_mean, max_min, max_max = axis0_maxima.mean(), axis0_maxima.min(), axis0_maxima.max()
    print('{} : {} ({}, {})'.format(sub, max_mean, max_min, max_max))

sub-01 : 0.38749965967710825 (0.2244905041111085, 0.7043780404483821)
sub-02 : 0.37032020654931774 (0.22286351421871875, 0.5664209197853405)
sub-03 : 0.4544466341171691 (0.3364459460096907, 0.6000948025319877)
sub-04 : 0.43354333292451713 (0.3188601487540367, 0.6059322657815319)
sub-05 : 0.27066765168314477 (0.14020527181859743, 0.4282404911346147)
sub-06 : 0.3834112304979817 (0.016418766259344686, 0.5316398817914221)


In [17]:
#number of parcels significantly different from baseline for each subject/conv
# One Wilcoxon test (conv - baseline) per column of the loaded arrays, FDR-corrected
# with method='n'; the table holds the number of rejected tests.
scale = 'MIST_ROI'
finetune_convs = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'conv6', 'conv7']
finetune_records = []

for sub in subs:
    # The baseline does not depend on the conv layer: load it once per subject.
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path)

    finetune = {'sub' : sub}
    # `conv_name` (not `conv`) so the notebook-level `conv` setting is not overwritten.
    for conv_name in finetune_convs : 
        run_conv_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(conv_name, True))
        run_conv_data = np.load(run_conv_file_path)
    
        (all_region, sign_region) = stats_between_pop(run_conv_data.T, run_baseline_data.T, test = wilcoxon, 
                                    threshold=0.05, difference = True)
        p_values = [p_val for i, p_val in all_region]
        #meth1
        rejected, p_vals_corr = multitest.fdrcorrection(p_values, method='n')
        finetune[conv_name] = int(np.sum(rejected))
    
    finetune_records.append(finetune)

# Build the table in one go: rows = conv layers, columns = subjects.
finetune_df = pd.DataFrame(finetune_records, columns=['sub'] + finetune_convs).set_index('sub').T
print(finetune_df)
# Rank of each conv layer within a subject (1 = most significant parcels).
a = finetune_df.rank(axis=0, ascending=False)
print(a)

sub   sub-01 sub-02 sub-03 sub-04 sub-05 sub-06
conv1     24     27    105     61     23     21
conv2     61     64     42      8     69      1
conv3     29      1     67     33     39      7
conv4     69     30     45     77     24      0
conv5     65     31     25      5     11    122
conv6    148      0     85    110     15      5
conv7     49      2     26     25      2      6
sub    sub-01  sub-02  sub-03  sub-04  sub-05  sub-06
conv1     7.0     4.0     1.0     3.0     4.0     2.0
conv2     4.0     1.0     5.0     6.0     1.0     6.0
conv3     6.0     6.0     3.0     4.0     2.0     3.0
conv4     2.0     3.0     4.0     2.0     3.0     7.0
conv5     3.0     2.0     7.0     7.0     6.0     1.0
conv6     1.0     7.0     2.0     1.0     5.0     5.0
conv7     5.0     5.0     6.0     5.0     7.0     4.0


In [18]:
# For a fixed list of ROIs: is each finetuned conv model better than the baseline
# (one-sided Wilcoxon, uncorrected p < 0.05)? One boolean table is printed per subject.
scale = 'MIST_ROI'

ROIs = [
    'R_STgyr_m','L_STgyr_m','R_STgyr_p','L_STgyr_p',
    'R_MTgyr_p','L_MTgyr_p','R_HSgyr','L_HSgyr',    
    'R_VVISnet_l', 'L_VVISnet_l', 'R_PVISnet_l' , 'L_PVISnet_l', 'R_LVISnet_DP','L_LVISnet_DP'
        ]

median_ranks = []
for sub in subs:
    print(sub)
    # The baseline does not depend on the conv layer: load it once per subject.
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path)

    significant_by_conv = {}
    # `conv_name` (not `conv`) so the notebook-level `conv` setting is not overwritten.
    for conv_name in convs :     
        run_conv_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(conv_name, True))
        run_conv_data = np.load(run_conv_file_path)
        
        dic = dict()    # mean gain over baseline per ROI; NOTE(review): computed but never used
        dic_p = dict()  # True where the one-sided test gives p < 0.05
        for roi in ROIs:
            roi_index = label_df.loc[label_df['label']==roi].index.values[0]
            roi_basl_data = run_baseline_data.T[roi_index]
            roi_conv_data = run_conv_data.T[roi_index]
            stat, p_value = wilcoxon(roi_conv_data, roi_basl_data, alternative='greater')
            dic[roi] = roi_conv_data.mean() - roi_basl_data.mean()
            dic_p[roi] = p_value < 0.05
        significant_by_conv[conv_name] = pd.Series(dic_p)

    # One column per conv layer, rows in the order of ROIs.
    df = pd.DataFrame(significant_by_conv, index=ROIs)
    
    # NOTE(review): this ranks the boolean significance flags across conv layers;
    # ranking the mean gains stored in `dic` may have been intended - confirm.
    rank_df = df.rank(axis=1, ascending=False)
    rank_df.loc['median'] = rank_df.median(axis='rows') 
    median_ranks.append(rank_df.loc['median'])
    print(df)

# One column per subject (each column keeps the name 'median', as before), built in one concat.
subs_df = pd.concat(median_ranks, axis=1) if median_ranks else pd.DataFrame()
subs_df['total'] = subs_df.median(axis='columns')
#print(subs_df)

sub-01
              conv1  conv2  conv3  conv4  conv5  conv6  conv7
R_STgyr_m      True   True   True   True  False   True  False
L_STgyr_m      True   True   True   True   True   True  False
R_STgyr_p      True   True   True   True   True   True   True
L_STgyr_p      True   True   True   True  False   True  False
R_MTgyr_p      True   True   True   True   True   True   True
L_MTgyr_p      True   True   True   True   True   True   True
R_HSgyr        True   True   True   True   True   True   True
L_HSgyr        True   True   True   True  False   True  False
R_VVISnet_l    True   True   True   True   True   True   True
L_VVISnet_l    True   True   True   True   True   True   True
R_PVISnet_l    True   True   True   True   True   True   True
L_PVISnet_l    True   True   True   True   True   True   True
R_LVISnet_DP   True   True   True   True   True   True   True
L_LVISnet_DP   True   True   True   True   True   True   True
sub-02
              conv1  conv2  conv3  conv4  conv5  conv6  

In [19]:
#significant difference between conv4 and other finetuned models
# Two-sided Wilcoxon test (uncorrected, p < 0.05) between each other conv model and
# conv4, for a fixed list of ROIs; one boolean table is printed per subject.
scale = 'MIST_ROI'

ROIs = [
    'R_STgyr_m','L_STgyr_m','R_STgyr_p','L_STgyr_p',
    'R_MTgyr_p','L_MTgyr_p','R_HSgyr','L_HSgyr',    
    'R_VVISnet_l', 'L_VVISnet_l', 'R_PVISnet_l' , 'L_PVISnet_l', 'R_LVISnet_DP','L_LVISnet_DP'
        ]

# Kept under its own name: reassigning the notebook-level `convs` here would silently
# drop conv4 from every cell that iterates over `convs` when it is re-run afterwards.
other_convs = ['conv1', 'conv2', 'conv3', 'conv5', 'conv6', 'conv7']
for sub in subs:
    print(sub)
    # The conv4 reference does not depend on the compared layer: load it once per subject.
    run_conv4_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('conv4', True))
    conv4_data = np.load(run_conv4_file_path)

    significant_by_conv = {}
    # `conv_name` (not `conv`) so the notebook-level `conv` setting is not overwritten.
    for conv_name in other_convs : 
        run_conv_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(conv_name, True))
        run_conv_data = np.load(run_conv_file_path)
        
        dic = dict()
        for roi in ROIs:
            roi_index = label_df.loc[label_df['label']==roi].index.values[0]
            roi_conv4_data = conv4_data.T[roi_index]
            roi_conv_data = run_conv_data.T[roi_index]
            stat, p_value = wilcoxon(roi_conv_data, roi_conv4_data)#, alternative='greater')
            dic[roi] = p_value < 0.05
        significant_by_conv[conv_name] = pd.Series(dic)

    # One column per compared conv layer, rows in the order of ROIs.
    df = pd.DataFrame(significant_by_conv, index=ROIs)
    print(df)


sub-01
              conv1  conv2  conv3  conv5  conv6  conv7
R_STgyr_m     False  False  False   True  False   True
L_STgyr_m      True  False  False   True  False   True
R_STgyr_p      True  False  False   True   True   True
L_STgyr_p     False  False  False   True  False   True
R_MTgyr_p     False  False  False  False  False  False
L_MTgyr_p     False  False  False   True   True  False
R_HSgyr        True  False  False   True  False   True
L_HSgyr       False  False  False   True  False   True
R_VVISnet_l    True  False   True   True   True   True
L_VVISnet_l    True  False  False   True   True   True
R_PVISnet_l    True  False   True   True   True   True
L_PVISnet_l    True  False  False   True   True   True
R_LVISnet_DP  False  False  False  False  False   True
L_LVISnet_DP  False  False  False   True  False   True
sub-02
              conv1  conv2  conv3  conv5  conv6  conv7
R_STgyr_m     False  False  False  False   True   True
L_STgyr_m     False   True  False  False   True   T

In [23]:
#significant difference between conv4 and baseline for specific ROIs

# Settings for this section. The order of ROIs matters: the reporting loop prints
# a heading before entry 0 and another before entry 8.
ROIs = [
    'R_STgyr_m', 'L_STgyr_m',
    'R_STgyr_p', 'L_STgyr_p',
    'R_MTgyr_p', 'L_MTgyr_p',
    'R_HSgyr', 'L_HSgyr',
    'R_VVISnet_l', 'L_VVISnet_l',
    'R_PVISnet_l', 'L_PVISnet_l',
    'R_LVISnet_DP', 'L_LVISnet_DP',
]
conv, scale = 'conv4', 'MIST_ROI'
n = 7  # number of best ROIs listed per subject
def best_roi_rscore(runs_data):
    """Return the column of `runs_data` with the highest mean over axis 0.

    Returns
    -------
    tuple
        ``(column position, label, mean value)`` where the label is read from
        ``label_df['label']`` at the same position.
    """
    mean_runs_data = runs_data.mean(axis=0)
    max_r2 = np.max(mean_runs_data)
    roi_max_r2 = np.argmax(mean_runs_data)
    # argmax gives a position, so the label is looked up positionally (.iloc);
    # .loc only works while label_df keeps its default 0..N-1 index.
    roi_label = label_df['label'].iloc[roi_max_r2]
    return (roi_max_r2, roi_label, max_r2)

def _wilcoxon_fdr(pop1, pop2):
    """Wilcoxon test on pop1 - pop2 for each row pair, then FDR correction (method='n').

    Returns ``(rejected, corrected p-values)`` as given by multitest.fdrcorrection.
    """
    (all_region, sign_region) = stats_between_pop(pop1, pop2, 
                                test = wilcoxon, threshold=0.05, difference = True)
    p_values = [p_val for i, p_val in all_region]
    return multitest.fdrcorrection(p_values, method='n')


# Per-subject report: best ROIs for baseline and conv models, the five largest mean
# gains, the number of FDR-significant columns for three comparisons, and the
# corrected conv-vs-baseline p-value of each ROI in ROIs.
for sub in subs :
    print("")
    print(sub)
    run_conv_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(conv, True))
    run_conv_data = np.load(run_conv_file_path)
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path)
    run_null_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(null_model, True))
    run_null_data = np.load(run_null_file_path)
    
    
    max_conv = best_roi_rscore(run_conv_data)
    max_basl = best_roi_rscore(run_baseline_data)
    print('best roi in')
    print('    baseline data : {}'.format(max_basl))
    print(n_best_rois(run_baseline_data, n))
    print('    conv data : {}'.format(max_conv))
    print(n_best_rois(run_conv_data, n))
    
    # Mean gain of the conv model over the baseline, per column.
    diff_np = run_conv_data.mean(axis=0) - run_baseline_data.mean(axis=0)
    # Five largest gains in decreasing order; the stable sort keeps the first
    # occurrence first on ties, like repeated argmax.
    for roi_max in np.argsort(-diff_np, kind='stable')[:5]:
        roi_max = int(roi_max)
        print('max increase', roi_max, label_df['label'].iloc[roi_max], diff_np[roi_max])
    
    rejected2, p_vals_corr2 = _wilcoxon_fdr(run_baseline_data.T, run_null_data.T)
    rejected3, p_vals_corr3 = _wilcoxon_fdr(run_conv_data.T, run_null_data.T)
    rejected, p_vals_corr = _wilcoxon_fdr(run_conv_data.T, run_baseline_data.T)
    print('rois with sign p (fdr - by  corrected) : ')
    print('null/basl vs null/conv vs basl/conv: {} {} {}'.format(
        int(np.sum(rejected2)), int(np.sum(rejected3)), int(np.sum(rejected))))
    
    for i, roi in enumerate(ROIs) :
        # Headings rely on the order of ROIs: entries 0-7, then entries 8 onwards.
        if i == 0:
            print('Auditory ROIs :')
        elif i == 8:
            print('Visuals ROIs :')
        roi_index = label_df.loc[label_df['label']==roi].index.values[0]
        print(roi_index, roi, p_vals_corr[roi_index], rejected[roi_index])


sub-01
best roi in
    baseline data : (153, 'R_STgyr_m', 0.21299753696985646)
['R_STgyr_m', 'L_STgyr_m', 'L_STgyr_p', 'L_MTgyr_p', 'R_STgyr_p', 'L_VVISnet_l', 'R_VVISnet_l']
    conv data : (153, 'R_STgyr_m', 0.230501498393858)
['R_STgyr_m', 'L_STgyr_m', 'L_STgyr_p', 'R_STgyr_p', 'L_MTgyr_p', 'L_PVISnet_l', 'R_VVISnet_l']
max increase 205 R_STgyr_p 0.0410280909289662
max increase 103 L_PVISnet_l 0.03910761607379051
max increase 26 L_PVISnet_dm 0.03662588652136135
max increase 182 L_PVISnet_vm 0.03632375682815256
max increase 93 L_SFsul_a 0.03499308575655926
rois with sign p (fdr - by  corrected) : 
null/basl vs null/conv vs basl/conv: 102 126 69
Auditory ROIs :
153 R_STgyr_m 0.018851822350775795 True
152 L_STgyr_m 0.007981916829394378 True
205 R_STgyr_p 0.00021748007115267857 True
204 L_STgyr_p 0.00916166093171613 True
181 R_MTgyr_p 0.11155064134961398 False
141 L_MTgyr_p 0.026718591851855013 True
169 R_HSgyr 0.0009351836986634167 True
170 L_HSgyr 0.13537162679129608 False
Visuals RO

In [24]:
#significant difference between conv4 and baseline for voxels
# Per subject: Wilcoxon test (conv - baseline) on each column of the loaded arrays,
# FDR-corrected with method='n', then a summary restricted to the columns that
# survive the correction.

conv = 'conv4'
scale = 'auditory_Voxels'

for sub in subs :
    print("")
    print(sub)
    run_conv_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),(conv, True))
    run_conv_data = np.load(run_conv_file_path)
    run_baseline_file_path = get_specific_file_path(r2_by_run_path, (sub, True),(scale, True),('_f_', False))
    run_baseline_data = np.load(run_baseline_file_path)
    
    (all_region, sign_region) = stats_between_pop(run_conv_data.T, run_baseline_data.T, 
                                test = wilcoxon, threshold=0.05, difference = True)
    p_values = [p_val for i, p_val in all_region]
    rejected, p_vals_corr = multitest.fdrcorrection(p_values, method='n')
    # Positions of the tests rejected after correction.
    sign_voxels = np.flatnonzero(rejected)
    print('voxels with sign p before fdr correction : {} and after : {}'.format(len(sign_region), len(sign_voxels)))
    
    # Nothing to summarise when no test survives; the means and maxima below
    # would raise on an empty selection.
    if len(sign_voxels) == 0:
        print('no voxel left after fdr correction')
        continue
    
    # Mean over axis 1 of the selected rows; copied to a contiguous array first so the
    # values match a row-by-row construction.
    sign_vox_basl = np.ascontiguousarray(run_baseline_data.T[sign_voxels]).mean(axis=1)
    sign_vox_conv = np.ascontiguousarray(run_conv_data.T[sign_voxels]).mean(axis=1)
    # NOTE(review): the argmax values are positions within the significant subset,
    # not indices into the full array (use sign_voxels[...] to map them back).
    print(sign_vox_conv.max(), sign_vox_conv.argmax(), sign_vox_basl.max(), sign_vox_basl.argmax())
    
    diff_vox = sign_vox_conv - sign_vox_basl
    print(diff_vox.max(), diff_vox.argmax(), diff_vox.mean())
    
    


sub-01
voxels with sign p before fdr correction : 371 and after : 192
0.39502086324355234 49 0.36822568149428575 49
0.052306612934719554 21 0.027332428825417667

sub-02
voxels with sign p before fdr correction : 549 and after : 536
0.4068875769793008 523 0.35841688185162035 493
0.07910966400598801 14 0.036572274393383265

sub-03
voxels with sign p before fdr correction : 454 and after : 321
0.44120454455269054 263 0.4205483931272534 263
0.036955748058127735 6 0.018969757361607383

sub-04
voxels with sign p before fdr correction : 506 and after : 471
0.4409157494766747 319 0.4077884680303738 319
0.0333709525591242 373 0.01755974701432583

sub-05
voxels with sign p before fdr correction : 355 and after : 193
0.26849338495390496 14 0.25380444068463415 15
0.028531397015681973 123 0.01233679559643746

sub-06
voxels with sign p before fdr correction : 364 and after : 212
0.3542345150795629 182 0.33992447997028297 182
0.03860349279160724 206 0.019256386913526456
