In [1]:
import numpy as np
from scipy.io import loadmat
from scipy import stats
from IPython.core.debugger import set_trace
import pickle
import os
from tqdm import tqdm
import random
from joblib import Parallel, delayed

# Reproducibility: seed both the stdlib and the numpy global generators
# with the same value.
SEED = 97
random.seed(SEED)
np.random.seed(SEED)

# Directory prefixes (relative to the notebook's working directory).
# Both end with "/" because paths below are built by string concatenation.
root_save_dir = "predictions/"
sub_data = "sub_space_data/"

In [2]:
def FDR(vector, q, do_correction = False):
    """Threshold an array of p-values with the Benjamini-Hochberg procedure.

    All entries are pooled (flattened) and sorted in ascending order. The
    largest sorted value ``p_(k)`` that satisfies

        p_(k) <= k / N * q / C

    (``k`` is the 1-based rank, ``N`` the number of entries) becomes the
    rejection threshold, and every entry less than or equal to that
    threshold is marked as rejected.

    Parameters
    ----------
    vector : numpy.ndarray (or array-like)
        p-values, of any shape.
    q : float
        Target false discovery rate.
    do_correction : bool, optional
        If True, the critical values are divided by the harmonic sum
        ``C = sum_{k=1..N} 1/k`` (the Benjamini-Yekutieli correction).
        If False (default), ``C = 1``.

    Returns
    -------
    thresh_vector : numpy.ndarray of float
        Same shape as ``vector``; 1.0 where the entry is rejected, 0.0
        elsewhere.
    thresh : float or int
        The p-value threshold that was applied; stays at 0 when no sorted
        value meets its critical value.

    Notes
    -----
    Prints the threshold and the number of rejected entries.
    """
    vector = np.asarray(vector)
    original_shape = vector.shape
    flat = vector.flatten()
    N = flat.shape[0]

    thresh = 0
    if N > 0:
        # Ranks are 1-based: the smallest p-value has rank 1, so its critical
        # value is q / (N * C) rather than 0.
        ranks = np.arange(1, N + 1)
        if do_correction:
            # Harmonic sum over 1..N (starting at 0 would divide by zero).
            C = np.sum(1.0 / ranks)
        else:
            C = 1.0
        sorted_vector = np.sort(flat)
        passing = np.flatnonzero(sorted_vector <= ranks / N * q / C)
        if passing.size > 0:
            # Step-up rule: take the largest sorted p-value that is below its
            # own critical value; everything at or below it is rejected.
            thresh = sorted_vector[passing[-1]]

    thresh_vector = (flat <= thresh).reshape(original_shape) * 1.0
    print("FDR threshold is : {}, {} voxels rejected".format(thresh, thresh_vector.sum()))
    return thresh_vector, thresh

In [3]:
# One-letter subject identifiers, in the order they are processed below.
all_subjects = list("FGHIJKLMN")

# Feature-set names that serve as the baseline of at least one contrast.
_PUNCT = "punct_final"
_EFFORT_PUNCT = "all_effort_based_metrics_punct"
_POS_DEP_EFFORT = "pos_dep_tags_all_effort_based_metrics"
_CONTREGE_INCOMP = "aggregated_contrege_incomp_pos_dep_tags_all_effort_based_metrics"

# Each entry is (full feature set, baseline feature set); the contrast tested
# is "full minus baseline". The two names are later joined as
# "<full>_diff_<baseline>" to form a directory name.
all_feature_pairs = [
    # Single effort-based metrics on top of punctuation.
    ("node_count_punct", _PUNCT),           # (NC + PU) - (PU)
    ("syntactic_surprisal_punct", _PUNCT),  # (SS + PU) - (PU)
    ("word_frequency_punct", _PUNCT),       # (WF + PU) - (PU)
    ("word_length_punct", _PUNCT),          # (WL + PU) - (PU)
    # All effort-based metrics together.
    (_EFFORT_PUNCT, _PUNCT),                # (EFF + PU) - (PU)
    # PD already contains PU, so this tests (PD + EFF + PU) - (EFF + PU).
    (_POS_DEP_EFFORT, _EFFORT_PUNCT),
    # (CC + PD + EFF + PU) - (PD + EFF + PU)
    ("aggregated_contrege_comp_pos_dep_tags_all_effort_based_metrics", _POS_DEP_EFFORT),
    # (CI + PD + EFF + PU) - (PD + EFF + PU)
    (_CONTREGE_INCOMP, _POS_DEP_EFFORT),
    # (INC + PD + EFF + PU) - (PD + EFF + PU)
    ("aggregated_incontrege_pos_dep_tags_all_effort_based_metrics", _POS_DEP_EFFORT),
    # (BERT + CI + PD + EFF + PU) - (CI + PD + EFF + PU)
    ("aggregated_bert_PCA_dims_15_contrege_incomp_pos_dep_tags_all_effort_based_metrics", _CONTREGE_INCOMP),
]

In [4]:
# Gather every per-subject significance array into one flat list.
# Order matters: for each subject, the punctuation baseline comes first and
# is followed by one array per entry of all_feature_pairs. The cell that
# splits the pooled result back apart walks the list in this same order.
# NOTE(review): the arrays are presumably uncorrected p-values, one per
# voxel -- confirm against the code that wrote these files.
all_uncorrected_sig = []
for sub in all_subjects:
    print(sub)

    punct_path = root_save_dir + "punct_final" + "/{}_sig.npy".format(sub)
    punct_uncorrected_sig = np.load(punct_path)
    all_uncorrected_sig.append(punct_uncorrected_sig)

    for feat in all_feature_pairs:
        # Contrast results live in "<full>_diff_<baseline>/".
        contrast_dir = root_save_dir + "{}_diff_{}".format(*feat)
        uncorrected_sig = np.load(contrast_dir + "/{}_sig_boot.npy".format(sub))
        all_uncorrected_sig.append(uncorrected_sig)

F
G
H
I
J
K
L
M
N


In [5]:
# Target false discovery rate for the pooled correction.
q = 0.05

# Pool every loaded array (all subjects, all contrasts) into one vector so
# that a single threshold is computed across all of them. The threshold
# itself is only printed by FDR; just the 0/1 rejection mask is kept.
pooled_uncorrected_sig = np.hstack(all_uncorrected_sig)
all_corrected_sig, _ = FDR(pooled_uncorrected_sig, q)

FDR threshold is : 0.0008, 45143.0 voxels rejected


In [6]:
# Cut the pooled rejection mask back into per-subject / per-contrast pieces
# and save each piece next to the file it was derived from.
# The walk order (subject -> punctuation baseline -> each feature pair) must
# match the order in which all_uncorrected_sig was filled.
# NOTE(review): slicing by shape[0] assumes every loaded array is 1-D --
# confirm.
last_end = 0   # start offset of the next chunk inside all_corrected_sig
ind = 0        # index of the next chunk inside all_uncorrected_sig
for sub in all_subjects:
    print(sub)

    # Punctuation baseline for this subject.
    chunk_len = all_uncorrected_sig[ind].shape[0]
    punct_corrected_sig = all_corrected_sig[last_end:last_end + chunk_len]
    punct_out = root_save_dir + "punct_final" + "/{}_sig_group_corrected".format(sub)
    np.save(punct_out, punct_corrected_sig)  # np.save appends ".npy"
    last_end, ind = last_end + chunk_len, ind + 1

    print("punct_final: Num voxels rejected = {}".format(punct_corrected_sig.sum()))

    # One chunk per (full, baseline) contrast.
    for feat in all_feature_pairs:
        contrast_name = "{}_diff_{}".format(feat[0],feat[1])

        chunk_len = all_uncorrected_sig[ind].shape[0]
        corrected_sig = all_corrected_sig[last_end:last_end + chunk_len]
        contrast_out = root_save_dir + contrast_name + "/{}_sig_bootstrap_group_corrected".format(sub)
        np.save(contrast_out, corrected_sig)
        last_end, ind = last_end + chunk_len, ind + 1

        print("{}: Num voxels rejected = {}".format(contrast_name, corrected_sig.sum()))

F
punct_final: Num voxels rejected = 2169.0
node_count_punct_diff_punct_final: Num voxels rejected = 176.0
syntactic_surprisal_punct_diff_punct_final: Num voxels rejected = 216.0
word_frequency_punct_diff_punct_final: Num voxels rejected = 332.0
word_length_punct_diff_punct_final: Num voxels rejected = 302.0
all_effort_based_metrics_punct_diff_punct_final: Num voxels rejected = 470.0
pos_dep_tags_all_effort_based_metrics_diff_all_effort_based_metrics_punct: Num voxels rejected = 377.0
aggregated_contrege_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 678.0
aggregated_contrege_comp_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 308.0
aggregated_beam_incontrege_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 472.0
aggregated_bert_PCA_dims_15_contrege_pos_dep_tags_all_effort_based_metrics_diff_aggregated_contrege_pos_dep_

punct_final: Num voxels rejected = 1037.0
node_count_punct_diff_punct_final: Num voxels rejected = 101.0
syntactic_surprisal_punct_diff_punct_final: Num voxels rejected = 120.0
word_frequency_punct_diff_punct_final: Num voxels rejected = 132.0
word_length_punct_diff_punct_final: Num voxels rejected = 171.0
all_effort_based_metrics_punct_diff_punct_final: Num voxels rejected = 218.0
pos_dep_tags_all_effort_based_metrics_diff_all_effort_based_metrics_punct: Num voxels rejected = 193.0
aggregated_contrege_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 251.0
aggregated_contrege_comp_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 355.0
aggregated_beam_incontrege_pos_dep_tags_all_effort_based_metrics_diff_pos_dep_tags_all_effort_based_metrics: Num voxels rejected = 229.0
aggregated_bert_PCA_dims_15_contrege_pos_dep_tags_all_effort_based_metrics_diff_aggregated_contrege_pos_dep_ta