In [2]:
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns

In [3]:
def df_to_arr(df):
    """Flatten a DataFrame into a 1-D NumPy array, dropping missing values.

    Cells are read row by row (row-major order), i.e. in the same order a
    loop over rows and then over columns would visit them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells should be flattened.

    Returns
    -------
    numpy.ndarray
        1-D array holding every non-missing cell of ``df``.
    """
    # ravel() walks the 2-D value array in row-major (C) index order.
    flat = df.to_numpy().ravel()
    # pd.isna flags NaN, None, NaT and pd.NA; the former str(x) != 'nan'
    # comparison only recognised float NaN (and needed a Python-level loop).
    return flat[~pd.isna(flat)]

# IP Significance Tests

In [13]:
# IP: paired t-test comparing, subject by subject, the mean of the unscrambled
# values with the mean of that subject's values pooled over all scrambled runs.
num_subjects = 8
sub_num_per_run = num_subjects-1            # values stored per subject in one run
run_size = num_subjects * (num_subjects-1)  # values stored per run (all subjects)

scrambled_data = np.load('output/mt/post_ip/outer_dist.npy')
num_runs = len(scrambled_data) // run_size  # complete runs only; a remainder is unused

# The slicing treats the array as consecutive runs, each one holding
# `sub_num_per_run` values for subject 0, then subject 1, and so on.
avgs_scrambled = []
for subj in range(num_subjects):
    first = sub_num_per_run * subj          # subject's offset inside every run
    last = first + sub_num_per_run
    subj_vals = [
        val
        for run in range(num_runs)
        for val in scrambled_data[run_size * run + first:run_size * run + last]
    ]
    avgs_scrambled.append(np.mean(subj_vals))

unscrambled_data = df_to_arr(pd.read_csv('output/mt/post_ip/outer_accs_avg.csv', index_col=0))
# Assumes the flattened CSV holds exactly `sub_num_per_run` non-NaN values per
# subject, in subject order -- TODO confirm against the file.
avgs_unscrambled = [
    np.mean(unscrambled_data[sub_num_per_run * subj:sub_num_per_run * (subj + 1)])
    for subj in range(num_subjects)
]

scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=6.46936599858087, pvalue=0.0003439170474668176)

In [6]:
# IP z-test: locate the overall unscrambled mean within the distribution of
# per-run means of the scrambled data.

# Defined in this cell (same values as the t-test cell) so it runs on its own
# instead of relying on `run_size` having been left in the kernel by another cell.
num_subjects = 8
run_size = num_subjects * (num_subjects - 1)

outer_scrambled = np.load('output/mt/post_ip/outer_dist.npy')
# One mean per complete run of `run_size` values; a trailing partial run is ignored.
avgs_scrambled = [
    np.mean(outer_scrambled[run_size * run:run_size * (run + 1)])
    for run in range(len(outer_scrambled) // run_size)
]

avgs_mean = np.mean(avgs_scrambled)
avgs_std = np.std(avgs_scrambled)  # population std (NumPy default, ddof=0)

outer_unscrambled = df_to_arr(pd.read_csv('output/mt/post_ip/outer_accs_avg.csv', index_col=0))
avg_unscrambled = np.mean(outer_unscrambled)

z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
# NOTE(review): sf(abs(z)) is a one-tailed p-value taken in whichever direction
# the observed mean deviates; a two-sided test would double it -- confirm intent.
print(f"p-value: {scipy.stats.norm.sf(abs(z_score))}")

z-score: 2.4986854590307646
p-value: 0.006232744891275658


# CP Significance Tests

In [12]:
# CP: paired t-test comparing, subject by subject, the mean of the unscrambled
# values with the mean of that subject's values pooled over all scrambled runs.
# NOTE(review): both paths in this cell contain a doubled 'mt/mt' segment,
# whereas the CP z-test cell reads from 'output/mt/post_cp/' -- confirm which
# directory is intended.
num_subjects = 8
sub_num_per_run = num_subjects-1            # values stored per subject in one run
run_size = num_subjects * (num_subjects-1)  # values stored per run (all subjects)

scrambled_data = np.load('output/mt/mt/post_cp/outer_dist.npy')
num_runs = len(scrambled_data) // run_size  # complete runs only; a remainder is unused

# The slicing treats the array as consecutive runs, each one holding
# `sub_num_per_run` values for subject 0, then subject 1, and so on.
avgs_scrambled = []
for subj in range(num_subjects):
    first = sub_num_per_run * subj          # subject's offset inside every run
    last = first + sub_num_per_run
    subj_vals = [
        val
        for run in range(num_runs)
        for val in scrambled_data[run_size * run + first:run_size * run + last]
    ]
    avgs_scrambled.append(np.mean(subj_vals))

unscrambled_data = df_to_arr(pd.read_csv('output/mt/mt/post_cp/outer_accs_avg.csv', index_col=0))
# Assumes the flattened CSV holds exactly `sub_num_per_run` non-NaN values per
# subject, in subject order -- TODO confirm against the file.
avgs_unscrambled = [
    np.mean(unscrambled_data[sub_num_per_run * subj:sub_num_per_run * (subj + 1)])
    for subj in range(num_subjects)
]

scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=4.174382650238193, pvalue=0.004166315125556841)

In [8]:
# CP z-test: locate the overall unscrambled mean within the distribution of
# per-run means of the scrambled data.

# Defined in this cell (same values as the t-test cell) so it runs on its own
# instead of relying on `run_size` having been left in the kernel by another cell.
num_subjects = 8
run_size = num_subjects * (num_subjects - 1)

outer_scrambled = np.load('output/mt/post_cp/outer_dist.npy')
# One mean per complete run of `run_size` values; a trailing partial run is ignored.
avgs_scrambled = [
    np.mean(outer_scrambled[run_size * run:run_size * (run + 1)])
    for run in range(len(outer_scrambled) // run_size)
]

avgs_mean = np.mean(avgs_scrambled)
avgs_std = np.std(avgs_scrambled)  # population std (NumPy default, ddof=0)

outer_unscrambled = df_to_arr(pd.read_csv('output/mt/post_cp/outer_accs_avg.csv', index_col=0))
avg_unscrambled = np.mean(outer_unscrambled)

z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
# NOTE(review): sf(abs(z)) is a one-tailed p-value taken in whichever direction
# the observed mean deviates; a two-sided test would double it -- confirm intent.
print(f"p-value: {scipy.stats.norm.sf(abs(z_score))}")

z-score: 1.0305449344148274
p-value: 0.15137713507540018


# Post-Training Unpermuted vs. Permuted CP/IP (Within)

In [74]:
# IP (within): paired t-test of each subject's unpermuted 'Average' against the
# mean of that subject's values pooled over every permuted run.
num_runs = 30
sub_num_per_run = 200   # values stored per subject in one run
num_subjects = 8

scrambled_data = np.load('output/mt/post_ip/outer_perms_within.npy')
avg_data_unscrambled = (
    pd.read_csv('output/mt/post_ip/outer_accs_within.csv', index_col=0)['Average']
    .tolist()
)

# The slicing treats the array as consecutive runs, each one holding
# `sub_num_per_run` values for subject 0, then subject 1, and so on.
avg_data_scrambled = []
for subj in range(num_subjects):
    subj_data = []
    for run in range(num_runs):
        lo = sub_num_per_run * (num_subjects * run + subj)
        subj_data.extend(scrambled_data[lo:lo + sub_num_per_run])
    avg_data_scrambled.append(np.mean(subj_data))

scipy.stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=8.604721200445661, pvalue=5.70753853598756e-05)

In [42]:
# CP (within): paired t-test of each subject's unpermuted 'Average' against the
# mean of that subject's values pooled over every permuted run.
num_runs = 30
sub_num_per_run = 200   # values stored per subject in one run
num_subjects = 8

scrambled_data = np.load('output/mt/post_cp/outer_perms_within.npy')
avg_data_unscrambled = (
    pd.read_csv('output/mt/post_cp/outer_accs_within.csv', index_col=0)['Average']
    .tolist()
)

# The slicing treats the array as consecutive runs, each one holding
# `sub_num_per_run` values for subject 0, then subject 1, and so on.
avg_data_scrambled = []
for subj in range(num_subjects):
    subj_data = []
    for run in range(num_runs):
        lo = sub_num_per_run * (num_subjects * run + subj)
        subj_data.extend(scrambled_data[lo:lo + sub_num_per_run])
    avg_data_scrambled.append(np.mean(subj_data))

scipy.stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=9.877975827917348, pvalue=2.3196463548008575e-05)

# Post-Training vs. Pre-Training CP/IP (Within)

In [4]:
# IP (within): paired t-test of the 'Average' column, post-training vs. pre-training.
pre_data = pd.read_csv('output/mt/pre_ip/outer_accs_within.csv', index_col=0)
post_data = pd.read_csv('output/mt/post_ip/outer_accs_within.csv', index_col=0)

# Row labels that appear in both tables.
common = post_data.index.intersection(pre_data.index)

# Select BOTH samples through `common`: a paired test needs two lists of equal
# length whose k-th entries belong to the same row label. Filtering only the
# pre table would break the pairing whenever the post table has an extra row.
avg_data_post = post_data.loc[common, 'Average'].tolist()
avg_data_pre = pre_data.loc[common, 'Average'].tolist()

scipy.stats.ttest_rel(avg_data_post, avg_data_pre)

Ttest_relResult(statistic=-1.1968580969476674, pvalue=0.2703186955294861)

In [5]:
# CP (within): paired t-test of the 'Average' column, post-training vs. pre-training.
pre_data = pd.read_csv('output/mt/pre_cp/outer_accs_within.csv', index_col=0)
post_data = pd.read_csv('output/mt/post_cp/outer_accs_within.csv', index_col=0)

# Row labels that appear in both tables.
common = post_data.index.intersection(pre_data.index)

# Select BOTH samples through `common`: a paired test needs two lists of equal
# length whose k-th entries belong to the same row label. Filtering only the
# pre table would break the pairing whenever the post table has an extra row.
avg_data_post = post_data.loc[common, 'Average'].tolist()
avg_data_pre = pre_data.loc[common, 'Average'].tolist()

scipy.stats.ttest_rel(avg_data_post, avg_data_pre)

Ttest_relResult(statistic=-2.4713472158564054, pvalue=0.042748522816239705)

In [6]:
# IP (within): paired t-test of the 'Average' column between the '_post' and
# '_pre' tables stored under post_ip/.
pre_data = pd.read_csv('output/mt/post_ip/outer_accs_within_pre.csv', index_col=0)
post_data = pd.read_csv('output/mt/post_ip/outer_accs_within_post.csv', index_col=0)

# Computed from the two tables loaded in this cell; reusing whatever `common`
# an earlier cell left in the kernel would select rows based on different files.
common = post_data.index.intersection(pre_data.index)

# Select BOTH samples through `common` so the lists have equal length and the
# k-th entries of each belong to the same row label.
avg_data_post = post_data.loc[common, 'Average'].tolist()
avg_data_pre = pre_data.loc[common, 'Average'].tolist()

scipy.stats.ttest_rel(avg_data_post, avg_data_pre)

Ttest_relResult(statistic=-1.0653611089770083, pvalue=0.32207416102741493)

In [7]:
# CP (within): paired t-test of the 'Average' column between the '_post' and
# '_pre' tables stored under post_cp/.
pre_data = pd.read_csv('output/mt/post_cp/outer_accs_within_pre.csv', index_col=0)
post_data = pd.read_csv('output/mt/post_cp/outer_accs_within_post.csv', index_col=0)

# Computed from the two tables loaded in this cell; reusing whatever `common`
# an earlier cell left in the kernel would select rows based on different files.
common = post_data.index.intersection(pre_data.index)

# Select BOTH samples through `common` so the lists have equal length and the
# k-th entries of each belong to the same row label.
avg_data_post = post_data.loc[common, 'Average'].tolist()
avg_data_pre = pre_data.loc[common, 'Average'].tolist()

scipy.stats.ttest_rel(avg_data_post, avg_data_pre)

Ttest_relResult(statistic=0.2788760494354379, pvalue=0.7883989671364511)