In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import glob

In [7]:
def df_to_arr(df):
    """Flatten a DataFrame into a 1-D array, dropping missing entries.

    Cells are read in row-major order: every non-missing cell of the first
    row, then of the second row, and so on.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose cells should be collected.

    Returns
    -------
    numpy.ndarray
        1-D array of the non-missing cell values. The dtype is whatever
        ``np.array`` infers from the surviving values (an empty result is
        the default empty float array).
    """
    # ravel() walks the 2-D values in row-major index order, which matches
    # visiting the rows one at a time.
    flat = df.to_numpy().ravel()
    # Detect missing cells by value (NaN, None, pd.NA, NaT) instead of
    # comparing each cell's string form against 'nan'.
    keep = ~pd.isna(flat)
    # Round-trip through a list so the dtype is re-inferred from the kept
    # values only, as it was when the array was built from a Python list.
    return np.array(flat[keep].tolist())

# IP Significance Tests

### 1. Paired t-test

In [8]:
# Layout used by the slicing below: the scrambled array is treated as
# consecutive runs of `run_size` entries, each run made of `num_subjects`
# back-to-back groups of `sub_num_per_run` entries.
num_subjects = 13
sub_num_per_run = num_subjects - 1
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/mt/pre_ip/outer_dist.npy')
num_runs = len(scrambled_data) // run_size

# One mean per subject, pooling that subject's group from every run.
avgs_scrambled = [
    np.mean([
        value
        for run in range(num_runs)
        for value in scrambled_data[
            run_size * run + sub_num_per_run * subj:
            run_size * run + sub_num_per_run * (subj + 1)
        ]
    ])
    for subj in range(num_subjects)
]

# Flatten the unscrambled table (df_to_arr drops the missing cells) and
# average it in consecutive chunks of `sub_num_per_run` values.
# NOTE(review): this chunking lines up with the CSV rows only if every row
# contributes exactly `sub_num_per_run` non-missing values -- confirm.
unscrambled_data = df_to_arr(pd.read_csv('output/mt/pre_ip/outer_accs.csv', index_col=0))
avgs_unscrambled = [
    np.mean(unscrambled_data[start:start + sub_num_per_run])
    for start in range(0, num_subjects * sub_num_per_run, sub_num_per_run)
]

# Paired t-test across the per-subject means (last expression, so the
# notebook displays the result).
stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=3.88272821625636, pvalue=0.0021770941292518844)

### 2. Paired t-test with Danny's data

In [14]:
# Hard-coded per-subject averages; position k in both lists is treated as
# the same pair by the paired test below.
avgs_unscrambled = [
    0.5582, 0.5055, 0.5955, 0.5723, 0.5673,
    0.4210, 0.5712, 0.6276, 0.6045, 0.5803,
    0.5000, 0.5165, 0.5641,
]
avgs_scrambled = [
    0.5082, 0.4935, 0.5190, 0.5299, 0.5233,
    0.4692, 0.5152, 0.5354, 0.5331, 0.5324,
    0.4931, 0.5091, 0.5199,
]
# Last expression of the cell, so the notebook displays the test result.
stats.ttest_rel(a=avgs_unscrambled, b=avgs_scrambled)

Ttest_relResult(statistic=3.7947024541712535, pvalue=0.0025543074541806692)

### 3. Intrasubject paired t-test

In [15]:
# Layout used by the slicing below: consecutive runs of `run_size` entries,
# each run made of `num_subjects` back-to-back groups of `sub_num_per_run`
# entries.
num_subjects = 13
sub_num_per_run = 200
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/mt/pre_ip/outer_dist_within.npy')
num_runs = len(scrambled_data) // run_size

# Unscrambled per-row averages are taken directly from the CSV's
# 'Average' column.
avg_data_unscrambled = pd.read_csv(
    'output/mt/pre_ip/outer_accs_within.csv', index_col=0
)['Average'].tolist()

# One scrambled mean per subject, pooling that subject's group from every run.
avg_data_scrambled = [
    np.mean([
        value
        for run in range(num_runs)
        for value in scrambled_data[
            run_size * run + sub_num_per_run * subj:
            run_size * run + sub_num_per_run * (subj + 1)
        ]
    ])
    for subj in range(num_subjects)
]

# Paired t-test across the two per-subject lists (last expression, so the
# notebook displays the result).
stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=18.582103100617424, pvalue=3.2853905583364515e-10)

### 4. Intrasubject z significance test

In [19]:
# Layout constants are defined here rather than inherited from whichever
# cell ran last: other cells in this notebook rebind `sub_num_per_run` and
# `num_runs` to values for a different file, so relying on kernel state
# would silently change the slicing when cells are run out of order.
num_subjects = 13
sub_num_per_run = 200
run_size = num_subjects * sub_num_per_run

# Grand mean of every non-missing cell in the unscrambled table, excluding
# the precomputed 'Average' column.
avg_unscrambled = np.mean(df_to_arr(pd.read_csv('output/mt/pre_ip/outer_accs_within.csv', index_col=0).drop(['Average'], axis=1)))

scrambled_data = np.load('output/mt/pre_ip/outer_dist_within.npy')
num_runs = len(scrambled_data) // run_size

# One mean per run: each run is a contiguous span of `run_size` entries.
avgs_scrambled = [
    np.mean(scrambled_data[run * run_size:(run + 1) * run_size])
    for run in range(num_runs)
]

avgs_mean = np.mean(avgs_scrambled)
# np.std defaults to ddof=0 (population standard deviation).
avgs_std = np.std(avgs_scrambled)

# Distance of the unscrambled grand mean from the scrambled run means, in
# units of their standard deviation; sf(|z|) is the one-tailed probability.
z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
print(f"p-value: {stats.norm.sf(abs(z_score))}")


z-score: 37.77660329494958
p-value: 0.0


# CP Significance Tests

### 1. Paired t-test

In [20]:
# Layout used by the slicing below: the scrambled array is treated as
# consecutive runs of `run_size` entries, each run made of `num_subjects`
# back-to-back groups of `sub_num_per_run` entries.
num_subjects = 13
sub_num_per_run = num_subjects - 1
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/mt/pre_cp/outer_dist.npy')
num_runs = len(scrambled_data) // run_size

# One mean per subject, pooling that subject's group from every run.
avgs_scrambled = [
    np.mean([
        value
        for run in range(num_runs)
        for value in scrambled_data[
            run_size * run + sub_num_per_run * subj:
            run_size * run + sub_num_per_run * (subj + 1)
        ]
    ])
    for subj in range(num_subjects)
]

# Flatten the unscrambled table (df_to_arr drops the missing cells) and
# average it in consecutive chunks of `sub_num_per_run` values.
# NOTE(review): this chunking lines up with the CSV rows only if every row
# contributes exactly `sub_num_per_run` non-missing values -- confirm.
unscrambled_data = df_to_arr(pd.read_csv('output/mt/pre_cp/outer_accs.csv', index_col=0))
avgs_unscrambled = [
    np.mean(unscrambled_data[start:start + sub_num_per_run])
    for start in range(0, num_subjects * sub_num_per_run, sub_num_per_run)
]

# Paired t-test across the per-subject means (last expression, so the
# notebook displays the result).
stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=6.4316381225138395, pvalue=3.247081070167531e-05)

### 2. Paired t-test with Danny's data

In [24]:
# Hard-coded per-subject averages; position k in both lists is treated as
# the same pair by the paired test below.
avgs_unscrambled = [
    0.5460, 0.5612, 0.5707, 0.5781, 0.5807,
    0.5291, 0.5729, 0.4800, 0.5697, 0.5278,
    0.5589, 0.5373, 0.5599,
]
avgs_scrambled = [
    0.5049, 0.5145, 0.5077, 0.5062, 0.5114,
    0.5116, 0.5171, 0.4878, 0.5231, 0.4905,
    0.5129, 0.5203, 0.5126,
]
# Last expression of the cell, so the notebook displays the test result.
stats.ttest_rel(a=avgs_unscrambled, b=avgs_scrambled)

Ttest_relResult(statistic=6.79572052104781, pvalue=1.9145692724187166e-05)

### 3. Intrasubject paired t-test

In [25]:
# Layout used by the slicing below: consecutive runs of `run_size` entries,
# each run made of `num_subjects` back-to-back groups of `sub_num_per_run`
# entries.
sub_num_per_run = 200
num_subjects = 13
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/mt/pre_cp/outer_dist_within.npy')
# Derive the run count from the file (as the IP version of this test does)
# instead of hard-coding 30, so every complete run in the array is used and
# a shorter file cannot produce empty slices.
num_runs = len(scrambled_data) // run_size

# Unscrambled per-row averages are taken directly from the CSV's
# 'Average' column.
avg_data_unscrambled = pd.read_csv('output/mt/pre_cp/outer_accs_within.csv', index_col=0)['Average'].tolist()

# One scrambled mean per subject, pooling that subject's group from every run.
avg_data_scrambled = []
for i in range(num_subjects):
    subj_data = []
    for j in range(num_runs):
        start = run_size * j + sub_num_per_run * i
        subj_data.extend(scrambled_data[start:start + sub_num_per_run])
    avg_data_scrambled.append(np.mean(subj_data))

# Paired t-test across the two per-subject lists (last expression, so the
# notebook displays the result).
stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=14.58051405023125, pvalue=5.373142467418848e-09)

### 4. Intrasubject z significance test

In [26]:
# Layout constants are defined here rather than inherited from whichever
# cell ran last: other cells in this notebook rebind `sub_num_per_run` and
# `num_runs` to values for a different file, so relying on kernel state
# would silently change the slicing when cells are run out of order.
num_subjects = 13
sub_num_per_run = 200
run_size = num_subjects * sub_num_per_run

# Grand mean of every non-missing cell in the unscrambled table, excluding
# the precomputed 'Average' column.
avg_unscrambled = np.mean(df_to_arr(pd.read_csv('output/mt/pre_cp/outer_accs_within.csv', index_col=0).drop(['Average'], axis=1)))

scrambled_data = np.load('output/mt/pre_cp/outer_dist_within.npy')
# Run count comes from the file itself rather than a hard-coded value, so
# every complete run in the array contributes to the null distribution.
num_runs = len(scrambled_data) // run_size

# One mean per run: each run is a contiguous span of `run_size` entries.
avgs_scrambled = [
    np.mean(scrambled_data[run * run_size:(run + 1) * run_size])
    for run in range(num_runs)
]

avgs_mean = np.mean(avgs_scrambled)
# np.std defaults to ddof=0 (population standard deviation).
avgs_std = np.std(avgs_scrambled)

# Distance of the unscrambled grand mean from the scrambled run means, in
# units of their standard deviation; sf(|z|) is the one-tailed probability.
z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
print(f"p-value: {stats.norm.sf(abs(z_score))}")


z-score: 44.774374490985835
p-value: 0.0
