In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import glob

In [2]:
def df_to_arr(df):
    """Flatten a DataFrame row by row into a 1-D array, dropping missing cells.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose cells are read in row-major order (all of the first row,
        then all of the second row, and so on).

    Returns
    -------
    numpy.ndarray
        The non-missing cell values, in row-major order.
    """
    # A row-major flatten yields the same ordering as visiting rows one at a time.
    flat = df.to_numpy().ravel()
    # Detect real missing values (NaN / None / NA) instead of comparing the text
    # representation with 'nan', which would also discard a genuine "nan" string.
    keep = ~pd.isna(flat)
    # Rebuild from a plain list so dtype inference matches constructing the array
    # from Python scalars (e.g. an empty result comes back as float64).
    return np.array(flat[keep].tolist())

# IP Significance Tests

### 1. Paired t-test

In [12]:
# Scrambled results. The indexing below treats the array (along its first axis)
# as consecutive runs of `run_size` entries, each run holding one block of
# `sub_num_per_run` entries per subject.
scrambled_data = np.load('output/v1/pre_ip/outer_dist.npy')

num_subjects = 13
sub_num_per_run = num_subjects - 1
run_size = num_subjects * (num_subjects - 1)
num_runs = len(scrambled_data) // run_size  # a trailing partial run is ignored

# One mean per subject, pooling that subject's block from every run.
avgs_scrambled = []
for subj in range(num_subjects):
    block_offset = sub_num_per_run * subj
    subj_vals = [
        value
        for run in range(num_runs)
        for value in scrambled_data[
            sub_num_per_run * num_subjects * run + block_offset:
            sub_num_per_run * num_subjects * run + block_offset + sub_num_per_run
        ]
    ]
    avgs_scrambled.append(np.mean(subj_vals))

# Unscrambled results: flattened table, cut into consecutive chunks of
# `sub_num_per_run` values, one chunk per subject.
unscrambled_data = df_to_arr(pd.read_csv('output/v1/pre_ip/outer_accs_avg.csv', index_col=0))
avgs_unscrambled = [
    np.mean(unscrambled_data[first:first + sub_num_per_run])
    for first in range(0, num_subjects * sub_num_per_run, sub_num_per_run)
]

# Paired comparison of the per-subject means (last expression = cell output).
scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=5.405377264230309, pvalue=0.0001586964097815845)

### 2. Paired t-test with Danny's data

In [13]:
# Hard-coded per-subject averages (13 values in each list, paired by position).
avgs_unscrambled = [
    0.4991, 0.5576, 0.5868, 0.6517, 0.5681,
    0.4544, 0.5742, 0.5543, 0.5931, 0.5668,
    0.5192, 0.5313, 0.5648,
]
avgs_scrambled = [
    0.4908, 0.5123, 0.5261, 0.5283, 0.5144,
    0.4647, 0.5162, 0.4967, 0.5148, 0.5132,
    0.4948, 0.5086, 0.5158,
]

# Paired t-test across the two lists (last expression = cell output).
scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=5.223839148144154, pvalue=0.00021336664062855044)

### 3. Intrasubject paired t-test

In [5]:
# Layout constants used to index the scrambled array: each run is made of
# `num_subjects` consecutive blocks of `sub_num_per_run` entries.
num_subjects = 13
sub_num_per_run = 200
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/v1/pre_ip/outer_dist_within.npy')
num_runs = len(scrambled_data) // run_size  # a trailing partial run is ignored

# Unscrambled side: the 'Average' column of the within-subject table.
within_table = pd.read_csv('output/v1/pre_ip/outer_accs_within.csv', index_col=0)
avg_data_unscrambled = within_table['Average'].tolist()

# Scrambled side: one mean per subject, pooling that subject's block from every run.
avg_data_scrambled = []
for subj in range(num_subjects):
    subj_data = []
    for run_start in range(0, num_runs * run_size, run_size):
        lo = run_start + sub_num_per_run * subj
        subj_data.extend(scrambled_data[lo:lo + sub_num_per_run])
    avg_data_scrambled.append(np.mean(subj_data))

# Paired comparison of the two per-subject lists (last expression = cell output).
scipy.stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=17.66618916630196, pvalue=5.907157370651435e-10)

### 4. Intrasubject z significance test

In [6]:
# Observed value: mean over every non-missing cell of the within-subject table,
# with the 'Average' column excluded.
within_accs = pd.read_csv('output/v1/pre_ip/outer_accs_within.csv', index_col=0).drop(columns=['Average'])
avg_unscrambled = np.mean(df_to_arr(within_accs))

scrambled_data = np.load('output/v1/pre_ip/outer_dist_within.npy')

# NOTE: num_runs, sub_num_per_run and num_subjects are not assigned in this cell;
# they carry over from a cell executed earlier.
values_per_run = sub_num_per_run * num_subjects
avgs_scrambled = [
    np.mean(scrambled_data[run * values_per_run: (run + 1) * values_per_run])
    for run in range(num_runs)
]

# Mean and standard deviation (np.std default, i.e. ddof=0) of the per-run means.
avgs_mean, avgs_std = np.mean(avgs_scrambled), np.std(avgs_scrambled)

# Standardize the observed mean against the per-run means; the p-value is the
# normal upper-tail probability at |z|.
z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
print(f"p-value: {scipy.stats.norm.sf(abs(z_score))}")


z-score: 51.310797920447676
p-value: 0.0


# CP Significance Tests

### 1. Paired t-test

In [15]:
# Scrambled results. The indexing below treats the array (along its first axis)
# as consecutive runs of `run_size` entries, each run holding one block of
# `sub_num_per_run` entries per subject.
scrambled_data = np.load('output/v1/pre_cp/outer_dist.npy')

num_subjects = 13
sub_num_per_run = num_subjects - 1
run_size = num_subjects * (num_subjects - 1)
num_runs = len(scrambled_data) // run_size  # a trailing partial run is ignored

# One mean per subject, pooling that subject's block from every run.
avgs_scrambled = []
for subj in range(num_subjects):
    block_offset = sub_num_per_run * subj
    subj_vals = [
        value
        for run in range(num_runs)
        for value in scrambled_data[
            sub_num_per_run * num_subjects * run + block_offset:
            sub_num_per_run * num_subjects * run + block_offset + sub_num_per_run
        ]
    ]
    avgs_scrambled.append(np.mean(subj_vals))

# Unscrambled results: flattened table, cut into consecutive chunks of
# `sub_num_per_run` values, one chunk per subject.
unscrambled_data = df_to_arr(pd.read_csv('output/v1/pre_cp/outer_accs_avg.csv', index_col=0))
avgs_unscrambled = [
    np.mean(unscrambled_data[first:first + sub_num_per_run])
    for first in range(0, num_subjects * sub_num_per_run, sub_num_per_run)
]

# Paired comparison of the per-subject means (last expression = cell output).
scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=3.9946406003945354, pvalue=0.0017787701515703039)

### 2. Paired t-test with Danny's data

In [17]:
# Hard-coded per-subject averages (13 values in each list, paired by position).
avgs_unscrambled = [
    0.4978, 0.5260, 0.5321, 0.5339, 0.5295,
    0.5590, 0.5608, 0.5356, 0.5130, 0.5230,
    0.5413, 0.5000, 0.5156,
]
avgs_scrambled = [
    0.4933, 0.5178, 0.5126, 0.4790, 0.5248,
    0.5166, 0.5175, 0.5132, 0.5138, 0.5030,
    0.4993, 0.4973, 0.5063,
]

# Paired t-test across the two lists (last expression = cell output).
scipy.stats.ttest_rel(avgs_unscrambled, avgs_scrambled)

Ttest_relResult(statistic=4.048959616541215, pvalue=0.0016133390984771907)

### 3. Intrasubject paired t-test

In [19]:
# Layout constants used to index the scrambled array: each run is made of
# `num_subjects` consecutive blocks of `sub_num_per_run` entries.
sub_num_per_run = 200
num_subjects = 13
run_size = num_subjects * sub_num_per_run

scrambled_data = np.load('output/v1/pre_cp/outer_dist_within.npy')
# Derive the number of complete runs from the loaded array instead of assuming
# a fixed count: a hard-coded value silently pools too few values (or skips
# runs) whenever the file holds a different number of runs.
num_runs = len(scrambled_data) // run_size

# Unscrambled side: the 'Average' column of the within-subject table.
avg_data_unscrambled = pd.read_csv('output/v1/pre_cp/outer_accs_within.csv', index_col=0)['Average'].tolist()

# Scrambled side: one mean per subject, pooling that subject's block from every run.
avg_data_scrambled = []
for i in range(num_subjects):
    subj_data = []
    for j in range(num_runs):
        start = run_size * j + sub_num_per_run * i
        subj_data.extend(scrambled_data[start:start + sub_num_per_run])
    avg_data_scrambled.append(np.mean(subj_data))

# Paired comparison of the two per-subject lists (last expression = cell output).
scipy.stats.ttest_rel(avg_data_unscrambled, avg_data_scrambled)

Ttest_relResult(statistic=33.140089725225394, pvalue=3.612931156559571e-13)

### 4. Intrasubject z significance test

In [20]:
# Observed value: mean over every non-missing cell of the within-subject table,
# with the 'Average' column excluded.
within_accs = pd.read_csv('output/v1/pre_cp/outer_accs_within.csv', index_col=0).drop(columns=['Average'])
avg_unscrambled = np.mean(df_to_arr(within_accs))

scrambled_data = np.load('output/v1/pre_cp/outer_dist_within.npy')

# NOTE: num_runs, sub_num_per_run and num_subjects are not assigned in this cell;
# they carry over from a cell executed earlier.
values_per_run = sub_num_per_run * num_subjects
avgs_scrambled = [
    np.mean(scrambled_data[run * values_per_run: (run + 1) * values_per_run])
    for run in range(num_runs)
]

# Mean and standard deviation (np.std default, i.e. ddof=0) of the per-run means.
avgs_mean, avgs_std = np.mean(avgs_scrambled), np.std(avgs_scrambled)

# Standardize the observed mean against the per-run means; the p-value is the
# normal upper-tail probability at |z|.
z_score = (avg_unscrambled - avgs_mean) / avgs_std
print(f"z-score: {z_score}")
print(f"p-value: {scipy.stats.norm.sf(abs(z_score))}")


z-score: 38.92712092632016
p-value: 0.0
