In [1]:
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pingouin import intraclass_corr

sns.set(palette="colorblind")



In [2]:
def manual_filenames(data_dir='/scratch/users/csiyer/',
                     subjects=('sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43'),
                     n_sessions=12):
    """Build the expected connectome file paths without touching the filesystem.

    Paths follow the pattern ``<data_dir>/<sub>_ses-<NN>_connectome.npy`` with
    sessions numbered from 01 to ``n_sessions``.

    Parameters
    ----------
    data_dir : str
        Directory holding the connectome files. A trailing slash is optional.
    subjects : sequence of str
        Subject identifiers such as ``'sub-s03'``.
    n_sessions : int
        Number of sessions per subject.

    Returns
    -------
    list of str
        One path per (subject, session), ordered subject-major and then by
        session number. Calling with no arguments yields the original 60 paths.
    """
    return [
        os.path.join(data_dir, f'{sub}_ses-{ses:02d}_connectome.npy')
        for sub in subjects
        for ses in range(1, n_sessions + 1)
    ]

# load connectomes
def load_connectomes(directory='outputs/connectomes'):
    """Load every connectome found in ``directory`` into a nested dictionary.

    The directory is globbed once for ``*ses*`` files. Subject and session IDs
    are parsed from each file's basename (``sub-<id>`` / ``ses-<id>``), so a
    directory name that happens to contain "sub" or "ses" cannot corrupt them.

    Parameters
    ----------
    directory : str
        Folder containing files named like ``sub-s03_ses-01_connectome.npy``.

    Returns
    -------
    dict
        ``{subject: {session: {'connectome': ndarray}}}`` with subjects and
        sessions inserted in sorted order. Only the unflattened connectome is
        stored; a flattened copy is deliberately not kept, to save memory.
    """
    data_dict = {}
    # Sorted so the insertion order (and the choice among duplicates) is deterministic.
    for path in sorted(glob.glob(os.path.join(directory, '*ses*'))):
        name = os.path.basename(path)
        sub_match = re.search(r'sub-[A-Za-z0-9]+', name)
        ses_match = re.search(r'ses-[A-Za-z0-9]+', name)
        if sub_match is None or ses_match is None:
            # Not a <sub>_<ses> connectome file; ignore it.
            continue

        sessions = data_dict.setdefault(sub_match.group(), {})
        if ses_match.group() in sessions:
            # Several files for one subject/session: keep the first one only.
            continue
        sessions[ses_match.group()] = {'connectome': np.load(path)}

    return data_dict

def load_connectomes_flat(sub='', files=None):
    """Load connectomes and return them flattened, one 1-D vector per session.

    Parameters
    ----------
    sub : str
        Optional subject ID (e.g. ``'sub-s03'``). When non-empty, only that
        subject's sessions are loaded. The default ``''`` loads every subject.
    files : sequence of str, optional
        Paths to load. Defaults to ``manual_filenames()``.

    Returns
    -------
    connectomes_flat : list of ndarray
        Flattened connectomes ordered by subject, then by session.
    subcount : int
        Number of subjects loaded.
    sescount : int
        Number of sessions of the last subject loaded. This equals the
        per-subject session count only when every subject has the same number
        of sessions, which callers that reshape by it rely on.
    """
    if files is None:
        files = manual_filenames()

    # Index the paths once by (subject, session) instead of rescanning the
    # whole file list for every pair.
    path_by_key = {}
    for path in files:
        name = os.path.basename(path)
        sub_match = re.search(r'sub-[A-Za-z0-9]+', name)
        ses_match = re.search(r'ses-[A-Za-z0-9]+', name)
        if sub_match is None or ses_match is None:
            continue
        if sub and sub_match.group() != sub:
            continue
        # setdefault keeps the first path when a pair appears more than once.
        path_by_key.setdefault((sub_match.group(), ses_match.group()), path)

    connectomes_flat = []
    sessions_per_subject = {}
    for sub_id, ses_id in sorted(path_by_key):
        connectomes_flat.append(np.load(path_by_key[(sub_id, ses_id)]).flatten())
        sessions_per_subject[sub_id] = sessions_per_subject.get(sub_id, 0) + 1

    subcount = len(sessions_per_subject)
    sescount = list(sessions_per_subject.values())[-1] if sessions_per_subject else 0

    return connectomes_flat, subcount, sescount

In [None]:
# Load one flattened connectome per session, plus the subject and session counts
# that the cells below use to lay out and label the data.
# load_connectomes() (the dictionary-based loader) is not called in this notebook.
connectomes_flat, n_subjects, n_sessions = load_connectomes_flat()

RDM: session-by-session connectome similarity matrix

In [None]:
# Calculate the correlation matrix of the connectivity vectors.
# connectomes_flat is a plain list of 1-D arrays, which has no .corr() method,
# so stack it into a (sessions, edges) array first. np.corrcoef treats each row
# as one variable, giving a session-by-session matrix.
similarity_matrix = np.corrcoef(np.array(connectomes_flat))
similarity_matrix.shape # square with n_subjects * n_sessions rows: 60x60 for 5 subjects x 12 sessions

In [None]:
# Draw the session-by-session similarity matrix as a heatmap
fig, ax = plt.subplots(1, 1, figsize=(10,8))
ax.set_title("Session-wise Connectivity Similarity Matrix")

sns.heatmap(similarity_matrix, ax=ax)

# White separators every n_sessions rows/columns, so each subject's block of
# sessions is boxed off from the others
for i in range(0, similarity_matrix.shape[0], n_sessions):
    for draw_separator in (ax.axhline, ax.axvline):
        draw_separator(i, color='white', linewidth=2)

# One subject label centred on each block, identical on both axes
xtick_labels = ['sub-s03', 'sub-s10', 'sub-s19', 'sub-s29', 'sub-s43']
xtick_positions = np.arange(n_sessions/2, similarity_matrix.shape[1], n_sessions)
ax.set_yticks(xtick_positions)
ax.set_yticklabels(xtick_labels)
ax.set_xticks(xtick_positions)
ax.set_xticklabels(xtick_labels)

plt.show()

ICC (intraclass correlation)

In [None]:
# Reshape to long format for the ICC: one row per connectivity value, tagged
# with the index of the subject and of the session it came from.
# Flattening the stacked array is row-major, so values run subject by subject,
# then session by session within a subject, then value by value within a session.
data = np.array(connectomes_flat)
data_long = pd.DataFrame({
    'connectivity_values': data.flatten(),
    # each subject index spans all of that subject's sessions
    'subjects': np.repeat(range(n_subjects), n_sessions*data.shape[1]),
    # session indices 0..n_sessions-1, repeated once per subject
    'sessions': np.tile(np.repeat(range(n_sessions), data.shape[1]), n_subjects),
})

In [None]:
# Intraclass correlation with subjects as targets and sessions as raters;
# the result table is indexed by ICC type so a single row can be selected.
# NOTE(review): data_long holds many rows per (subject, session) pair, one per
# connectivity value. Confirm how intraclass_corr treats repeated target/rater
# pairs: if it aggregates them, this is an ICC of mean connectivity rather than
# an edge-wise ICC.
icc = intraclass_corr(
    data=data_long,
    targets='subjects',
    raters='sessions',
    ratings='connectivity_values',
).set_index('Type')
print(icc.loc['ICC2'])

Split-half reliability

In [None]:
# Split-half reliability: on every iteration each subject's sessions are split
# at random into two halves, each half is averaged, and the half-means are
# correlated within subjects and across subjects.
n_iter = 1000
rng = np.random.default_rng(0)  # fixed seed so the resampling is reproducible

# View the flattened connectomes as (subject, session, edge). This relies on
# connectomes_flat being ordered subject by subject with n_sessions entries
# each, which is how load_connectomes_flat() builds it.
connectomes_by_sub = np.asarray(connectomes_flat).reshape(n_subjects, n_sessions, -1)
n_first_half = n_sessions // 2

# Rows 2*i and 2*i + 1 of the stacked half-means belong to subject i.
row_subject = np.repeat(np.arange(n_subjects), 2)
is_across_sub = row_subject[:, None] != row_subject[None, :]
first_half_rows = np.arange(0, 2 * n_subjects, 2)

avg_within_sub_corr = []
avg_across_sub_corr = []

for i_iter in range(n_iter):

    # split each subject's sessions into random halves and average each half
    half_means = []
    for sub_connectomes in connectomes_by_sub:
        order = rng.permutation(n_sessions)
        half_means.append(sub_connectomes[order[n_first_half:]].mean(axis=0))
        half_means.append(sub_connectomes[order[:n_first_half]].mean(axis=0))

    # One correlation matrix over all half-means. Entries are read out by index,
    # so the unit diagonal never leaks into the averages.
    corr = np.corrcoef(np.array(half_means))

    # within-subject: a subject's first half vs. its own second half
    avg_within_sub_corr.append(corr[first_half_rows, first_half_rows + 1].mean())
    # across-subject: every half vs. every half of every other subject
    avg_across_sub_corr.append(corr[is_across_sub].mean())

In [None]:
# Plot the distribution over iterations of the mean within-subject and mean
# across-subject split-half correlations, side by side.
fig, ax = plt.subplots(1,1)
# The iteration count is taken from the results so the title cannot go stale.
fig.suptitle(f'Within-subject vs. across-subject split-half connectome reliability across {len(avg_within_sub_corr)} iterations')
ax.boxplot(avg_within_sub_corr, positions=[1], patch_artist=True, boxprops=dict(facecolor='blue'), labels=['Within-subject'])
# The across-subject box must show the across-subject values, not the within-subject ones again.
ax.boxplot(avg_across_sub_corr, positions=[2], patch_artist=True, boxprops=dict(facecolor='red'), labels=['Across-subject'])
ax.set_ylabel('Pearson r of split halves')
ax.set_ylim(0,1)
plt.show()