# Script used to troubleshoot missing FC matrices / inconsistencies in the set of available HCP subjects across:
## - my original Schaefer sample - May 2022 (N=1014)
## - Liisbeth's Glasser sample after Bin redownloaded the data - March 2023 (N=1018) 
## - my new Schaefer sample computed from Bin's newly downloaded data - April 2023 (N=1014) 
### -> ALL DIFFERENT

In [1]:
# General
import os
import sys
import numpy as np
import pandas as pd
import csv
import math
from math import isnan
import statistics
import pingouin as pg
import pickle

# Computing / Analyses
import scipy.io  # loadmat
from scipy import stats
import sklearn 
from brainstat.stats.terms import FixedEffect
from brainstat.stats.SLM import SLM
from statsmodels.stats.multitest import fdrcorrection # does not yield exactly the same FDR correction as R but the same up to 14th decimal place so good enough

# Visualisation
import matplotlib.pyplot as plt 
import seaborn as sns
import vtk
from IPython.display import display
import matplotlib.collections as clt
import ptitprince as pt

# Neuroimaging
import nibabel
import nilearn
from brainstat.datasets import fetch_parcellation
from enigmatoolbox.permutation_testing import spin_test, shuf_test

# Gradients
import brainspace
from brainspace.datasets import load_parcellation, load_conte69
from brainspace.plotting import plot_hemispheres
from brainspace.gradient import GradientMaps
from brainspace.utils.parcellation import map_to_labels

  return warn(


In [2]:
# Root data directory; the paths below are built with `datadir + '...'`, so the trailing slash is required.
datadir = '/data/p_02667/sex_diff_gradients/data/'

In [63]:
# Read the first column of the old Schaefer-400 "unmatch" file and sort its entries.
unmatch_old = sorted(
    pd.read_csv(
        datadir+'fc_matrices/HCP/functional_schaefer400/unmatch_to_calculate_mean.txt',
        header=None,
    ).iloc[:, 0].tolist()
)
# Discard the first 5 and last 2 sorted entries, then cast the rest to int.
# NOTE(review): the trimmed entries are presumably non-ID lines in the text file — TODO confirm.
unmatch_old = list(map(int, unmatch_old[5:-2]))

In [64]:
# Read the first column of the new Schaefer-400 "unmatch" file as a plain list
# (kept in file order: no sorting, trimming or int conversion is applied here).
unmatch_new = pd.read_csv(
    datadir+'fc_matrices/HCP/functional_schaefer400new/unmatch_to_calculate_mean.txt',
    header=None,
).iloc[:, 0].tolist()

In [70]:
# Read the first column of Liisbeth's Glasser "unmatch" file and sort its entries.
unmatch_L = sorted(
    pd.read_csv(
        '/data/p_02667/Liisbeth/data/fc_matrices_glasser/unmatch.txt',
        header=None,
    ).iloc[:, 0].tolist()
)
# Discard the first 15 and last 2 sorted entries, then cast the rest to int.
# NOTE(review): the trimmed entries are presumably non-ID lines in the text file — TODO confirm.
unmatch_L = list(map(int, unmatch_L[15:-2]))

In [71]:
len(unmatch_L)

201

In [55]:
# Position-by-position comparison of unmatch_new against unmatch_old.
# NOTE: this only lines up when both lists are in the same order. A single ID that is
# present in one list but not the other shifts every later position, so a long run of
# "ERROR" lines can reflect one offset rather than many distinct differences; use the
# set comparisons for the actual differing IDs.
# zip() stops at the end of the shorter list, so lists of unequal length do not raise.
for new_id, old_id in zip(unmatch_new, unmatch_old):
    if new_id == old_id:
        print("ok")
    else:
        print(f"ERROR -> new: {new_id} vs old: {old_id}")

ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ERROR -> new: 151021 vs old: 150928
ERROR -> new: 155231 vs old: 151021
ERROR -> new: 159845 vs old: 155231
ERROR -> new: 160931 vs old: 159845
ERROR -> new: 165234 vs old: 160931
ERROR -> new: 168038 vs old: 165234
ERROR -> new: 169141 vs old: 168038
ERROR -> new: 170934 vs old: 169141
ERROR -> new: 171128 vs old: 170934
ERROR -> new: 171734 vs old: 171128
ERROR -> new: 172635 vs old: 171734
ERROR -> new: 173132 vs old: 172635
ERROR -> new: 173233 vs old: 173132
ERROR -> new: 177342 vs old: 173233
ERROR -> new: 179548 vs old: 177342
ERROR -> new: 179952 vs old: 179548
ERROR -> new: 183741 vs old: 179952
ERROR -> new: 186949 vs old: 183741
ERROR -> new: 190132 vs old: 186949
ERROR -> new: 192237 vs old: 190132
ok
ok
ok
ok
ERROR -> new: 198047 vs old: 200008
ERROR -> new: 200210 vs old: 200109
ERROR -> new: 201717 vs old: 200210
ERROR -> new: 203721 vs old: 201717
ERROR -> new: 207628 vs old: 203721
ERROR -

In [75]:
# IDs present in both unmatch_new and unmatch_old
# (set intersection, so the order of the resulting list is arbitrary)
list(set(unmatch_new).intersection(unmatch_old))

[150019,
 355845,
 200210,
 197651,
 623137,
 101410,
 221218,
 116221,
 131621,
 550439,
 734247,
 208428,
 122418,
 362034,
 186949,
 173132,
 745555,
 142424,
 172635,
 155231,
 701535,
 766563,
 159845,
 168038,
 569965,
 578158,
 521331,
 171128,
 209531,
 145531,
 571548,
 113821,
 160931,
 584355,
 143527,
 173233,
 112819,
 190132,
 169141,
 929464,
 177342,
 351938,
 116423,
 120010,
 146634,
 492754,
 107220,
 693461,
 171734,
 355542,
 179952,
 196851,
 113417,
 207628,
 972566,
 462139,
 689470,
 128329,
 197449,
 782157,
 179548,
 473952,
 995174,
 733548,
 165234,
 613235,
 121719,
 549757,
 856463,
 129937,
 129432,
 116120,
 611231,
 193441,
 953764,
 355239,
 248238,
 170934,
 688569,
 183741,
 114116,
 810439,
 203721,
 973770,
 644044,
 126931,
 121820,
 117728,
 121315,
 822244,
 151021,
 201717,
 650746,
 150524,
 129533]

In [57]:
# Symmetric difference: IDs found in exactly one of unmatch_new / unmatch_old
set(unmatch_new) ^ set(unmatch_old)

{150928, 192237, 198047, 200008, 200109, 210011, 303624, 406836}

In [58]:
len(unmatch_new) == len(unmatch_old)

True

In [73]:
# Number of IDs shared by unmatch_new and unmatch_L
len(set(unmatch_new) & set(unmatch_L))

95

In [76]:
# Symmetric difference: IDs found in exactly one of unmatch_new / unmatch_L
set(unmatch_new) ^ set(unmatch_L)

{106016,
 135528,
 150928,
 157336,
 192237,
 198047,
 198855,
 200008,
 200109,
 210011,
 210415,
 214019,
 303624,
 316835,
 406836}

In [78]:
# Sorted listing of the entries in the old Schaefer-400 'mean/' folder
sub_old = sorted(os.listdir(datadir+'fc_matrices/HCP/functional_schaefer400/mean/'))

In [80]:
len(sub_old)

1015

In [85]:
# Drop the first sorted entry of the listing (1015 -> 1014 entries).
# NOTE(review): presumably a non-subject file or folder — TODO confirm what it is.
# Re-running this cell drops one more entry each time, so run it only once after listing the folder.
sub_old = sub_old[1:]

In [87]:
len(sub_old)

1014

In [82]:
# Sorted listing of the entries in the new Schaefer-400 'mean/' folder
sub_new = sorted(os.listdir(datadir+'fc_matrices/HCP/functional_schaefer400new/mean/'))

In [83]:
len(sub_new)

1014

In [89]:
# Sorted listing of the entries in Liisbeth's Glasser 'Mean/' folder
sub_L = sorted(os.listdir('/data/p_02667/Liisbeth/data/fc_matrices_glasser/Mean/'))

In [90]:
sub_L

['100206.csv',
 '100307.csv',
 '100408.csv',
 '100610.csv',
 '101006.csv',
 '101107.csv',
 '101309.csv',
 '101915.csv',
 '102008.csv',
 '102109.csv',
 '102311.csv',
 '102513.csv',
 '102614.csv',
 '102715.csv',
 '102816.csv',
 '103010.csv',
 '103111.csv',
 '103212.csv',
 '103414.csv',
 '103515.csv',
 '103818.csv',
 '104012.csv',
 '104416.csv',
 '104820.csv',
 '105014.csv',
 '105115.csv',
 '105216.csv',
 '105620.csv',
 '105923.csv',
 '106016.csv',
 '106319.csv',
 '106521.csv',
 '106824.csv',
 '107018.csv',
 '107321.csv',
 '107422.csv',
 '107725.csv',
 '108020.csv',
 '108121.csv',
 '108222.csv',
 '108323.csv',
 '108525.csv',
 '108828.csv',
 '109123.csv',
 '109325.csv',
 '109830.csv',
 '110007.csv',
 '110411.csv',
 '110613.csv',
 '111009.csv',
 '111211.csv',
 '111312.csv',
 '111413.csv',
 '111514.csv',
 '111716.csv',
 '112112.csv',
 '112314.csv',
 '112516.csv',
 '112920.csv',
 '113215.csv',
 '113316.csv',
 '113619.csv',
 '113922.csv',
 '114217.csv',
 '114318.csv',
 '114419.csv',
 '114621.c

In [98]:
sub_new

['100206.csv',
 '100307.csv',
 '100408.csv',
 '100610.csv',
 '101006.csv',
 '101107.csv',
 '101309.csv',
 '101915.csv',
 '102008.csv',
 '102109.csv',
 '102311.csv',
 '102513.csv',
 '102614.csv',
 '102715.csv',
 '102816.csv',
 '103010.csv',
 '103111.csv',
 '103212.csv',
 '103414.csv',
 '103515.csv',
 '103818.csv',
 '104012.csv',
 '104416.csv',
 '104820.csv',
 '105014.csv',
 '105115.csv',
 '105216.csv',
 '105620.csv',
 '105923.csv',
 '106016.csv',
 '106319.csv',
 '106521.csv',
 '106824.csv',
 '107018.csv',
 '107321.csv',
 '107422.csv',
 '107725.csv',
 '108020.csv',
 '108121.csv',
 '108222.csv',
 '108323.csv',
 '108525.csv',
 '108828.csv',
 '109123.csv',
 '109325.csv',
 '109830.csv',
 '110007.csv',
 '110411.csv',
 '110613.csv',
 '111009.csv',
 '111211.csv',
 '111312.csv',
 '111413.csv',
 '111514.csv',
 '111716.csv',
 '112112.csv',
 '112314.csv',
 '112516.csv',
 '112920.csv',
 '113215.csv',
 '113316.csv',
 '113619.csv',
 '113922.csv',
 '114217.csv',
 '114318.csv',
 '114419.csv',
 '114621.c

In [94]:
len(sub_L)

1018

In [91]:
sub_old == sub_new

False

In [95]:
# Symmetric difference: file names found in exactly one of sub_old / sub_new
set(sub_old) ^ set(sub_new)

{'150928.csv',
 '192237.csv',
 '198047.csv',
 '200008.csv',
 '200109.csv',
 '210011.csv',
 '303624.csv',
 '406836.csv'}

# sub_L and sub_new (compared below) should be identical, since both sets of matrices were computed from the same data newly downloaded by Bin in March 2023

In [92]:
sub_L == sub_new

False

In [96]:
# Symmetric difference: file names found in exactly one of sub_L / sub_new
set(sub_L) ^ set(sub_new)

{'192237.csv', '198047.csv', '210011.csv', '406836.csv'}

In [97]:
unmatch_new

[101410,
 107220,
 112819,
 113417,
 113821,
 114116,
 116120,
 116221,
 116423,
 117728,
 120010,
 121315,
 121719,
 121820,
 122418,
 126931,
 128329,
 129432,
 129533,
 129937,
 131621,
 142424,
 143527,
 145531,
 146634,
 150019,
 150524,
 151021,
 155231,
 159845,
 160931,
 165234,
 168038,
 169141,
 170934,
 171128,
 171734,
 172635,
 173132,
 173233,
 177342,
 179548,
 179952,
 183741,
 186949,
 190132,
 192237,
 193441,
 196851,
 197449,
 197651,
 198047,
 200210,
 201717,
 203721,
 207628,
 208428,
 209531,
 210011,
 221218,
 248238,
 351938,
 355239,
 355542,
 355845,
 362034,
 406836,
 462139,
 473952,
 492754,
 521331,
 549757,
 550439,
 569965,
 571548,
 578158,
 584355,
 611231,
 613235,
 623137,
 644044,
 650746,
 688569,
 689470,
 693461,
 701535,
 733548,
 734247,
 745555,
 766563,
 782157,
 810439,
 822244,
 856463,
 929464,
 953764,
 972566,
 973770,
 995174]