In [1]:
import pandas as pd
import scipy.stats as stats
import pingouin as pg
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf

### Mixed-effects model

In [2]:
# Load the individual-level table that the mixed-effects models below are fit on.
sfc_csv_path = r"D:\Download\SFC\SFC_HCP\4_Validation\individual_VGCL\sfc_network_individual.csv"
data = pd.read_csv(sfc_csv_path)

In [3]:
# Split the combined 'id' string into its two captured parts (digits before the
# underscore -> subject_id, the r<digits>_<L/R letters> suffix -> run), append
# them as new columns in that order, then discard the original 'id' column.
id_parts = data['id'].str.extract(r'(\d+)_(r\d+_[LR]+)')
data['subject_id'] = id_parts[0]
data['run'] = id_parts[1]
data = data.drop(columns='id')

In [4]:
# Reshape to long format: every non-identifier column of `data` becomes a
# ('network', 'sfc') pair, repeated for each original row.
# The value columns are picked by name (everything that is not an identifier or
# covariate) instead of by the positional slice columns[2:-2], so the reshape no
# longer silently depends on 'gender'/'age' being the first two columns and
# 'subject_id'/'run' being the last two.
id_columns = ['gender', 'age', 'subject_id', 'run']
network_columns = [col for col in data.columns if col not in id_columns]
data_long = pd.melt(data,
                    id_vars=id_columns,
                    value_vars=network_columns,
                    var_name='network',
                    value_name='sfc')

In [61]:
# Model 1: main effects of network, run, gender and age, grouped by subject.
# The covariates are spelled in lowercase to match the 'gender'/'age' columns
# that data_long receives from the melt step; the former 'Gender'/'Age'
# spelling names columns that do not exist in data_long, so the formula could
# not be evaluated after a kernel restart.
model1 = smf.mixedlm("sfc ~ C(network) + C(run) + C(gender) + C(age)",
                     groups="subject_id",
                     data=data_long)
result1 = model1.fit()
print(result1.summary())

                    Mixed Linear Model Regression Results
Model:                   MixedLM        Dependent Variable:        sfc       
No. Observations:        79840          Method:                    REML      
No. Groups:              998            Scale:                     0.0158    
Min. group size:         80             Log-Likelihood:            49989.2145
Max. group size:         80             Converged:                 Yes       
Mean group size:         80.0                                                
-----------------------------------------------------------------------------
                                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------------
Intercept                          0.590    0.009  65.181 0.000  0.572  0.607
C(network)[T.L Cingulo-opercular] -0.021    0.003  -7.368 0.000 -0.026 -0.015
C(network)[T.L Default mode]      -0.061    0.003 -21.629 0.000 -0.066 -0.055
C(netw

In [62]:
# Model 2: adds the network-by-gender interaction (the `*` operator expands to
# both main effects plus their interaction), with run and age as main effects.
# The covariates are spelled in lowercase to match the 'gender'/'age' columns
# that data_long receives from the melt step; the former 'Gender'/'Age'
# spelling names columns that do not exist in data_long, so the formula could
# not be evaluated after a kernel restart.
model2 = smf.mixedlm("sfc ~ C(network) * C(gender) + C(run) + C(age)",
                     groups="subject_id",
                     data=data_long)
result2 = model2.fit()
print(result2.summary())

                           Mixed Linear Model Regression Results
Model:                        MixedLM             Dependent Variable:             sfc       
No. Observations:             79840               Method:                         REML      
No. Groups:                   998                 Scale:                          0.0158    
Min. group size:              80                  Log-Likelihood:                 49947.7876
Max. group size:              80                  Converged:                      Yes       
Mean group size:              80.0                                                          
--------------------------------------------------------------------------------------------
                                                 Coef.  Std.Err.    z    P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------------
Intercept                                         0.591    0.009  64.016 0.000  0.572  0.609
C(net

In [63]:
# Model 3: adds the network-by-age interaction (the `*` operator expands to
# both main effects plus their interaction), with run and gender as main effects.
# The covariates are spelled in lowercase to match the 'gender'/'age' columns
# that data_long receives from the melt step; the former 'Gender'/'Age'
# spelling names columns that do not exist in data_long, so the formula could
# not be evaluated after a kernel restart.
model3 = smf.mixedlm("sfc ~ C(network) * C(age) + C(run) + C(gender)",
                     groups="subject_id",
                     data=data_long)
result3 = model3.fit()
print(result3.summary())

                            Mixed Linear Model Regression Results
Model:                         MixedLM             Dependent Variable:             sfc       
No. Observations:              79840               Method:                         REML      
No. Groups:                    998                 Scale:                          0.0158    
Min. group size:               80                  Log-Likelihood:                 49818.7187
Max. group size:               80                  Converged:                      Yes       
Mean group size:               80.0                                                          
---------------------------------------------------------------------------------------------
                                                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------------------------
Intercept                                          0.591    0.010  60.527 0.000  0.572  

In [5]:
# Model 4: all four main effects plus the network-by-gender, network-by-age and
# network-by-run interaction terms, grouped by subject.
full_formula = "sfc ~ C(network) + C(run) + C(gender) + C(age) + C(network):C(gender) + C(network):C(age) + C(network):C(run)"
model4 = smf.mixedlm(
    full_formula,
    data=data_long,
    groups="subject_id",
)
result4 = model4.fit()
print(result4.summary())



                            Mixed Linear Model Regression Results
Model:                        MixedLM             Dependent Variable:             sfc        
No. Observations:             79840               Method:                         REML       
No. Groups:                   998                 Scale:                          0.0032     
Min. group size:              80                  Log-Likelihood:                 114354.9033
Max. group size:              80                  Converged:                      Yes        
Mean group size:              80.0                                                           
---------------------------------------------------------------------------------------------
                                                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------------------------
Intercept                                          0.623    0.003 192.047 0.000  0.617  

### Pearson and Spearman correlation coefficients between each single-run result and the mixed-run result

In [3]:
from scipy.stats import spearmanr, pearsonr
import numpy as np

In [5]:
# Saved arrays, in load order: the mixed-run result followed by the four
# single-run results (REST1_LR, REST1_RL, REST2_LR, REST2_RL).
run_paths = (
    r"D:\Download\SFC\SFC_HCP\4_Validation\brain_region_difference_VGCL\sfc_group_network.npy",
    r"D:\Download\SFC\SFC_HCP\4_Validation\single_run\REST1_LR\brain_region_difference_VGCL\sfc_ensemble_10_network.npy",
    r"D:\Download\SFC\SFC_HCP\4_Validation\single_run\REST1_RL\brain_region_difference_VGCL\sfc_ensemble_10_network.npy",
    r"D:\Download\SFC\SFC_HCP\4_Validation\single_run\REST2_LR\brain_region_difference_VGCL\sfc_ensemble_10_network.npy",
    r"D:\Download\SFC\SFC_HCP\4_Validation\single_run\REST2_RL\brain_region_difference_VGCL\sfc_ensemble_10_network.npy",
)
mixed, r1_lr, r1_rl, r2_lr, r2_rl = (np.load(path) for path in run_paths)

In [6]:
def correlation_analysis(mixed_run, single_runs):
    """Correlate the mixed-run values with each single-run array.

    For every entry of ``single_runs`` (in order) the Spearman and Pearson
    coefficients against ``mixed_run`` are computed, printed, and stored.

    Parameters
    ----------
    mixed_run : array-like
        Reference values, passed as the first argument to both tests.
    single_runs : iterable of array-like
        Arrays compared against ``mixed_run`` one at a time.

    Returns
    -------
    dict
        Maps ``'RUN_<k>'`` (k counted from 1) to a dict with the keys
        ``'spearman_correlation'``, ``'spearman_p_value'``,
        ``'pearson_correlation'`` and ``'pearson_p_value'``.
    """
    summary = {}
    for run_no, run_values in enumerate(single_runs, start=1):
        rho, rho_p = spearmanr(mixed_run, run_values)
        r, r_p = pearsonr(mixed_run, run_values)

        # Report the pair of coefficients for this run before storing them.
        print(f"RUN {run_no} & mixed-RUN:")
        print(f"  Spearman: {rho:.3f} (p={rho_p})")
        print(f"  Pearson:  {r:.3f} (p={r_p})")

        summary[f'RUN_{run_no}'] = dict(
            spearman_correlation=rho,
            spearman_p_value=rho_p,
            pearson_correlation=r,
            pearson_p_value=r_p,
        )
    return summary

# Compare the mixed-run array against the four single-run arrays, in load order.
single_run_arrays = [r1_lr, r1_rl, r2_lr, r2_rl]
correlation_results = correlation_analysis(mixed, single_run_arrays)

RUN 1 & mixed-RUN:
  Spearman: 0.871 (p=1.154775689467911e-112)
  Pearson:  0.901 (p=5.487986250236029e-132)
RUN 2 & mixed-RUN:
  Spearman: 0.838 (p=2.9130954281452004e-96)
  Pearson:  0.873 (p=5.8936162421010995e-114)
RUN 3 & mixed-RUN:
  Spearman: 0.892 (p=1.3604222082684397e-125)
  Pearson:  0.913 (p=7.204109734908596e-142)
RUN 4 & mixed-RUN:
  Spearman: 0.732 (p=1.4680223709181659e-61)
  Pearson:  0.818 (p=5.365349024192773e-88)


### Intra-class Correlation Coefficient

In [10]:
# Long-format table for the ICC: one row per (brain region, model) pair.
# The frame is built directly from typed columns. Stacking the int, str and
# float arrays into a single NumPy array would coerce every value to a string,
# which is why 'sfc' previously had to be cast back to float and why the region
# ids ended up as strings.
run_arrays = {
    'mixed': mixed,
    'r1_lr': r1_lr,
    'r1_rl': r1_rl,
    'r2_lr': r2_lr,
    'r2_rl': r2_rl,
}
# Region count is taken from the data instead of a hard-coded 360.
n_regions = len(mixed)
bg = list(range(n_regions)) * len(run_arrays)
model = [label for label in run_arrays for _ in range(n_regions)]
sfc = np.concatenate(list(run_arrays.values()))
icc_data = pd.DataFrame({
    'brain region': bg,
    'model': model,
    'sfc': sfc.astype(float),
})

In [11]:
# Intraclass correlation across the five result sets: brain regions are the
# targets, each result set ('model') acts as a rater, and 'sfc' is the rating.
icc_columns = dict(targets='brain region', raters='model', ratings='sfc')
icc_result = pg.intraclass_corr(data=icc_data, **icc_columns)
print(icc_result)

    Type              Description       ICC          F  df1   df2  pval  \
0   ICC1   Single raters absolute  0.877967  36.972504  359  1440   0.0   
1   ICC2     Single random raters  0.877974  37.062472  359  1436   0.0   
2   ICC3      Single fixed raters  0.878234  37.062472  359  1436   0.0   
3  ICC1k  Average raters absolute  0.972953  36.972504  359  1440   0.0   
4  ICC2k    Average random raters  0.972955  37.062472  359  1436   0.0   
5  ICC3k     Average fixed raters  0.973019  37.062472  359  1436   0.0   

          CI95%  
0   [0.86, 0.9]  
1   [0.86, 0.9]  
2   [0.86, 0.9]  
3  [0.97, 0.98]  
4  [0.97, 0.98]  
5  [0.97, 0.98]  
