# Nov 27 2024: roi profile variability
1. unique profiles across modes/solutions and their consistency over animals

In [1]:
import csv
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp 
import dill as pickle 
from os.path import join as pjoin
from itertools import product
from tqdm import tqdm
from copy import deepcopy
from pathlib import Path
import subprocess
from scipy import sparse, stats
from scipy.spatial.distance import jensenshannon
from multiprocessing import Pool
import glob
import random

from sklearn.cluster import DBSCAN

import arviz as az

import ants
from nipype.interfaces import afni

from itertools import product, combinations, chain
import multiprocessing as mp
from functools import partial

# networks
import graph_tool.all as gt

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.cm import rainbow

plt.rcParamsDefault['font.family'] = "sans-serif"
plt.rcParamsDefault['font.sans-serif'] = "Arial"
plt.rcParams['font.size'] = 14
plt.rcParams["errorbar.capsize"] = 0.5

import cmasher as cmr  # CITE ITS PAPER IN YOUR MANUSCRIPT
import colorcet as cc

# ignore user warnings
import warnings
warnings.filterwarnings("ignore") #, category=UserWarning)

In [2]:
class ARGS():
    pass

args = ARGS()

args.SEED = 100

def set_seed(args):
    gt.seed_rng(args.SEED)
    np.random.seed(args.SEED)

set_seed(args)

In [3]:
args.type = 'spatial'
args.roi_size = 225
args.maintain_symmetry = True
args.brain_div = 'whl'
args.num_rois = 162

PARC_DESC = (
    f'type-{args.type}'
    f'_size-{args.roi_size}'
    f'_symm-{args.maintain_symmetry}'
    f'_braindiv-{args.brain_div}'
    f'_nrois-{args.num_rois}'
)

In [None]:
args.GRAPH_DEF = f'constructed'
args.GRAPH_METHOD = f'pearson-corr'
args.THRESHOLDING = f'positive'
args.EDGE_DEF = f'binary'
args.EDGE_DENSITY = 20
args.LAYER_DEF = f'individual'
args.DATA_UNIT = f'sub'

BASE_path = f'{os.environ["HOME"]}/mouse_dataset'
PARCELS_path = f'{BASE_path}/parcels'
ROI_path = f'{BASE_path}/roi_results_v2/{PARC_DESC}'
TS_path = f'{ROI_path}/runwise_timeseries'
ROI_RESULTS_path = (
    f'{ROI_path}'
    f'/graph-{args.GRAPH_DEF}/method-{args.GRAPH_METHOD}'
    f'/threshold-{args.THRESHOLDING}/edge-{args.EDGE_DEF}/density-{args.EDGE_DENSITY}'
    f'/layer-{args.LAYER_DEF}/unit-{args.DATA_UNIT}'
)
RSN_ROI_path = f'{ROI_path}/rsns'
os.system(f'mkdir -p {RSN_ROI_path}')
IC_ROI_path = f'{ROI_path}/ics'
os.system(f'mkdir -p {IC_ROI_path}')
GRAPH_path = f'{ROI_RESULTS_path}/graphs'
os.system(f'mkdir -p {GRAPH_path}')
SBM_path = f'{ROI_RESULTS_path}/model-fits'
os.system(f'mkdir -p {SBM_path}')
ESTIM_path = f'{ROI_RESULTS_path}/estimates'
os.system(f'mkdir -p {ESTIM_path}/individual')
os.system(f'mkdir -p {ESTIM_path}/group')

0

In [5]:
args.dc, args.sbm = True, 'm'

args.nested = True if args.sbm in ['h'] else False

args.force_niter = 40000
args.num_draws = int((1/2) * args.force_niter)

def sbm_name(args):
    dc = f'dc' if args.dc else f'nd'
    dc = f'' if args.sbm in ['m', 'a'] else dc
    file = f'sbm-{dc}-{args.sbm}'
    return file

SBM = sbm_name(args)
SBM

'sbm--m'

In [6]:
def get_membership_matrix(num_rois, df, col='pi'):
    pis = [np.zeros((num_rois, 1)) if np.isnan(pi).all() else pi for pi in df[col]]

    num_modes = len(df)
    num_comms = np.max([pi.shape[-1] for pi in pis])
    num_rois = num_rois
    M = np.zeros((num_rois, num_modes, num_comms)) # membership profile matrix

    for idx_mode, pi in enumerate(pis):
        M[:, idx_mode, :pi.shape[-1]] = pi
    
    return M

In [7]:
marginals_files = sorted(glob.glob(f'{ESTIM_path}/individual/sub-*/partition-modes-group-aligned/{SBM}/desc-mem-mats.pkl', recursive=True))
marginals_df = []
for sbm_file in marginals_files:
    with open(f'{sbm_file}', 'rb') as f:
        row = pickle.load(f)
    marginals_df += [row]
marginals_df = pd.concat(marginals_df).reset_index(drop=True)
mode_ids = list(chain.from_iterable([list(range(count)) for count in marginals_df['sub'].value_counts().sort_index().to_list()]))
marginals_df['mode_id'] = mode_ids
marginals_df

Unnamed: 0,sub,sbm,pi_aligned,omega,mode_id
0,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.806483,0
1,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1....",0.165306,1
2,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [...",0.019968,2
3,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....",0.008243,3
4,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.3338,0
5,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1....",0.3292,1
6,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [...",0.26572,2
7,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....",0.07104,3
8,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.00024,4
9,SLC03,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.575495,0


In [8]:
cols = [col for col in  list(marginals_df.columns) if 'pi_' in col]
cols

['pi_aligned']

In [9]:
# SOFT MARGINALS
soft_marginals_df = []
for sub, group in marginals_df.groupby('sub'):
    omegas = group['omega'].to_list()
    dct = {'sub': [sub], 'sbm': [SBM]}
    for col in cols:
        M = get_membership_matrix(args.num_rois, group, col=col)
        SCs = np.average(M, axis=1, weights=omegas) # soft-comms.
        dct[col] = [SCs]
    soft_marginals_df += [pd.DataFrame(dct)]
soft_marginals_df = pd.concat(soft_marginals_df).reset_index(drop=True)
soft_marginals_df

Unnamed: 0,sub,sbm,pi_aligned
0,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0...."
1,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,SLC03,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
3,SLC04,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,SLC05,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
5,SLC06,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
6,SLC07,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
7,SLC08,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0...."
8,SLC09,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
9,SLC10,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."


unique profiles

In [10]:
def get_dist_mat(X):
    # Jenson Shannon divergence between two probability mass functions
    D = np.zeros((X.shape[0], X.shape[0]))
    for i, j in combinations(range(X.shape[0]), 2):
        D[i, j] = jensenshannon(X[i, :], X[j, :])
        D[j, i] = D[i, j]
    return D

def find_clusters(args, X):
    D = get_dist_mat(X)
    D = np.nan_to_num(D)
    clustering = DBSCAN(eps=args.eps, min_samples=1)
    grps = clustering.fit_predict(D)
    return D, grps

def get_unique_profiles_and_strengths_per_roi(args, roi, X, omegas):
    D, grps = find_clusters(args, X)
    U = []
    S = []
    for g in np.unique(grps):
        U += [np.round(np.mean(X[grps == g, :], axis=0), decimals=3)]
        S += [np.sum(omegas[grps == g])]
    U = np.stack(U, axis=0)
    S = np.stack(S)
    eff_num_prfls = np.exp(stats.entropy(S)) # effective number of profiles
    return (U, S, eff_num_prfls)

def get_unique_profiles_and_strengths_per_col(args, sub, SBM, col, M, omegas):
    col_df = []
    for roi in range(M.shape[0]):
        X = M[roi, :, :]
        (U, S, eff_num_prfls) = get_unique_profiles_and_strengths_per_roi(args, roi, X, omegas)
        crow = pd.DataFrame(dict(
            sub=[sub],
            sbm=[SBM],
            col=[col],
            roi=[roi],
            profiles=[U],
            strengths=[S],
            eff_num_profiles=[eff_num_prfls],
        ))
        col_df += [crow]
    col_df = pd.concat(col_df).reset_index(drop=True)
    return col_df

def get_unique_profiles_and_strengths_per_sub(args, sub, SBM, cols, group):
    profiles_sub_df = []
    for col in cols:
        M = get_membership_matrix(args.num_rois, group, col)
        omegas = group['omega'].to_numpy()
        col_df = get_unique_profiles_and_strengths_per_col(args, sub, SBM, col, M, omegas)
        profiles_sub_df += [col_df]
    return pd.concat(profiles_sub_df).reset_index(drop=True)

In [11]:
args.eps = 0.3
profiles_df = []
for sub, group in tqdm(marginals_df.groupby('sub')):
    profiles_df += [get_unique_profiles_and_strengths_per_sub(args, sub, SBM, cols, group)]
profiles_df = pd.concat(profiles_df).reset_index(drop=True)

100%|██████████| 10/10 [00:02<00:00,  4.50it/s]


In [12]:
profiles_df

Unnamed: 0,sub,sbm,col,roi,profiles,strengths,eff_num_profiles
0,SLC01,sbm--m,pi_aligned,0,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
1,SLC01,sbm--m,pi_aligned,1,"[[0.0, 0.684, 0.315, 0.001, 0.0, 0.0, 0.0, 0.0...","[0.8064825930372149, 0.17354941976790716, 0.01...",1.742874
2,SLC01,sbm--m,pi_aligned,2,"[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
3,SLC01,sbm--m,pi_aligned,3,"[[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
4,SLC01,sbm--m,pi_aligned,4,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
...,...,...,...,...,...,...,...
1615,SLC10,sbm--m,pi_aligned,157,"[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
1616,SLC10,sbm--m,pi_aligned,158,"[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
1617,SLC10,sbm--m,pi_aligned,159,"[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]]",[1.0],1.000000
1618,SLC10,sbm--m,pi_aligned,160,"[[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]]",[1.0],1.000000


In [13]:
# the group level statistics
# collect effective_number_of_profiles per roi

In [14]:
def bootstrap_ci(data, n_boot=10000, conf=0.95):
    boots = [np.mean(np.random.choice(data, len(data), replace=True)) 
             for _ in range(n_boot)]
    mean = np.mean(data)
    ci = np.percentile(boots, [(1-conf)*50, (1+conf)*50])
    return mean, ci

In [15]:
enps_df = []
for (roi, col), group in tqdm(profiles_df.groupby(by=['roi', 'col'])):
    enps = group['eff_num_profiles'].to_list()
    m, ci = bootstrap_ci(enps, )
    row = pd.DataFrame(dict(
        sbm=[SBM],
        col=[col],
        roi=[roi],
        enps=[enps],
        mean=[m],
        ci=[ci],
    ))
    enps_df += [row]
enps_df = pd.concat(enps_df).reset_index(drop=True)

100%|██████████| 162/162 [00:31<00:00,  5.15it/s]


In [16]:
enps_df

Unnamed: 0,sbm,col,roi,enps,mean,ci
0,sbm--m,pi_aligned,0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"
1,sbm--m,pi_aligned,1,"[1.7428743544029135, 2.959397889761131, 1.9780...",1.820061,"[1.428293512762444, 2.236161376321906]"
2,sbm--m,pi_aligned,2,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"
3,sbm--m,pi_aligned,3,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"
4,sbm--m,pi_aligned,4,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"
...,...,...,...,...,...,...
157,sbm--m,pi_aligned,157,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"
158,sbm--m,pi_aligned,158,"[1.1566304904988036, 2.2532342189301025, 1.000...",1.259429,"[1.0025201165262128, 1.6111907336397535]"
159,sbm--m,pi_aligned,159,"[1.1566304904988036, 2.2580653166805984, 1.000...",1.260006,"[1.0017987397542774, 1.6128271935507166]"
160,sbm--m,pi_aligned,160,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",1.000000,"[1.0, 1.0]"


In [17]:
def plot_enps_per_roi(ax, roi, enps, m, ci):
    sns.kdeplot(enps, ax=ax, bw_adjust=1.0)
    ax.scatter(enps, [0.05]*len(enps), marker='x', color='grey', s=50)
    ax.axvline(m, color='salmon', linestyle='-', label=f'mean={m:.2f}')
    ax.axvspan(ci[0], ci[1], alpha=0.3, color='salmon', label=f'95% CI')
    ax.set(xlim=[0.0, 6.0], xlabel=f'effective number of profiles', ylabel=f'density', title=f'roi {roi:03d}')
    ax.legend()
    ax.grid(alpha=0.3)
    return ax

def plot_enps_per_col(args, df, ):
    ncols = 7
    nrows = np.ceil(args.num_rois / ncols).astype(int)
    fig, axs = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows), sharey=False)
    fig.tight_layout(h_pad=3, w_pad=3)

    for idx, row in df.iterrows():
        roi = row['roi']
        enps = row['enps']
        m = row['mean']
        ci = row['ci']

        r, c = idx // ncols, idx % ncols 
        ax = axs[r, c] if nrows > 1 else axs[c]
        ax = plot_enps_per_roi(ax, roi, enps, m, ci)
    return fig

In [18]:
for col, group in tqdm(enps_df.groupby('col')):
    group = group.reset_index(drop=True)
    fig = plot_enps_per_col(args, group)
    fig.suptitle(f'{SBM} {col}', x=0.0, y=1.0)

    folder = f'{ESTIM_path}/group/membership-profiles/{SBM}'
    os.system(f'mkdir -p {folder}')
    fig.savefig(f'{folder}/desc-{col}.pdf', bbox_inches='tight')
    plt.close('all')

100%|██████████| 1/1 [00:20<00:00, 20.74s/it]


visualizing on the brain

In [19]:
parcels_img = ants.image_read(f'{PARCELS_path}/{PARC_DESC}_desc-parcels.nii.gz')
parcels = parcels_img.numpy()
roi_labels = np.loadtxt(f'{PARCELS_path}/{PARC_DESC}_desc-labels.txt')

In [20]:
def concatenate(in_files, out_file):
    try:
        os.remove(out_file)
    except:
        pass

    tcat = afni.TCat()
    tcat.inputs.in_files = in_files
    tcat.inputs.out_file = out_file
    tcat.inputs.rlt = ''
    tcat.cmdline 
    tcat.run()

    for file in in_files:
        try:
            os.remove(file)
        except:
            pass
    return None

def profiles_to_nifti(args, X, folder, name=f'col'):
    os.system(f'mkdir -p {folder}')
    in_files = []
    for idx_type in range(X.shape[-1]):
        x = X[:, idx_type]
        x_img = np.zeros_like(parcels)
        for idx, roi in enumerate(roi_labels):
            x_img += (parcels == roi) * (x[idx])
        
        file = f'{folder}/{idx_type:01d}.nii.gz'
        parcels_img.new_image_like(x_img).to_filename(file)
        in_files.append(file)

    out_file = f'{folder}/desc-{name}.nii.gz'
    concatenate(in_files, out_file)
    return None

In [21]:
for col, group in enps_df.groupby('col'):
    X = group['mean'].to_numpy()[:, None]
    folder = f'{ESTIM_path}/group/membership-profiles/{SBM}'
    os.system(f'mkdir -p {folder}')
    profiles_to_nifti(args, X, folder, col)

241222-21:07:39,255 nipype.interface INFO:
	 stderr 2024-12-22T21:07:39.255845:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:07:39,257 nipype.interface INFO:
241222-21:07:39,285 nipype.interface INFO:
	 stderr 2024-12-22T21:07:39.285334:++ elapsed time = 0.0 s


group level mean `Eff.#profiles` per canonical rsn and ic

In [22]:
def apply_rsn_mask(args, nii_file, rsn_file, out_file): #(args, folder, name, rsn_name):
    calc = afni.Calc()
    calc.inputs.in_file_a = nii_file #f'{folder}/desc-{name}.nii.gz'
    calc.inputs.in_file_b = rsn_file #f'{RSN_ROI_path}/desc-{rsn_name}-mask.nii.gz'
    calc.inputs.expr = 'a*b'
    calc.inputs.out_file = out_file #f'{folder}/rsn-{rsn_name}_desc-{name}.nii.gz'
    calc.inputs.outputtype = 'NIFTI'
    calc.inputs.overwrite = True
    calc.run()
    return None

In [23]:
folder = f'{ESTIM_path}/group/membership-profiles/{SBM}'
nii_files = sorted(glob.glob(f'{folder}/*.nii.gz'))
rsn_files = sorted(glob.glob(f'{RSN_ROI_path}/*-mask.nii.gz', recursive=True))
ic_files = sorted(glob.glob(f'{IC_ROI_path}/*-mask*.nii.gz', recursive=True))
ics_df = pd.read_csv(f'{IC_ROI_path}/ic_names.csv')
ic_files

['/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-00-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-01-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-02-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-03-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-04-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-05-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-06-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-

In [24]:
for rsn_file in (rsn_files):
    rsn_name = [n for n in rsn_file.split('/') if 'desc-' in n][0].split('-')[2]
    
    out_folder = f'{folder}/rsns'
    os.system(f'mkdir -p {out_folder}')

    for nii_file in nii_files:
        nii_name = [n for n in nii_file.split('/') if 'desc-' in n][0]
        out_file = f'{out_folder}/rsn-{rsn_name}_{nii_name}'
        apply_rsn_mask(args, nii_file, rsn_file, out_file)
    #     break
    # break

241222-21:07:39,790 nipype.interface INFO:
	 stderr 2024-12-22T21:07:39.790581:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:07:39,793 nipype.interface INFO:
	 stderr 2024-12-22T21:07:39.790581:++ Authored by: A cast of thousands
241222-21:07:39,835 nipype.interface INFO:
	 stderr 2024-12-22T21:07:39.835374:++ Output dataset /home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/graph-constructed/method-pearson-corr/threshold-positive/edge-binary/density-20/layer-individual/unit-sub/estimates/group/membership-profiles/sbm--m/rsns/rsn-basal_ganglia_desc-pi_aligned.nii.gz
241222-21:07:40,283 nipype.interface INFO:
	 stderr 2024-12-22T21:07:40.282975:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:07:40,285 nipype.interface INFO:
	 stderr 2024-12-22T21:07:40.282975:++ Authored by: A cast of thousands
241222-21:07:40,315 nipype.interface INFO:
	 stderr 2024-12-22T21:07:40.315902:++ Output dataset /hom

In [25]:
for ic_file, (idx, row) in zip(ic_files, ics_df.iterrows()):
    ic_name = row['abbrev']
    
    out_folder = f'{folder}/ics'
    os.system(f'mkdir -p {out_folder}')

    for nii_file in nii_files:
        nii_name = [n for n in nii_file.split('/') if 'desc-' in n][0]
        out_file = f'{out_folder}/ic-{idx:02d}_{nii_name}'
        apply_rsn_mask(args, nii_file, ic_file, out_file)

241222-21:07:42,663 nipype.interface INFO:
	 stderr 2024-12-22T21:07:42.663479:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:07:42,666 nipype.interface INFO:
	 stderr 2024-12-22T21:07:42.663479:++ Authored by: A cast of thousands
241222-21:07:42,701 nipype.interface INFO:
	 stderr 2024-12-22T21:07:42.701782:++ Output dataset /home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/graph-constructed/method-pearson-corr/threshold-positive/edge-binary/density-20/layer-individual/unit-sub/estimates/group/membership-profiles/sbm--m/ics/ic-00_desc-pi_aligned.nii.gz
241222-21:07:43,151 nipype.interface INFO:
	 stderr 2024-12-22T21:07:43.150929:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:07:43,153 nipype.interface INFO:
	 stderr 2024-12-22T21:07:43.150929:++ Authored by: A cast of thousands
241222-21:07:43,176 nipype.interface INFO:
	 stderr 2024-12-22T21:07:43.176648:++ Output dataset /home/govindas/mo

In [26]:
# distribution of mean Eff.#profiles per rsn

In [27]:
def plot_enp_dist_all_rsns_per_col(args, files):
    ncols = 5
    nrows = np.ceil(len(files) / ncols).astype(int)
    fig, axs = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows))
    fig.tight_layout(h_pad=3, w_pad=3)

    fig.suptitle(f'{SBM} {col}', x=0.0, y=1.0)

    for idx, file in enumerate(files):
        r, c = idx // ncols, idx % ncols
        ax = axs[r, c] if nrows > 1 else axs[c]

        rsn_name = [n for n in file.split('/') if 'rsn-' in n][0]
        rsn_name = '-'.join(rsn_name.split('-')[1].split('_')[:-1])

        enps = ants.image_read(file).numpy()
        enps = enps[enps > 0]

        sns.kdeplot(enps, ax=ax, fill=True, alpha=0.3)
        ax.set(title=f'{rsn_name}', xlabel=f'effective number of profiles', ylabel='density')

    for c_ in range(c+1, ncols):
        ax = axs[r, c_] if nrows > 1 else axs[c_]
        fig.delaxes(ax)
    return fig

In [28]:
for col in tqdm(cols):
    folder = f'{ESTIM_path}/group/membership-profiles/{SBM}'
    files = sorted(glob.glob(f'{folder}/rsns/*-{col}*.nii.gz'))
    fig = plot_enp_dist_all_rsns_per_col(args, files)
    fig.savefig(f'{folder}/desc-{col}-rsn-dist.pdf', bbox_inches='tight')
    plt.close('all')

100%|██████████| 1/1 [00:01<00:00,  1.10s/it]


In [29]:
def plot_enp_dist_all_ics_per_col(args, files, ics_df):
    ncols = 5
    nrows = np.ceil(len(files) / ncols).astype(int)
    fig, axs = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows))
    fig.tight_layout(h_pad=3, w_pad=3)

    fig.suptitle(f'{SBM} {col}', x=0.0, y=1.0)

    for (idx, row), file in zip(ics_df.iterrows(), files):
        r, c = idx // ncols, idx % ncols
        ax = axs[r, c] if nrows > 1 else axs[c]

        ic_name = row['abbrev']

        enps = ants.image_read(file).numpy()
        enps = enps[enps > 0]

        sns.kdeplot(enps, ax=ax, fill=True, alpha=0.3)
        ax.set(title=f'{idx:02d}: {ic_name}', xlabel=f'effective number of profiles', ylabel='density', xlim=[0.5, 5.5])

    for c_ in range(c+1, ncols):
        ax = axs[r, c_] if nrows > 1 else axs[c_]
        fig.delaxes(ax)
    
    return fig

In [30]:
for col in tqdm(cols):
    folder = f'{ESTIM_path}/group/membership-profiles/{SBM}'
    files = sorted(glob.glob(f'{folder}/ics/*-{col}*.nii.gz'))
    fig = plot_enp_dist_all_ics_per_col(args, files, ics_df)
    fig.savefig(f'{folder}/desc-{col}-ic-dist.pdf', bbox_inches='tight')
    plt.close('all')

100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


In [31]:
# above plot shows that every rsn has some core set of rois that only belong to that rsn, and some rois that share memberships with 2 comms. and then fewer rois sharing with 3 comms. 
# the mode of distribution is always at 1.00, then the second mode is at 2.00. 
# we may see better picture on the brain.