# Dec 3, 2024: roi profile variability
2. nature of profiles: 
    - localized/distributed ?
    - low/high variability across solution modes ?

In [1]:
import csv
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp 
import dill as pickle 
from os.path import join as pjoin
from itertools import product
from tqdm import tqdm
from copy import deepcopy
from pathlib import Path
import subprocess
from scipy import sparse, stats
from scipy.spatial.distance import jensenshannon
from multiprocessing import Pool
import glob
import random

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.utils import resample

import arviz as az

import ants
from nipype.interfaces import afni

from itertools import product, combinations, chain
import multiprocessing as mp
from functools import partial

# networks
import graph_tool.all as gt

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.cm import rainbow

# Font settings go on the live rcParams: writing to plt.rcParamsDefault only changes
# what a later reset-to-defaults would restore, not the settings figures are drawn with.
plt.rcParams['font.family'] = "sans-serif"
# Prefer Arial, keeping the existing sans-serif list as fallback when it is not installed.
plt.rcParams['font.sans-serif'] = ["Arial", *plt.rcParams['font.sans-serif']]
plt.rcParams['font.size'] = 14
plt.rcParams["errorbar.capsize"] = 0.5

import cmasher as cmr  # CITE ITS PAPER IN YOUR MANUSCRIPT
import colorcet as cc

# Silence warnings for the whole kernel session. NOTE: with the category argument
# commented out, this hides every warning category, not only UserWarning.
import warnings
warnings.filterwarnings("ignore") #, category=UserWarning)

In [2]:
class ARGS:
    """Bare attribute container holding the notebook-wide settings."""


args = ARGS()
args.SEED = 100


def set_seed(args):
    """Seed NumPy's global RNG and graph-tool's RNG with ``args.SEED``."""
    np.random.seed(args.SEED)
    gt.seed_rng(args.SEED)


set_seed(args)

In [3]:
# Parcellation settings; each one becomes a key-value field of PARC_DESC.
args.type = 'spatial'
args.roi_size = 225
args.maintain_symmetry = True
args.brain_div = 'whl'
args.num_rois = 162

# Underscore-joined descriptor used below in parcel file names and result paths.
PARC_DESC = '_'.join([
    f'type-{args.type}',
    f'size-{args.roi_size}',
    f'symm-{args.maintain_symmetry}',
    f'braindiv-{args.brain_div}',
    f'nrois-{args.num_rois}',
])

In [None]:
# Graph-construction settings; each one becomes a component of ROI_RESULTS_path.
args.GRAPH_DEF = 'constructed'
args.GRAPH_METHOD = 'pearson-corr'
args.THRESHOLDING = 'positive'
args.EDGE_DEF = 'binary'
args.EDGE_DENSITY = 20
args.LAYER_DEF = 'individual'
args.DATA_UNIT = 'sub'

# Input / output locations, all rooted at $HOME/mouse_dataset.
BASE_path = f'{os.environ["HOME"]}/mouse_dataset'
PARCELS_path = f'{BASE_path}/parcels'
ROI_path = f'{BASE_path}/roi_results_v2/{PARC_DESC}'
TS_path = f'{ROI_path}/runwise_timeseries'
ROI_RESULTS_path = (
    f'{ROI_path}'
    f'/graph-{args.GRAPH_DEF}/method-{args.GRAPH_METHOD}'
    f'/threshold-{args.THRESHOLDING}/edge-{args.EDGE_DEF}/density-{args.EDGE_DENSITY}'
    f'/layer-{args.LAYER_DEF}/unit-{args.DATA_UNIT}'
)
RSN_ROI_path = f'{ROI_path}/rsns'
IC_ROI_path = f'{ROI_path}/ics'
GRAPH_path = f'{ROI_RESULTS_path}/graphs'
SBM_path = f'{ROI_RESULTS_path}/model-fits'
ESTIM_path = f'{ROI_RESULTS_path}/estimates'

# Create the output tree in-process (no shell): works for paths with spaces or
# special characters, and raises on failure instead of returning an ignored exit code.
for _folder in (
    RSN_ROI_path,
    IC_ROI_path,
    GRAPH_path,
    SBM_path,
    f'{ESTIM_path}/individual',
    f'{ESTIM_path}/group',
):
    os.makedirs(_folder, exist_ok=True)

0

In [5]:
# Model selection flags; both are read by sbm_name() to build the model tag.
args.dc = True
args.sbm = 'm'

# Only the model code 'h' is flagged as nested.
args.nested = args.sbm in ['h']

# Number of draws is half of force_niter.
args.force_niter = 40000
args.num_draws = int(0.5 * args.force_niter)

def sbm_name(args):
    """Return the model tag ``'sbm-<dc>-<sbm>'`` built from ``args``.

    ``<dc>`` is ``'dc'`` when ``args.dc`` is truthy and ``'nd'`` otherwise, except
    for the model codes ``'m'`` and ``'a'``, where it is left empty (giving a
    double dash, e.g. ``'sbm--m'``).
    """
    degree_tag = {True: f'dc', False: f'nd'}[bool(args.dc)]
    if args.sbm in ('m', 'a'):
        degree_tag = f''
    return f'sbm-{degree_tag}-{args.sbm}'

# Model tag for the configured settings; used in paths and figure titles below.
# The bare name on the last line displays it as the cell output.
SBM = sbm_name(args)
SBM

'sbm--m'

In [6]:
def get_membership_matrix(num_rois, df, col='pi'):
    """Stack the membership matrices in ``df[col]`` into one zero-padded 3-D array.

    Each entry of ``df[col]`` is a (num_rois, num_comms_i) array; entries that are
    entirely NaN (including a NaN scalar) are replaced by a (num_rois, 1) block
    of zeros.

    Returns an array of shape (num_rois, num_modes, max_comms), where num_modes is
    ``len(df)`` and max_comms is the widest entry; narrower entries are padded
    with zeros on the right.
    """
    blocks = []
    for entry in df[col]:
        if np.isnan(entry).all():
            entry = np.zeros((num_rois, 1))
        blocks.append(entry)

    widest = np.max([block.shape[-1] for block in blocks])
    M = np.zeros((num_rois, len(df), widest))  # membership profile matrix

    for idx_mode, block in enumerate(blocks):
        M[:, idx_mode, :block.shape[-1]] = block

    return M

In [7]:
# Load every subject's pickled table of membership matrices (one row per mode)
# and stack them into a single DataFrame.
marginals_files = sorted(glob.glob(f'{ESTIM_path}/individual/sub-*/partition-modes-group-aligned/{SBM}/desc-mem-mats.pkl', recursive=True))
marginals_tables = []
for sbm_file in marginals_files:
    # NOTE: unpickling can execute arbitrary code; only load files produced by this pipeline.
    with open(sbm_file, 'rb') as f:
        marginals_tables.append(pickle.load(f))
marginals_df = pd.concat(marginals_tables).reset_index(drop=True)
# Number the modes 0..k-1 within each subject. cumcount() follows each row's own
# subject, so it does not depend on the rows being contiguous or sorted by subject.
mode_ids = marginals_df.groupby('sub').cumcount().to_list()
marginals_df['mode_id'] = mode_ids
marginals_df

Unnamed: 0,sub,sbm,pi_aligned,omega,mode_id
0,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.806483,0
1,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1....",0.165306,1
2,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [...",0.019968,2
3,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....",0.008243,3
4,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.3338,0
5,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1....",0.3292,1
6,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [...",0.26572,2
7,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....",0.07104,3
8,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.00024,4
9,SLC03,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",0.575495,0


In [8]:
# Membership columns are those whose name contains 'pi_'.
cols = [name for name in marginals_df.columns if 'pi_' in name]
cols

['pi_aligned']

In [9]:
# SOFT MARGINALS
# One soft membership matrix per subject and membership column: the omega-weighted
# average, across modes, of the subject's zero-padded membership matrices.
soft_marginals_df = []
for sub, group in marginals_df.groupby('sub'):
    omegas = group['omega'].to_list()
    dct = dict(sub=[sub], sbm=[SBM])
    # `col` is deliberately the loop variable: it stays defined at module level
    # after this cell, and later code reads it.
    for col in cols:
        M = get_membership_matrix(args.num_rois, group, col=col)
        dct[col] = [np.average(M, axis=1, weights=omegas)]  # soft-comms.
    soft_marginals_df.append(pd.DataFrame(dct))
soft_marginals_df = pd.concat(soft_marginals_df).reset_index(drop=True)
soft_marginals_df

Unnamed: 0,sub,sbm,pi_aligned
0,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0...."
1,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,SLC03,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
3,SLC04,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,SLC05,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
5,SLC06,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
6,SLC07,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
7,SLC08,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0...."
8,SLC09,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."
9,SLC10,sbm--m,"[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0...."


entropies to quantify nature of variability

In [10]:
def mem_entropy(pi):
    """Normalized Shannon entropy of each row of a membership matrix.

    Parameters
    ----------
    pi : np.ndarray or float
        Membership matrix whose last axis indexes communities, or a NaN scalar
        standing in for a missing matrix.

    Returns
    -------
    np.ndarray
        For an array: base-2 entropy along the last axis divided by ``log2`` of
        the number of communities, with NaNs replaced by 0. For a NaN scalar:
        zeros of length ``args.num_rois``.

    Raises
    ------
    TypeError
        If ``pi`` is neither an array nor a NaN scalar.
    """
    if isinstance(pi, np.ndarray):
        num_comms = pi.shape[-1]
        ent = stats.entropy(pi, base=2, axis=-1)
        # With a single community log2(1) == 0; skip the division rather than
        # producing 0/0 and relying on nan_to_num to clean it up.
        if num_comms > 1:
            ent = ent / np.log2(num_comms)
        return np.nan_to_num(ent)
    if np.isscalar(pi) and np.isnan(pi):
        return np.zeros((args.num_rois,))
    raise TypeError(
        f'mem_entropy expects an ndarray or a NaN scalar, got {type(pi).__name__}'
    )

In [11]:
# Per-mode entropies: replace every membership matrix by its per-ROI normalized
# entropy, keeping the identifying columns. Series.map is applied column by column
# because DataFrame.applymap is deprecated in recent pandas releases.
marginals_ent_df = marginals_df.assign(
    **{col_name: marginals_df[col_name].map(mem_entropy) for col_name in cols}
)[['sub', 'sbm', *cols, 'omega', 'mode_id']]
marginals_ent_df

Unnamed: 0,sub,sbm,pi_aligned,omega,mode_id
0,SLC01,sbm--m,"[0.0, 0.3248976980461579, 0.0, 0.0, 0.0, 0.0, ...",0.806483,0
1,SLC01,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.165306,1
2,SLC01,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.019968,2
3,SLC01,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.008243,3
4,SLC02,sbm--m,"[0.0, 0.3248976980461579, 0.0, 0.0, 0.0, 0.0, ...",0.3338,0
5,SLC02,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.3292,1
6,SLC02,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.26572,2
7,SLC02,sbm--m,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.07104,3
8,SLC02,sbm--m,"[0.0, 0.02804497878218725, 0.0, 0.0, 0.0, 0.0,...",0.00024,4
9,SLC03,sbm--m,"[0.0, 0.3248976980461579, 0.0, 0.0, 0.0, 0.0, ...",0.575495,0


In [12]:
# Entropy of each subject's soft (omega-averaged) membership matrix, per ROI.
# Series.map is applied column by column because DataFrame.applymap is deprecated
# in recent pandas releases.
soft_marginals_ent_df = soft_marginals_df.assign(
    **{col_name: soft_marginals_df[col_name].map(mem_entropy) for col_name in cols}
)[['sub', 'sbm', *cols]]
soft_marginals_ent_df

Unnamed: 0,sub,sbm,pi_aligned
0,SLC01,sbm--m,"[0.0, 0.33182413188416493, 0.0, 0.0, 0.0, 0.0,..."
1,SLC02,sbm--m,"[0.0, 0.25120297276890546, 0.0, 0.0, 0.0, 0.0,..."
2,SLC03,sbm--m,"[0.0, 0.34729278498775945, 0.0, 0.0, 0.0, 0.0,..."
3,SLC04,sbm--m,"[0.0, 0.2508801408617779, 0.0, 0.0, 0.0, 0.0, ..."
4,SLC05,sbm--m,"[0.0, 0.3248976980461579, 0.0, 0.0, 0.0, 0.0, ..."
5,SLC06,sbm--m,"[0.0, 0.33496483877235245, 0.0, 0.0, 0.0, 0.0,..."
6,SLC07,sbm--m,"[0.0, 0.35932933020295127, 0.0, 0.0, 0.0, 0.0,..."
7,SLC08,sbm--m,"[0.0, 0.32832188726347156, 0.0, 0.0, 0.0, 0.0,..."
8,SLC09,sbm--m,"[0.0, 0.3591825582478951, 0.0, 0.0, 0.0, 0.0, ..."
9,SLC10,sbm--m,"[0.0, 0.3249191765639114, 0.0, 0.0, 0.0, 0.0, ..."


classify the nature of each ROI's membership profile (variability type)

In [13]:
def get_mem_variability_type(args, X, Y, omegas, *,
                             within_q25_min=0.02, within_q75_min=0.13,
                             ci=(2.5, 97.5)):
    '''
    Classify every ROI into one of four variability types and one-hot encode it.

    X: mode_ents; num_modes x num_rois
    Y: soft_ent; num_rois (a 1 x num_rois array is flattened)
    omegas: probabilities of mode_ents; num_modes x 1
        (currently unused: the percentiles below are unweighted)
    within_q25_min, within_q75_min: an ROI has within-mode variability when the
        25th percentile of its mode entropies is >= within_q25_min or the 75th
        percentile is >= within_q75_min
    ci: (lower, upper) percentiles of the mode entropies; an ROI has across-mode
        variability when its soft entropy falls outside this interval

    Returns var_types, a float matrix of size (num_rois x 4) with a single 1 per
    row at column 2 * within_mode_var + across_mode_var:
        0: neither, 1: across-mode only, 2: within-mode only, 3: both
    '''
    X = np.asarray(X)
    Y = np.asarray(Y).reshape(-1)

    # within-mode variability from the interquartile range of the mode entropies
    q25, q75 = np.percentile(X, [25, 75], axis=0)
    within_mode_var = ((q25 >= within_q25_min) | (q75 >= within_q75_min)).astype(int)

    # across-mode variability: soft entropy lies outside the mode-entropy interval
    ci_low, ci_high = np.percentile(X, list(ci), axis=0)
    across_mode_var = 1 - ((ci_low <= Y) & (Y <= ci_high)).astype(int)

    # two-bit code (within is the high bit), then one-hot via an identity lookup
    codes = 2 * within_mode_var + across_mode_var
    var_types = np.eye(4)[codes]
    # var_types: matrix of size (num_rois x 4)
    return var_types

def get_var_types_per_sub(args, sub, group, soft_ent):
    """Build a one-row DataFrame of variability-type matrices for one subject.

    For every column in the module-level ``cols``, the per-mode entropies in
    ``group[col]`` are stacked to (modes, rois) and classified against
    ``soft_ent`` (rois,) by ``get_mem_variability_type``. The same ``soft_ent``
    is used for every column.
    """
    record = {'sub': [sub], 'sbm': [SBM]}
    for col in cols:
        stacked_ents = np.stack(group[col].to_list(), axis=0)  # (modes, rois)
        mode_weights = group['omega'].to_numpy()
        record[col] = [
            get_mem_variability_type(args, stacked_ents, soft_ent, mode_weights)
        ]
    return pd.DataFrame(record)

In [14]:
# Variability types per subject. Each membership column is classified against its
# own soft entropy; the column is named explicitly instead of relying on a loop
# variable left over from an earlier cell.
var_types_df = []
for sub, group in marginals_ent_df.groupby('sub'):
    # this subject's soft entropies, one entry per membership column
    soft_row = soft_marginals_ent_df.loc[soft_marginals_ent_df['sub'] == sub].iloc[0]
    row = None
    for col in cols:
        # get_var_types_per_sub applies a single soft entropy to every column, so
        # call it with this column's soft entropy and keep only the matching column
        col_row = get_var_types_per_sub(args, sub, group, soft_row[col])
        if row is None:
            row = col_row
        else:
            row[col] = col_row[col]
    var_types_df.append(row)
var_types_df = pd.concat(var_types_df).reset_index(drop=True)
var_types_df

Unnamed: 0,sub,sbm,pi_aligned
0,SLC01,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [..."
1,SLC02,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [..."
2,SLC03,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], [..."
3,SLC04,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [..."
4,SLC05,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [..."
5,SLC06,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], [..."
6,SLC07,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], [..."
7,SLC08,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0], [..."
8,SLC09,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], [..."
9,SLC10,sbm--m,"[[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0], [..."


In [15]:
# Parcellation image, its voxel array, and the label values listed in the labels file.
# var_types_to_nifti() reads `parcels_img`, `parcels` and `roi_labels` as module-level names.
parcels_img = ants.image_read(f'{PARCELS_path}/{PARC_DESC}_desc-parcels.nii.gz')
parcels = parcels_img.numpy()
roi_labels = np.loadtxt(f'{PARCELS_path}/{PARC_DESC}_desc-labels.txt')

In [16]:
def concatenate(in_files, out_file):
    """Concatenate ``in_files`` into ``out_file`` with AFNI's TCat, then delete the inputs.

    Any existing ``out_file`` is removed first. Deleting the old output and the
    inputs is best effort: filesystem errors there are ignored, but other
    exceptions (e.g. KeyboardInterrupt) are no longer swallowed. A failure of
    the TCat run itself propagates, and the inputs are then left in place.
    """
    try:
        os.remove(out_file)
    except OSError:
        pass  # nothing to remove, or not removable

    tcat = afni.TCat()
    tcat.inputs.in_files = in_files
    tcat.inputs.out_file = out_file
    # NOTE(review): presumably this emits 3dTcat's plain `-rlt` (linear detrend)
    # option -- confirm that is intended for these maps.
    tcat.inputs.rlt = ''
    tcat.run()

    for file in in_files:
        try:
            os.remove(file)
        except OSError:
            pass  # best-effort cleanup of the per-volume files
    return None

def var_types_to_nifti(args, X, folder, name='col'):
    """Write a (num_rois x 4) matrix as a 4-volume image ``{folder}/desc-{name}.nii.gz``.

    For each of the four columns of ``X``, every voxel of ROI ``roi_labels[i]`` in
    the module-level ``parcels`` array receives ``X[i, column]``. The four volumes
    are saved as ``vartype-<k>.nii.gz`` and then merged by ``concatenate``, which
    also deletes the per-volume files.
    """
    # Create the folder in-process: no shell quoting issues, and failures raise.
    os.makedirs(folder, exist_ok=True)
    in_files = []
    for idx_type in range(0, 4):
        x = X[:, idx_type]
        x_img = np.zeros_like(parcels)
        # paint each ROI's voxels with that ROI's value
        for idx, roi in enumerate(roi_labels):
            x_img += (parcels == roi) * (x[idx])

        file = f'{folder}/vartype-{idx_type:01d}.nii.gz'
        parcels_img.new_image_like(x_img).to_filename(file)
        in_files.append(file)

    out_file = f'{folder}/desc-{name}.nii.gz'
    concatenate(in_files, out_file)
    return None

In [17]:
# save all var_types per animal
for idx, row in var_types_df.iterrows():
    sub = row['sub']

    out_folder = f'{ESTIM_path}/individual/sub-{sub}/membership-profiles-nature/{SBM}'
    # in-process directory creation: no shell, and failures raise
    os.makedirs(out_folder, exist_ok=True)

    for col in cols:
        X = row[col]
        var_types_to_nifti(args, X, out_folder, name=col)

241222-21:08:56,849 nipype.interface INFO:
	 stderr 2024-12-22T21:08:56.849671:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:08:56,853 nipype.interface INFO:
241222-21:08:56,854 nipype.interface INFO:
241222-21:08:56,908 nipype.interface INFO:
	 stderr 2024-12-22T21:08:56.908179:++ elapsed time = 0.1 s
241222-21:08:57,367 nipype.interface INFO:
	 stderr 2024-12-22T21:08:57.367409:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:08:57,371 nipype.interface INFO:
241222-21:08:57,373 nipype.interface INFO:
241222-21:08:57,425 nipype.interface INFO:
	 stderr 2024-12-22T21:08:57.425120:++ elapsed time = 0.1 s
241222-21:08:57,887 nipype.interface INFO:
	 stderr 2024-12-22T21:08:57.887072:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:08:57,890 nipype.interface INFO:
241222-21:08:57,893 nipype.interface INFO:
241222-21:08:57,946 nipype.interface INFO:
	 stderr 2024-12-22T21:08:57.945974:++ elapsed time = 0.1 s
241222-21:08:58,

group level aggregation

In [18]:
def bootstrap(matrices, n_bootstrap=1000):
    """Bootstrap the element-wise mean of a collection of equally shaped matrices.

    The matrices are stacked along a new first axis and resampled along it
    ``n_bootstrap`` times; each draw is averaged over that axis.

    Returns ``(mean, ci)``: the mean of the bootstrap means, and their 2.5th and
    97.5th percentiles stacked along a leading axis of length 2.
    """
    stacked = np.stack(matrices)

    draws = []
    for _ in range(n_bootstrap):
        draws.append(np.mean(resample(stacked), axis=0))
    bootstrap_means = np.stack(draws)

    mean_estimate = np.mean(bootstrap_means, axis=0)  # mean := probability of var type per roi
    interval = np.percentile(bootstrap_means, [2.5, 97.5], axis=0)  # CI
    return mean_estimate, interval

def analyze_group_var_types(matrices):
    """Summarize per-subject one-hot variability-type matrices at the group level.

    Parameters
    ----------
    matrices : sequence of (num_rois x 4) arrays, one per subject.

    Returns
    -------
    dict with keys
        'modes': (num_rois x 4) indicator matrix of the most probable type(s)
            per ROI; ties mark several columns
        'mode_probabilities': tuple with the highest probability per ROI
        'entropy': per-ROI entropy of the type distribution, normalized by
            ``log2`` of the number of types
        'full_distribution': bootstrap-mean probability of each type per ROI
        'ci': 2.5th / 97.5th bootstrap percentiles of those probabilities,
            stacked along a leading axis of length 2
    """
    # probabilities with bootstrap replacement
    probabilities, ci = bootstrap(matrices)

    # Get mode and its probability (most frequent column/var_type); may have multiple
    mode_probs = tuple(np.max(row) for row in probabilities)
    modes = [
        list(np.where(row == top)[0])
        for row, top in zip(probabilities, mode_probs)
    ]
    modes = MultiLabelBinarizer(classes=list(range(4))).fit_transform(modes)

    # Calculate entropy to measure uncertainty
    # High entropy means more uncertainty in the distribution
    entropy = stats.entropy(probabilities, base=2, axis=-1) / np.log2(probabilities.shape[-1])

    return {
        'modes': modes,
        'mode_probabilities': mode_probs,
        'entropy': entropy,
        'full_distribution': probabilities,
        'ci': ci,  # previously computed and then dropped
    }

In [19]:
out_folder = f'{ESTIM_path}/group/membership-profiles-nature/{SBM}'
# in-process directory creation: no shell, and failures raise
os.makedirs(out_folder, exist_ok=True)

for col in cols:
    X = var_types_df[col].to_list()
    group_var_stats = analyze_group_var_types(X)

    # keep the full statistics next to the images
    with open(f'{out_folder}/desc-{col}.pkl', 'wb') as f:
        pickle.dump([group_var_stats], f)

    # one 4-volume image for the type probabilities, one for the most probable type(s)
    var_types_to_nifti(args, group_var_stats['full_distribution'], out_folder, name=f'{col}-full-dist')
    var_types_to_nifti(args, group_var_stats['modes'], out_folder, name=f'{col}-modes')

241222-21:09:02,115 nipype.interface INFO:
	 stderr 2024-12-22T21:09:02.115582:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:02,119 nipype.interface INFO:
241222-21:09:02,121 nipype.interface INFO:
241222-21:09:02,168 nipype.interface INFO:
	 stderr 2024-12-22T21:09:02.168347:++ elapsed time = 0.1 s
241222-21:09:02,688 nipype.interface INFO:
	 stderr 2024-12-22T21:09:02.688546:++ 3dTcat: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:02,692 nipype.interface INFO:
241222-21:09:02,695 nipype.interface INFO:
241222-21:09:02,751 nipype.interface INFO:
	 stderr 2024-12-22T21:09:02.751784:++ elapsed time = 0.1 s


variability-types/profiles-nature per rsn

In [20]:
def apply_rsn_mask(args, nii_file, rsn_file, out_file):
    """Multiply ``nii_file`` voxel-wise by the mask ``rsn_file`` and write ``out_file``.

    Runs AFNI's Calc interface with the expression ``a*b`` (a = image, b = mask),
    NIFTI output type, and overwriting enabled. ``args`` is accepted but not used.
    """
    calc = afni.Calc(
        in_file_a=nii_file,
        in_file_b=rsn_file,
        expr='a*b',
        out_file=out_file,
        outputtype='NIFTI',
        overwrite=True,
    )
    calc.run()
    return None

In [21]:
# Group-level maps to be masked. The folder is spelled out instead of reusing
# `out_folder`, which other cells rebind to per-RSN / per-IC subfolders; this keeps
# the cell correct when it is re-run out of order.
nii_files = sorted(glob.glob(f'{ESTIM_path}/group/membership-profiles-nature/{SBM}/*.nii.gz'))
# nii_files

In [22]:
# Mask images in the RSN folder, in sorted order (one per file ending in -mask.nii.gz).
rsn_files = sorted(glob.glob(f'{RSN_ROI_path}/*-mask.nii.gz', recursive=True))
# rsn_files

In [23]:
# Mask every group-level map with every RSN mask; results go to rsns/rsn-<name>/.
for rsn_file in (rsn_files):
    # third dash-separated token of the mask's file name
    rsn_name = [n for n in rsn_file.split('/') if 'desc-' in n][0].split('-')[2]

    out_folder = f'{ESTIM_path}/group/membership-profiles-nature/{SBM}/rsns/rsn-{rsn_name}'
    # in-process directory creation: no shell, and failures raise
    os.makedirs(out_folder, exist_ok=True)

    for nii_file in nii_files:
        # keep the map's own file name inside the RSN folder
        nii_name = [n for n in nii_file.split('/') if 'desc-' in n][0]
        out_file = f'{out_folder}/{nii_name}'
        apply_rsn_mask(args, nii_file, rsn_file, out_file)

241222-21:09:03,10 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.010171:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:03,12 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.010171:++ Authored by: A cast of thousands
241222-21:09:03,82 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.082526:++ Output dataset /home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/graph-constructed/method-pearson-corr/threshold-positive/edge-binary/density-20/layer-individual/unit-sub/estimates/group/membership-profiles-nature/sbm--m/rsns/rsn-basal_ganglia/desc-pi_aligned-full-dist.nii.gz
241222-21:09:03,296 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.296274:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:03,298 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.296274:++ Authored by: A cast of thousands
241222-21:09:03,364 nipype.interface INFO:
	 stderr 2024-12-22T21:09:03.364319:++ Outpu

In [24]:
# IC mask images (sorted) and the table of IC names; later cells pair them by
# position, so the sorted file order is assumed to match the CSV row order -- TODO confirm.
ic_files = sorted(glob.glob(f'{IC_ROI_path}/*-mask*.nii.gz', recursive=True))
ics_df = pd.read_csv(f'{IC_ROI_path}/ic_names.csv')
ic_files

['/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-00-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-01-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-02-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-03-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-04-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-05-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/ics/desc-j-06-mask.nii.gz',
 '/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-

In [25]:
# Mask every group-level map with every IC mask; results go to ics/ic-<index>/.
# Files and name-table rows are paired by position; zip stops at the shorter one.
for ic_file, (idx, row) in zip(ic_files, ics_df.iterrows()):
    ic_name = row['abbrev']

    out_folder = f'{ESTIM_path}/group/membership-profiles-nature/{SBM}/ics/ic-{idx:02d}'
    # in-process directory creation: no shell, and failures raise
    os.makedirs(out_folder, exist_ok=True)

    for nii_file in nii_files:
        # keep the map's own file name inside the IC folder
        nii_name = [n for n in nii_file.split('/') if 'desc-' in n][0]
        out_file = f'{out_folder}/{nii_name}'
        apply_rsn_mask(args, nii_file, ic_file, out_file)

241222-21:09:06,502 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.502554:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:06,505 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.502554:++ Authored by: A cast of thousands
241222-21:09:06,562 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.562419:++ Output dataset /home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/graph-constructed/method-pearson-corr/threshold-positive/edge-binary/density-20/layer-individual/unit-sub/estimates/group/membership-profiles-nature/sbm--m/ics/ic-00/desc-pi_aligned-full-dist.nii.gz
241222-21:09:06,785 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.785678:++ 3dcalc: AFNI version=AFNI_20.2.18 (Sep 17 2020) [64-bit]
241222-21:09:06,788 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.785678:++ Authored by: A cast of thousands
241222-21:09:06,855 nipype.interface INFO:
	 stderr 2024-12-22T21:09:06.855927:++ Output dataset 

In [26]:
# distribution of variability-type-probabilities per rsn

In [27]:
def get_vals_per_var_type(file):
    """Collect the positive voxel values of each variability-type volume in ``file``.

    The image's last axis indexes the four variability types. Returns a long-form
    DataFrame with columns ``'variability_type'`` and ``'fraction of animals'``,
    one row per positive voxel; a type without positive voxels contributes a
    single row whose missing value is filled with 0.0.
    """
    type_labels = ['lms, lsv', 'lms, hsv', 'dms, lsv', 'dms, hsv']

    volume = ants.image_read(file).numpy()
    positive = volume > 0

    # one cell per type, each holding that type's array of positive values
    per_type = {
        label: [volume[..., k][positive[..., k]]]
        for k, label in enumerate(type_labels)
    }

    long_df = pd.DataFrame(per_type).melt().explode('value')
    long_df.columns = ['variability_type', 'fraction of animals']
    return long_df.fillna(0.0)

# Figure-level legend entries spelling out the abbreviations used in the panel legends.
_VAR_TYPE_FULL_LABELS = [
    'lms: localized membership per mode/solution', 'dms: distributed membership per mode/solution',
    'lsv: low solution variability', 'hsv: high solution variability'
]


def _plot_var_type_grid(files, titles, suptitle, ncols=5):
    """Draw one KDE panel per file on an ``ncols``-wide grid and return the figure.

    Each panel shows, per variability type, the distribution of the values
    returned by ``get_vals_per_var_type`` for that file. Panels are filled in
    order; every unused axis of the grid is removed.

    Raises ValueError when ``files`` is empty.
    """
    if len(files) == 0:
        raise ValueError('no files to plot')

    handles = [
        plt.Line2D([], [], marker='_', color='w', markersize=12, label=label)
        for label in _VAR_TYPE_FULL_LABELS
    ]

    nrows = int(np.ceil(len(files) / ncols))
    # squeeze=False keeps `axs` two-dimensional even when there is a single row
    fig, axs = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows), squeeze=False)
    fig.tight_layout(h_pad=3, w_pad=3)

    fig.suptitle(suptitle, x=0.0, y=1.0)

    panels = axs.ravel()
    num_drawn = 0
    for ax, file, title in zip(panels, files, titles):
        vals_df = get_vals_per_var_type(file)
        sns.kdeplot(vals_df, x='fraction of animals', hue='variability_type', fill=True, alpha=0.1, ax=ax)
        sns.move_legend(ax, loc='upper left')
        ax.set(title=title, xlim=[0.0, 1.0])
        num_drawn += 1

    # drop every axis that did not receive a panel
    for ax in panels[num_drawn:]:
        fig.delaxes(ax)

    fig.legend(handles=handles, loc='lower right', ncol=1)

    return fig


def plot_var_type_probs_all_rsns(args, files, col=None):
    """Plot the variability-type value distributions of every RSN file on one figure.

    Parameters
    ----------
    args : accepted for signature compatibility; not used.
    files : list of image paths; each path must contain a ``rsn-<name>``
        component, whose ``<name>`` becomes the panel title.
    col : membership column shown in the figure title. When omitted, the
        module-level variable ``col`` is used, as before.

    Returns the matplotlib figure. Raises ValueError when ``files`` is empty.
    """
    if col is None:
        col = globals()['col']  # legacy: title taken from the caller's loop variable

    titles = [
        [n for n in file.split('/') if 'rsn-' in n][0].split('-')[1]
        for file in files
    ]
    return _plot_var_type_grid(files, titles, f'{SBM} {col}')


def plot_var_type_probs_all_ics(args, files, ics_df, col=None):
    """Plot the variability-type value distributions of every IC file on one figure.

    Files are paired with the rows of ``ics_df`` by position (stopping at the
    shorter of the two); each panel is titled ``'<row index>: <abbrev>'`` and
    placed by its position in that pairing.

    Parameters
    ----------
    args : accepted for signature compatibility; not used.
    files : list of image paths, in the same order as the rows of ``ics_df``.
    ics_df : DataFrame with an ``'abbrev'`` column.
    col : membership column shown in the figure title. When omitted, the
        module-level variable ``col`` is used, as before.

    Returns the matplotlib figure. Raises ValueError when nothing can be paired.
    """
    if col is None:
        col = globals()['col']  # legacy: title taken from the caller's loop variable

    paired = list(zip(ics_df.iterrows(), files))
    used_files = [file for _, file in paired]
    titles = [f"{idx:02d}: {row['abbrev']}" for (idx, row), _ in paired]
    return _plot_var_type_grid(used_files, titles, f'{SBM} {col}')

In [28]:
# One PDF per membership column: KDE panels of the variability-type values for every RSN.
out_folder = f'{ESTIM_path}/group/membership-profiles-nature/{SBM}'
# NOTE: the loop variable must stay named `col`; the plotting function reads the
# module-level `col` for the figure title.
for col in cols:
    files = sorted(glob.glob(f'{out_folder}/rsns/rsn-*/*-{col}-full-dist*'))
    fig = plot_var_type_probs_all_rsns(args, files)
    out_file = f'{out_folder}/desc-{col}-rsn-dist.pdf'
    fig.savefig(f'{out_file}', bbox_inches='tight')
    # free the figure once it is saved
    plt.close('all')

In [29]:
# One PDF per membership column: KDE panels of the variability-type values for every IC.
out_folder = f'{ESTIM_path}/group/membership-profiles-nature/{SBM}'
# NOTE: the loop variable must stay named `col`; the plotting function reads the
# module-level `col` for the figure title.
for col in cols:
    files = sorted(glob.glob(f'{out_folder}/ics/ic-*/*-{col}-full-dist*'))
    fig = plot_var_type_probs_all_ics(args, files, ics_df)
    out_file = f'{out_folder}/desc-{col}-ic-dist.pdf'
    fig.savefig(f'{out_file}', bbox_inches='tight')
    # free the figure once it is saved
    plt.close('all')