# Oct 17, 2025: list ROIs per community archetype

conda env: gt

In [1]:
import csv
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp 
import dill as pickle 
from os.path import join as pjoin
from itertools import product
from tqdm import tqdm
from copy import deepcopy
from pathlib import Path
import re
from scipy import stats
from scipy.spatial.distance import jensenshannon, squareform, pdist
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from sklearn.metrics import silhouette_score

import glob
import random

from itertools import product, combinations
import multiprocessing as mp
from functools import partial
from joblib import Parallel, delayed

from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from munkres import Munkres

# networks
import graph_tool.all as gt

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.cm import rainbow
from cycler import cycler

# Baseline text/errorbar styling, written to the ACTIVE rcParams table.
# Writing the font entries to `rcParamsDefault` only changes what a later
# `rcdefaults()` call would restore; it does not affect figures drawn now.
# NOTE(review): setup_mpl() (called in a later cell) sets the font family,
# sans-serif list and font size again, so only the capsize survives it.
plt.rcParams['font.family'] = "sans-serif"
plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams['font.size'] = 14
plt.rcParams["errorbar.capsize"] = 0.5

import colorcet as cc

# Silence ALL warning categories for the rest of the session, not only
# UserWarning (the narrower `category=UserWarning` filter is left commented
# out at the end of the call below).
import warnings
warnings.filterwarnings("ignore") #, category=UserWarning)

In [2]:
def get_colorblind_palette(n=20):
    """Return up to ``n`` distinct hex colors from two hard-coded palettes.

    The two lists are concatenated (``base`` first) and duplicates are removed
    case-insensitively, keeping the first spelling encountered. The result is
    then cut with ``[:n]``; 20 distinct colors are available in total, so a
    larger ``n`` simply returns all of them.
    """
    base = [
        "#0072B2", "#D55E00", "#009E73", "#CC79A7",
        "#F0E442", "#56B4E9", "#E69F00", "#000000",
        "#999999", "#882255", "#44AA99", "#117733"
    ]

    new = [
        "#0173B2", "#DE8F05", "#029E73", "#D55E00",  # D55E00 already in base
        "#CC78BC", "#CA9161", "#FBAFE4", "#949494",
        "#ECE133", "#56B4E9"  # 56B4E9 already in base
    ]

    # A dict keyed by the lowercase code keeps insertion order and ignores
    # any later repeat of a color that is already present.
    first_seen = {}
    for hex_code in (*base, *new):
        first_seen.setdefault(hex_code.lower(), hex_code)

    return list(first_seen.values())[:n]

def setup_mpl(fontsize=7):
    """Install an export-oriented style into matplotlib's global rcParams.

    Parameters
    ----------
    fontsize : int or float, default 7
        Single size used for base text, axes titles, axes labels, tick labels
        and legend text.

    Notes
    -----
    Updates ``mpl.rcParams`` in place and returns nothing. The default color
    cycle is replaced by the palette from ``get_colorblind_palette()``.
    """
    palette = get_colorblind_palette()

    # Fonts: sans-serif family, Arial first with DejaVu Sans as fallback.
    style = {
        "font.family": "sans-serif",
        "font.sans-serif": ["Arial", "DejaVu Sans"],
    }

    # Every text element gets the same size.
    sized_keys = (
        "font.size",
        "axes.titlesize",
        "axes.labelsize",
        "xtick.labelsize",
        "ytick.labelsize",
        "legend.fontsize",
    )
    style.update(dict.fromkeys(sized_keys, fontsize))

    # Export: SVG text is not converted to paths; PDF/PS use font type 42.
    style.update({
        "svg.fonttype": 'none',
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "figure.dpi": 300,
        "savefig.dpi": 300,
        "text.usetex": False,
    })

    # Thin axes spines and tick marks, short major ticks.
    hairline = 0.5
    major_tick_length = 2.5
    style.update({
        "axes.linewidth": hairline,
        "xtick.major.width": hairline,
        "ytick.major.width": hairline,
        "xtick.minor.width": hairline,
        "ytick.minor.width": hairline,
        "xtick.major.size": major_tick_length,
        "ytick.major.size": major_tick_length,
    })

    # Plotted lines and the default color cycle.
    style["lines.linewidth"] = 1.0
    style["axes.prop_cycle"] = cycler('color', palette)

    mpl.rcParams.update(style)

    # mpl.rcParams["axes.prop_cycle"] = cycler('color', get_colorblind_palette())

In [3]:
# Apply the export-oriented rcParams with size-7 text to every later figure.
setup_mpl(fontsize=7)
# Keep the palette available at notebook level too; setup_mpl() only holds
# its own local copy.
CUD_COLORS = get_colorblind_palette()

In [4]:
import seaborn as sns

In [5]:
class ARGS():
    """Empty attribute container; run settings are attached to its instance."""
    pass

# Single settings object that the following cells fill in attribute by attribute.
args = ARGS()

# NOTE(review): SEED is stored here, but none of the visible cells passes it
# to an RNG — confirm where (or whether) it is consumed.
args.SEED = 100

In [6]:
# Parcellation identifiers; trailing comments list values used in other runs.
args.source = 'allen'      # 'spatial' | 'allen'
args.space = 'ccfv2'       # 'ccfv2'
args.brain_div = 'whl'     # 'whl'
args.num_rois = 172        # 162 | 172
args.resolution = 200      # 200

# Underscore-joined `key-value` descriptor used in parcel and result paths.
PARC_DESC = '_'.join([
    f'source-{args.source}',
    f'space-{args.space}',
    f'braindiv-{args.brain_div}',
    f'nrois-{args.num_rois}',
    f'res-{args.resolution}',
])
PARC_DESC

'source-allen_space-ccfv2_braindiv-whl_nrois-172_res-200'

In [7]:
# Graph-construction settings for this run.
args.GRAPH_DEF = 'constructed'
args.GRAPH_METHOD = 'pearson'
args.THRESHOLD = 'signed'
args.EDGE_DEF = 'binary'
args.EDGE_DENSITY = 20
args.LAYER_DEF = 'individual'
args.DATA_UNIT = 'grp'

# Dataset layout rooted at $HOME (raises KeyError if HOME is not set).
BASE_path = os.environ["HOME"] + '/new_mouse_dataset'
PARCELS_path = BASE_path + '/parcels'
ROI_path = f'{BASE_path}/roi-results-v3/{PARC_DESC}'
TS_path = ROI_path + '/roi_timeseries'

In [8]:
# Results root: one path component per graph-construction setting.
ROI_RESULTS_path = (
    f'{ROI_path}'
    f'/graph-{args.GRAPH_DEF}/method-{args.GRAPH_METHOD}'
    f'/threshold-{args.THRESHOLD}/edge-{args.EDGE_DEF}/density-{args.EDGE_DENSITY}'
    f'/layer-{args.LAYER_DEF}/unit-{args.DATA_UNIT}'
)
GRAPH_path = f'{ROI_RESULTS_path}/graphs'
SBM_path = f'{ROI_RESULTS_path}/model-fits'
DIAG_path = f'{ROI_RESULTS_path}/diagnostics'
ESTIM_path = f'{ROI_RESULTS_path}/estimates'

# Create the output tree with os.makedirs instead of shelling out to
# `mkdir -p`: paths containing spaces or shell metacharacters are handled
# correctly, and a failure raises instead of being returned as an ignored
# exit status.
for out_dir in (
    GRAPH_path,
    SBM_path,
    DIAG_path,
    f'{ESTIM_path}/individual',
    f'{ESTIM_path}/group',
):
    os.makedirs(out_dir, exist_ok=True)

0

In [9]:
# Model variant: `dc` and `sbm` are the two inputs read by sbm_name().
args.sbm = 'h'
args.dc = False
args.nested = (args.sbm == 'h')

# Sampling budget; the number of draws is half the forced iteration count.
args.force_niter = 100000
args.num_draws = int(0.5 * args.force_niter)

# Convergence settings.
args.epsilon = 0.4  # threshold KSD for convergence
args.delta = np.ceil(args.force_niter / 100).astype(int)  # 1% of iterations, rounded up

def sbm_name(args):
    """Build the ``sbm-<tag>-<variant>`` label for a model configuration.

    ``<variant>`` is ``args.sbm``. ``<tag>`` is ``'dc'`` when ``args.dc`` is
    truthy and ``'nd'`` otherwise, except for the variants ``'a'`` and ``'m'``,
    where the tag is empty (giving e.g. ``'sbm--a'``).
    """
    variant = args.sbm
    tag = 'dc' if args.dc else 'nd'
    if variant in ('a', 'm'):
        # These two variants carry no dc/nd tag in their label.
        tag = ''
    return f'sbm-{tag}-{variant}'

# Label of the model variant for this run; used as a path component when
# reading and writing estimates in the cells below.
SBM = sbm_name(args)
SBM

'sbm-nd-h'

roi info

In [10]:
parcels_file = f'{PARCELS_path}/{PARC_DESC}_desc-parcels.nii.gz'
# parcels_img = nib.load(parcels_file)

# ROI lookup table. Later cells need BOTH `roi_labels` and `roi_table`
# (with a `name` column), so the fallback has to define the table as well.
names_file = f'{PARCELS_path}/{PARC_DESC}_desc-names.csv'
try:
    roi_table = pd.read_csv(names_file)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
    # Only "file missing/unreadable" is treated as recoverable; anything else
    # (including KeyboardInterrupt) now propagates instead of being swallowed.
    # print() rather than warnings.warn(): warnings are filtered out globally.
    print(f'Could not read ROI names from {names_file} ({err!r}); using numeric ROI labels.')
    roi_labels = np.arange(1, args.num_rois+1)
    roi_table = pd.DataFrame({'name': roi_labels.astype(str), 'roi': roi_labels})
else:
    if 'roi' in roi_table.columns:
        roi_labels = roi_table['roi'].to_numpy()
    else:
        # Table present but without a `roi` column: keep it, number the ROIs.
        roi_labels = np.arange(1, args.num_rois+1)

In [11]:
# Preview the ROI lookup table; the `name` column is what later cells report.
roi_table

Unnamed: 0,old_roi,name,roi
0,1,"R-Frontal pole, cerebral cortex (FRP,184)",1
1,2,"R-Primary motor area (MOp,985)",2
2,3,"R-Secondary motor area (MOs,993)",3
3,4,"R-Primary somatosensory area, nose (SSp-n,353)",4
4,5,"R-Primary somatosensory area, barrel field (SS...",5
...,...,...,...
167,168,"L-Medulla, sensory related (MY-sen,472)",168
168,169,"L-Medulla, motor related (MY-mot,456)",169
169,170,"L-Medulla, behavioral state related (MY-sat,465)",170
170,171,"L-Cerebellar cortex (CBX,614)",171


load archetypes

In [12]:
# NOTE: `pickle` is dill here (see the import cell). Unpickling can execute
# arbitrary code, so only load files produced by this project's own pipeline.
centroids_file = f'{ESTIM_path}/group/multiplicity/community/{SBM}/desc-centroids.pkl'
with open(centroids_file, 'rb') as fh:
    centroids_df = pickle.load(fh)

In [13]:
# Inspect the loaded table: one row per community (`comm`), with the level
# encoded in `col` and the archetype array stored under `centroids`.
centroids_df

Unnamed: 0,sbm,col,comm,centroids
0,sbm-nd-h,pi_1_aligned,0,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,sbm-nd-h,pi_1_aligned,1,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,sbm-nd-h,pi_1_aligned,2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,sbm-nd-h,pi_1_aligned,3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,sbm-nd-h,pi_1_aligned,4,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
5,sbm-nd-h,pi_1_aligned,5,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
6,sbm-nd-h,pi_1_aligned,6,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
7,sbm-nd-h,pi_1_aligned,7,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8,sbm-nd-h,pi_1_aligned,8,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


roi list per comm.

In [14]:
# Minimum archetype membership for an ROI to be listed under that archetype.
MEMBERSHIP_THRESHOLD = 0.2

# All files below go into ONE level-specific folder and communities are keyed
# by `comm + 1` only, so rows from different levels would overwrite each other
# and be filed under whichever level came last. Resolve the level once, up
# front, and refuse mixed input instead of silently misfiling it.
levels = sorted({
    col.replace('pi', '').replace('_', '').replace('aligned', '')
    for col in centroids_df['col']
})
if len(levels) != 1:
    raise ValueError(
        f'expected centroids for exactly one level, found {levels}; '
        'filter centroids_df to a single `col` before exporting'
    )
level = levels[0]

folder = f'{ESTIM_path}/group/multiplicity/community/{SBM}/archetype-roi-composition/level-{level}'
os.makedirs(folder, exist_ok=True)

# community id (1-based) -> table of (archetype, name, membership) rows
comm_rois_by_comm = {}
for _, row in centroids_df.iterrows():
    # After the transpose each column is one archetype; its entries are
    # assigned row-for-row onto a copy of roi_table.
    Cs = row['centroids'].T

    comm_rois = []
    for a in range(Cs.shape[-1]):
        rois_df = roi_table.copy(deep=True)
        rois_df['archetype'] = a
        rois_df['membership'] = Cs[:, a]
        rois_df = rois_df.sort_values(by=['membership'], ascending=False)
        rois_df = rois_df[rois_df['membership'] > MEMBERSHIP_THRESHOLD]
        comm_rois.append(rois_df[['archetype', 'name', 'membership']])

    comm_rois_by_comm[row['comm'] + 1] = pd.concat(comm_rois, ignore_index=True)

# One CSV per community ...
for comm, comm_table in comm_rois_by_comm.items():
    comm_table.to_csv(f'{folder}/comm-{comm}_desc-comm-rois.csv')

# ... and one long table over all communities. `roi_id` is the row position
# within each community's table (0, 1, 2, ...), not an ROI identifier.
comm_rois_df = pd.concat(
    list(comm_rois_by_comm.values()),
    keys=list(comm_rois_by_comm.keys()),
    names=['comm', 'roi_id'],
).reset_index()

comm_rois_df.to_csv(f'{folder}/desc-comm-rois.csv')

stable-core vs. flexible-periphery ROIs per community, across archetypes

In [15]:
# Long table: one row per (community, archetype, ROI) whose membership passed
# the cutoff applied in the previous cell.
comm_rois_df

Unnamed: 0,comm,roi_id,archetype,name,membership
0,1,0,1,"R-Main olfactory bulb (MOB,507)",0.778
1,1,1,1,"R-Taenia tecta (TT,589)",0.778
2,1,2,1,"L-Taenia tecta (TT,675)",0.778
3,1,3,1,"L-Main olfactory bulb (MOB,593)",0.775
4,1,4,1,"R-Dorsal peduncular area (DP,814)",0.729
...,...,...,...,...,...
638,9,74,4,"L-Midbrain, behavioral state related (MBsta,434)",0.247
639,9,75,4,"R-Midbrain, sensory related (MBsen,339)",0.240
640,9,76,4,"R-Postpiriform transition area (TR,566)",0.233
641,9,77,4,"R-Midbrain, motor related (MBmot,323)",0.233


In [16]:
# This cell needs the per-archetype membership table. A later cell rebinds
# `comm_rois_df` to the stable/flexible table (no `membership` column), so a
# re-run after that cell would otherwise fail with an opaque aggregation error.
if 'membership' not in comm_rois_df.columns:
    raise KeyError(
        "comm_rois_df has no 'membership' column; it was probably overwritten "
        "by the stable/flexible table - re-run the 'roi list per comm.' cell first"
    )

comms = sorted(comm_rois_df['comm'].unique())
roi_col = 'name'
# Mean-membership cutoff between a community's stable core and its periphery.
score_threshold = 0.5

results = {}
for comm in comms:
    comm_df = comm_rois_df[comm_rois_df['comm'] == comm]

    # NOTE(review): the mean is taken only over the rows present in
    # comm_rois_df, i.e. archetypes in which the ROI was kept by the earlier
    # membership cutoff; archetypes where it was dropped do not pull the
    # score down. Confirm this is the intended definition of stability.
    roi_stats = comm_df.groupby(roi_col).agg(
        stability_score=('membership', 'mean'),
    )

    stable_core = roi_stats[roi_stats['stability_score'] >= score_threshold].index.tolist()
    flexible_periphery = roi_stats[roi_stats['stability_score'] < score_threshold].index.tolist()

    results[comm] = {
        'stable_core': stable_core,
        'flexible_periphery': flexible_periphery,
    }

In [17]:
# One row per community, with the `stable_core` and `flexible_periphery`
# ROI lists as columns and the community id as a regular `comm` column.
stability_df = (
    pd.DataFrame.from_dict(results, orient='index')
    .rename_axis('comm')
    .reset_index()
)
stability_df

Unnamed: 0,comm,stable_core,flexible_periphery
0,1,"[L-Accessory olfactory bulb (AOB,237), L-Anter...","[L-Agranular insular area, dorsal part (AId,19..."
1,2,"[L-Anterior area (VISa,312782632), L-Anteromed...","[L-Anterior cingulate area, dorsal part (ACAd,..."
2,3,"[L-Accessory olfactory bulb (AOB,237), L-Agran...","[L-Anterior cingulate area, dorsal part (ACAd,..."
3,4,"[L-Accessory olfactory bulb (AOB,237), L-Anter...","[L-Dorsal peduncular area (DP,900), L-Frontal ..."
4,5,"[L-Agranular insular area, posterior part (AIp...","[L-Anterior area (VISa,312782632), L-Primary a..."
5,6,"[L-Agranular insular area, posterior part (AIp...","[L-Lateral septal complex (LSX,361), L-Midbrai..."
6,7,"[L-Agranular insular area, ventral part (AIv,2...","[L-Endopiriform nucleus (EP,1028)]"
7,8,"[L-Cerebellar cortex (CBX,614), L-Cerebellar n...","[R-Ectorhinal area (ECT,895), R-Hippocampal re..."
8,9,"[L-Cerebellar cortex (CBX,614), L-Cerebellar n...","[L-Medulla, sensory related (MY-sen,472), L-Mi..."


In [18]:
# Unpack the per-community ROI lists into long format, one row per ROI,
# tagged with the class it came from. Communities whose list is empty explode
# to a missing value and are removed by dropna().
typed_frames = []
for roi_type in ('stable_core', 'flexible_periphery'):
    typed_frames.append(
        stability_df[['comm', roi_type]]
        .explode(roi_type)
        .dropna()
        .assign(type=roi_type)
        .rename(columns={roi_type: 'roi_name'})
    )
core_df, periphery_df = typed_frames

# NOTE: this rebinds `comm_rois_df`, which until here held the per-archetype
# membership table; cells that need `membership` must run before this one.
comm_rois_df = pd.concat(typed_frames, ignore_index=True)[['comm', 'roi_name', 'type']]
comm_rois_df

Unnamed: 0,comm,roi_name,type
0,1,"L-Accessory olfactory bulb (AOB,237)",stable_core
1,1,"L-Anterior olfactory nucleus (AON,245)",stable_core
2,1,"L-Dorsal peduncular area (DP,900)",stable_core
3,1,"L-Frontal pole, cerebral cortex (FLP,184)",stable_core
4,1,"L-Gustatory areas (GU,1143)",stable_core
...,...,...,...
353,9,"R-Midbrain, sensory related (MBsen,339)",flexible_periphery
354,9,"R-Piriform-amygdalar area (PAA,788)",flexible_periphery
355,9,"R-Posterior amygdalar nucleus (PA,780)",flexible_periphery
356,9,"R-Postpiriform transition area (TR,566)",flexible_periphery


In [19]:
# Level-specific output directory; `level` is whatever the earlier
# per-community export cell left behind.
folder = (
    f'{ESTIM_path}/group/multiplicity/community/{SBM}'
    f'/archetype-roi-composition/level-{level}'
)
os.makedirs(folder, exist_ok=True)

# Persist the stable-core / flexible-periphery assignment table.
stable_flexible_csv = f'{folder}/desc-stable-flexible-rois.csv'
comm_rois_df.to_csv(stable_flexible_csv)