# July 19, 2024: multilayer graphs
one graph per animal, with sessions as layers in the graph

In [1]:
import csv
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp 
import pickle 
from os.path import join as pjoin
from itertools import product
from tqdm import tqdm
from copy import deepcopy
from pathlib import Path
import subprocess
from scipy import sparse, stats
from multiprocessing import Pool
import glob

# networks
import graph_tool.all as gt

# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.cm import rainbow

plt.rcParamsDefault['font.family'] = "sans-serif"
plt.rcParamsDefault['font.sans-serif'] = "Arial"
plt.rcParams['font.size'] = 14
plt.rcParams["errorbar.capsize"] = 0.5

import cmasher as cmr  # CITE ITS PAPER IN YOUR MANUSCRIPT

# ignore user warnings
import warnings
warnings.filterwarnings("ignore") #, category=UserWarning)

In [2]:
class ARGS():
    pass

args = ARGS()

args.SEED = 100

In [3]:
args.type = 'spatial'
args.roi_size = 225
args.maintain_symmetry = True
args.brain_div = 'whl'
args.num_rois = 162

DESC = (
    f'type-{args.type}'
    f'_size-{args.roi_size}'
    f'_symm-{args.maintain_symmetry}'
    f'_braindiv-{args.brain_div}'
    f'_nrois-{args.num_rois}'
)

args.graph_type = 'correlation_graph' 
args.collection = f'multilayer' #f'individual'
args.unit = 'subwise' # runwise/seswise/subwise/grpwise : unit sample of dataset
args.denst = 25 # density of fc matrix

BASE_path = f'{os.environ["HOME"]}/mouse_dataset/roi_results_v2'
ROI_path = f'{BASE_path}/{DESC}'
TS_path = f'{ROI_path}/roi_timeseries'
ROI_RESULTS_path = f'{BASE_path}/{DESC}/{args.graph_type}/{args.collection}/{args.unit}/density-{args.denst}'
GRAPH_path = f'{ROI_RESULTS_path}/graphs'
os.system(f'mkdir -p {GRAPH_path}')

0

In [4]:
def create_multilayer_graph(args, gs):
    edges = []
    weights = []
    for ses, g in enumerate(gs):
        edges += [g.get_edges()]
        weights += [np.array([ses] * len(g.get_edges()))]
    edges = np.concatenate(edges)
    weights = np.concatenate(weights)
    g = gt.Graph(
        list(zip(*[*list(zip(*edges)), weights])),
        eprops=[('weight', 'int16_t')],
        directed=False,
    )
    return g

In [5]:
all_units = ['grp', 'sub', 'ses', 'run',]
args.indiv_unit = all_units[[i+1 for i, u in enumerate(all_units) if u == args.unit[:3]][0]]
INDIV_GRAPHS_path = f'{BASE_path}/{DESC}/{args.graph_type}/individual/{args.indiv_unit}wise/density-{args.denst}/graphs'
INDIV_GRAPHS_path

'/home/govindas/mouse_dataset/roi_results_v2/type-spatial_size-225_symm-True_braindiv-whl_nrois-162/correlation_graph/individual/seswise/density-25/graphs'

In [6]:
def load_graph(args, indiv_file):
    df = pd.DataFrame({})
    fs = indiv_file.split('/')[-1].split('_')
    for f_ in fs:
        col = f_.split('-')[0]
        val = f_.split('-')[-1]
        df[col] = [val]
    df['graph'] = gt.load_graph(indiv_file)
    return df

indiv_graphs_df = []
indiv_files = sorted(glob.glob(f'{INDIV_GRAPHS_path}/*', recursive=True))
for indiv_file in indiv_files:
    df = load_graph(args, indiv_file)
    indiv_graphs_df.append(df)
indiv_graphs_df = pd.concat(indiv_graphs_df).reset_index(drop=True)
indiv_graphs_df

Unnamed: 0,sub,ses,desc,graph
0,SLC01,1,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
1,SLC01,2,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
2,SLC01,3,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
3,SLC02,1,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
4,SLC02,2,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
5,SLC02,3,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
6,SLC03,1,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
7,SLC03,2,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
8,SLC03,3,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."
9,SLC04,1,mat.gt.gz,"<Graph object, undirected, with 162 vertices a..."


In [7]:
unit = args.unit[:3]
by = []
for c in indiv_graphs_df.columns.to_list():
    by += [c]
    if c == unit: break

multilayer_graphs_df = []
for key, group_df in indiv_graphs_df.groupby(by=by):
    if not type(key) == tuple: key = (key,)
    g = create_multilayer_graph(
        args, 
        group_df['graph'].to_list()
    )
    
    df = pd.DataFrame({})
    for b, k in zip(by, key):
        df[b] = [k]
    df['graph'] = [g]
    multilayer_graphs_df += [df]
multilayer_graphs_df = pd.concat(multilayer_graphs_df).reset_index(drop=True)
multilayer_graphs_df

Unnamed: 0,sub,graph
0,SLC01,"<Graph object, undirected, with 162 vertices a..."
1,SLC02,"<Graph object, undirected, with 162 vertices a..."
2,SLC03,"<Graph object, undirected, with 162 vertices a..."
3,SLC04,"<Graph object, undirected, with 162 vertices a..."
4,SLC05,"<Graph object, undirected, with 162 vertices a..."
5,SLC06,"<Graph object, undirected, with 162 vertices a..."
6,SLC07,"<Graph object, undirected, with 162 vertices a..."
7,SLC08,"<Graph object, undirected, with 162 vertices a..."
8,SLC09,"<Graph object, undirected, with 162 vertices a..."
9,SLC10,"<Graph object, undirected, with 162 vertices a..."


In [8]:
# save all graphs
GRAPH_path
for idx, row in multilayer_graphs_df.iterrows():
    file = [f'{col}-{row[col]}' for col in row.index.to_list()[:-1]]
    file += [f'desc-corr-mat.gt.gz']
    file = '_'.join(file)
    file = f'{GRAPH_path}/{file}'
    row['graph'].save(file)

In [24]:
args.B = 1
args.dc = True

state_df = pd.DataFrame(columns=['a', 'd', 'o', 'h'],)
state_df.loc['state'] = [
    gt.PPBlockState(
        g,
    ), 
    gt.LayeredBlockState(
        g,
        ec=g.ep.weight,
        B=args.B,
        layers=True,
        deg_corr=args.dc,
        overlap=False,
    ), 
    gt.LayeredBlockState(
        g, 
        ec=g.ep.weight,
        B=args.B,
        layers=True,
        deg_corr=args.dc,
        overlap=True,
    ), 
    gt.NestedBlockState(
        g,
        base_type=gt.LayeredBlockState,
        state_args=dict(
            ec=g.ep.weight,
            B=args.B,
            layers=True, 
            deg_corr=args.dc, 
            overlap=False,
        )    
    ),
]

state_df['h']['state']

<NestedBlockState object, with base <LayeredBlockState object with 1 blocks, 3 layers, degree-corrected, for graph <Graph object, undirected, with 162 vertices and 9780 edges, 1 internal edge property, at 0x7f74011e9dc0>, at 0x7f73d10c3130>, and 9 levels of sizes [(162, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)] at 0x7f73d1126d00>

---

In [9]:
# def mcmc_eq(args, g, state):
#     bs = [] # partitions
#     Bs = np.zeros(g.num_vertices() + 1) # number of blocks
#     Bes = [] # number of effective blocks
#     dls = [] # description length history
#     def collect_partitions(s):
#         bs.append(s.b.a.copy())
#         B = s.get_nonempty_B()
#         Bs[B] += 1
#         Bes.append(s.get_Be())
#         dls.append(s.entropy())
        
#     gt.mcmc_equilibrate(
#         state, 
#         wait=args.wait, 
#         force_niter=args.force_niter,
#         mcmc_args=dict(niter=args.niter), 
#         callback=collect_partitions,
#     )
#     return state, bs, Bs, Bes, dls

# def nested_mcmc_eq(args, g, state):
#     bs = []
#     Bs = [np.zeros(g.num_vertices() + 1) for s in state.get_levels()]
#     Bes = [[] for s in state.get_levels()]
#     dls = []
#     def collect_partitions(s):
#         bs.append(s.get_bs())
#         for l, sl in enumerate(s.get_levels()):
#             B = sl.get_nonempty_B()
#             Bs[l][B] += 1
#             Be = sl.get_Be()
#             Bes[l].append(Be)
#         dls.append(s.entropy())
        
#     gt.mcmc_equilibrate(
#         state, 
#         wait=args.wait, 
#         force_niter=args.force_niter, 
#         mcmc_args=dict(niter=args.niter),
#         callback=collect_partitions,
#     )
#     return state, bs, Bs, Bes, dls

# def posterior_modes(args, bs):
#     pmode = gt.ModeClusterState(bs, nested=args.nested)
#     gt.mcmc_equilibrate(pmode, wait=1, mcmc_args=dict(niter=1, beta=np.inf))
#     return pmode

In [10]:
# args.nested = False
# args.deg_corr = False
# args.overlap = False

# if not args.nested:
#     # state = gt.LayeredBlockState(
#     #     g, 
#     #     ec=g.ep.weight,
#     #     B=10,
#     #     layers=True, 
#     #     deg_corr=args.deg_corr,
#     #     overlap=args.overlap,
#     # )
#     state = gt.PPBlockState(
#         g, 
#     )
# else: 
#     state = gt.NestedBlockState(
#         g, 
#         base_type=gt.LayeredBlockState, 
#         state_args=dict(
#             ec=g.ep.weight, 
#             B=41,
#             layers=True, 
#             deg_corr=args.deg_corr,
#             overlap=args.overlap,
#         )
#     )

# args.wait = 12000
# args.force_niter = 25000
# args.niter = 10

# if not args.nested:
#     state, bs, Bs, Bes, dls = mcmc_eq(args, g, state)
# else:
#     state, bs, Bs, Bes, dls = nested_mcmc_eq(args, g, state)

# pmode = posterior_modes(args, bs)

In [11]:
# pmode.get_modes()

In [12]:
# modes = pmode.get_modes()
# omegas = []
# for mode in modes:
#     omegas += [mode.get_M() / args.force_niter]
# plt.plot(omegas)

---

In [13]:
# g = gt.collection.ns["new_guinea_tribes"]

# args.nested = False
# args.deg_corr = True
# args.overlap = False

# if not args.nested:
#     state = gt.LayeredBlockState(
#         g, 
#         ec=g.ep.weight,
#         B=10,
#         layers=True, 
#         deg_corr=args.deg_corr,
#         overlap=args.overlap,
#     )
# else: 
#     state = gt.NestedBlockState(
#         g, 
#         base_type=gt.LayeredBlockState, 
#         state_args=dict(
#             ec=g.ep.weight, 
#             B=10,
#             layers=True, 
#             deg_corr=args.deg_corr,
#             overlap=args.overlap,
#         )
#     )

# args.wait = 12
# args.force_niter = 100
# args.niter = 10

# if not args.nested:
#     state, bs, Bs, Bes, dls = mcmc_eq(args, g, state)
# else:
#     state, bs, Bs, Bes, dls = nested_mcmc_eq(args, g, state)

# pmode = posterior_modes(args, bs)

# # state = gt.minimize_nested_blockmodel_dl(
# #     g,
# #     state_args=dict(
# #         base_type=gt.LayeredBlockState,
# #         state_args=dict(ec=g.ep.weight, layers=True, deg_corr=True),
# #     )
# # )

In [14]:
# state

In [15]:
# modes = pmode.get_modes()
# print(len(modes))
# modes[0].get_M()

In [16]:
# class ARGS():
#     pass

# args = ARGS()

# args.SEED = 100

In [17]:
# g = gt.collection.ns["new_guinea_tribes"]
# state = gt.PPBlockState(
#     g, 
# )
# # state = gt.minimize_blockmodel_dl(g, state=gt.PPBlockState)
# args.wait = 12
# args.force_niter = 100
# args.niter = 10
# state, bs, Bs, Bes, dls = mcmc_eq(args, g, state)
# state

In [18]:
# g = gt.collection.ns["new_guinea_tribes"]
# state = gt.LayeredBlockState(
#     g, 
#     ec=g.ep.weight,
#     B=10,
#     layers=True, 
#     deg_corr=True,
#     overlap=False,
# )
# args.wait = 12
# args.force_niter = 100
# args.niter = 10
# state, bs, Bs, Bes, dls = mcmc_eq(args, g, state)
# state