# Stochastic Block Modeling

Calculate the cosine similarity matrix for afferent and efferent shared regions. Run SBM on this to infer the graph structure and determine clusters

In [5]:
import numpy as np
import pandas as pd
import bct
import matplotlib.pyplot as plt
from sklearn.manifold import MDS
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns # Using seaborn for prettier plotting
import plotly.graph_objects as go
from plotly.colors import qualitative
import graph_tool.all as gt

In [2]:
df_average = pd.read_csv('../data/average_connectome_data.csv', header=0, index_col=0)

# filter the afferent / efferent based on hippocampal connections, create similarity matrix
hippocampal_regions = np.array(['DG','CA3','CA2','CA1v','CA1d','SUBv','SUBd'])

# FROM hippocampus (efferent)
df_avg_from = df_average[df_average.index.isin(hippocampal_regions)]

# TO hippocampus (afferent)
df_average_t = df_average.T
df_avg_to = df_average_t[df_average_t.index.isin(hippocampal_regions)]

# drop HPC columns
df_avg_from = df_avg_from.drop(hippocampal_regions, axis=1)
df_avg_to = df_avg_to.drop(hippocampal_regions, axis=1)

# filter to only include columns and rows with at least one connection
df_avg_from = df_avg_from.loc[:,df_avg_from.apply(np.count_nonzero, axis=0) >= 1]
df_avg_to = df_avg_to.loc[:,df_avg_to.apply(np.count_nonzero, axis=0) >= 1]

# find the shared regions
common_cols = df_avg_to.columns.intersection(df_avg_from.columns)
df_avg_to_shared = df_avg_to[common_cols]
df_avg_from_shared = df_avg_from[common_cols]

In [4]:
# run cosine similarity
cosine_from_shared_values = cosine_similarity(df_avg_from_shared.T)
cosine_from_shared_labels = df_avg_from_shared.columns

cosine_df_from_shared = pd.DataFrame(cosine_from_shared_values, 
                             index=df_avg_from_shared.columns, 
                             columns=df_avg_from_shared.columns)

cosine_to_shared_values = cosine_similarity(df_avg_to_shared.T)
cosine_to_shared_labels = df_avg_to_shared.columns

cosine_df_to_shared = pd.DataFrame(cosine_to_shared_values, 
                             index=df_avg_to_shared.columns, 
                             columns=df_avg_to_shared.columns)

In [6]:
# start with efferent similarity matrix

N = 100

# np.fill_diagonal(cosine_dist_matrix, 0)

N = cosine_from_shared_values.shape[0]

# 1. Create a new undirected graph
g = gt.Graph(directed=False)

# 2. Add all N nodes to the graph
g.add_vertex(n=N)

# 3. Create an edge property map to store our weights (the distances)
# We use "double" for floating-point numbers
weights = g.new_edge_property("double")

# 4. Add edges and their weights from the matrix
# We iterate over the upper triangle of the matrix (k=1 to skip the diagonal)
rows, cols = np.triu_indices(N, k=1)

# Create a list of edges with their weights
# (This is much faster than adding them one by one)
edge_list = [(r, c, cosine_from_shared_values[r, c]) for r, c in zip(rows, cols)]

# Add all edges to the graph at once
g.add_edge_list(edge_list, eprops=[weights])

# 5. Store the weights as an internal property of the graph
# This makes it easier to access later
g.edge_properties["weight"] = weights

print(f"Graph created with {g.num_vertices()} vertices and {g.num_edges()} edges.")

Graph created with 72 vertices and 2556 edges.


In [11]:
state=gt.minimize_blockmodel_dl(g)

In [12]:
state.draw(pos=g.vp.pos, output="efferent_cosine_shared_regions.svg")

AttributeError: 'super' object has no attribute 'pos'

In [9]:
print("Fitting nested Stochastic Block Model...")

# Fit a hierarchical SBM
# This is the Bayesian part. It minimizes the description length (DL),
# which is equivalent to maximizing the posterior probability.
#
# We pass our edge weights via `recs` (real-valued edge covariates)
# and specify their type via `rec_types`.
# "real-normal" assumes weights are drawn from a Gaussian distribution,
# and the model will find the mu and sigma for each block-pair.
state = gt.minimize_nested_blockmodel_dl(g, 
                                          recs=[g.ep.weight], 
                                          rec_types=["real-normal"])

print("Model fitting complete.")

Fitting nested Stochastic Block Model...


TypeError: minimize_nested_blockmodel_dl() got an unexpected keyword argument 'recs'