In [None]:
import os
import random
from time import time
import pandas as pd
import numpy as np
import networkx as nx
import graph_tool.all as gt
from motif_counts import *
from tqdm.auto import tqdm
from collections import defaultdict
from tqdm import tqdm
import pickle

# motif_counts.py: Function Reference

- **er_sample_degs(g, p, subgraph=True)**  
  Sample in- and out-degree sequences from an Erdős–Rényi model (as subgraph or full graph)

- **er_gr_sample_degs(ug, p_uni, p_bi, subgraph=True)**  
  Sample degrees from a generalized ER model preserving uni- and bi-directional edge rates

- **GE_ER_sample(all_undir_edges, two_pr_GE)**  
  Generate a single randomized edge set under specified probabilities for uni/bi edges

- **switch_and_hold(edges, niters=1000, verbose=False)**  
  Basic switch-and-hold configuration model preserving degree sequence

- **switch_and_hold_GE(edges, niters=1000, verbose=False)**  
  Configuration model that also preserves number of bidirectional edges

- **switch_and_hold_R3(edges, niters=1000, verbose=False)**  
  Configuration model preserving both uni- and bi-degree constraints

- **get_unidirectional_only(E)**  
  Return set of directed edges that are not reciprocated

- **get_bidirectional(E)**  
  Return set of directed edges whose reverse also exists

- **get_autapses(E)**  
  Return set of self-loops in the edge list

- **split_edge_set_to_two_patterns(E)**  
  Split E into (unidirectional, bidirectional) edge sets

- **merge_two_patterns_to_edge_set(uni_edges, bi_edges)**  
  Merge uni and bi sets back into a complete directed edge set

- **graph_to_edges_sets(g)**  
  Convert a NetworkX or graph_tool graph into uni/bi edge sets

- **edge_sets_to_graph(uni_edges, bi_edges)**  
  Build a NetworkX DiGraph from uni/bi edge sets

- **count_two_neuron_motifs(V, E)**  
  Count 2-node motif types (null, uni, bi, autapse) given vertices V and edge list E

- **count_two_neuron_motifs_graph(g)**  
  Wrapper: count two-neuron motifs directly from a graph object

- **compute_ER_two_neuron_motifs(g)**  
  Compute expected counts of 2-node motifs under ER null model

- **class Triplet**  
  Data structure for a 3-node subgraph with methods for edge handling and comparison

- **reflect(edges, identity)**  
  Helper for Triplet: reflect edge set across a chosen vertex axis

- **match_edges(A, B, reflections=range(3))**  
  Test if two edge sets are equivalent under any reflection symmetry

- **match(A, B)**  
  Test if two Triplet instances represent the same motif

- **collect_triplets(V, E)**  
  Enumerate all distinct 3-node Triplet objects present in V and E

- **collect_three_neuron_motifs(V, E, motifs)**  
  Group every Triplet into one of the canonical motif types

- **count_three_neuron_motifs(V, E, motifs)**  
  Return counts and instances of all 16 three-node motifs

- **compute_three_neuron_motif_probabilities(g)**  
  Compute expected 3-node motif probabilities under ER null model

- **compute_three_neuron_motif_probabilities_GE(g)**  
  Compute expected 3-node motif probabilities under generalized ER model

- **compute_expected_three_neuron_motifs(g, prob_dict)**  
  Scale motif probabilities to absolute expected counts

- **sample_config_two_neuron_motifs(G, samples, niters)**  
  Monte Carlo sampling of 2-node motifs via switch-and-hold

- **sample_config_three_neuron_motifs(G, samples, niters)**  
  Monte Carlo sampling of 3-node motifs via switch-and-hold

- **proximity_sample_two_neuron_motifs(…)**, **continuous_sample_two_neuron_motifs(…)**, etc.  
  Variants for sampling motifs under different null and sampling schemes

- **continuous_sample_three_neuron_motifs(…)**, **continuous_sample_three_neuron_motifs_GE(…)**, **continuous_sample_three_neuron_motifs_R3(…)**  
  Monte Carlo sampling of 3-node motifs under various constraints

- **sample_three_neuron_motifs_GE_ER(V, two_pr_GE, samples, thread, i)**  
  Sample 3-node motifs under generalized ER (GE-ER) model

- **sample_motifs_parallel_continuous(…)**, **sample_motifs_parallel_continuous_GE_ER(…)**, **sample_motifs_parallel_proximity(…)**  
  Parallel wrappers to run motif sampling across multiple processes

- **clustering_coef(counts)**  
  Compute clustering coefficient from a vector of 3-node motif counts


In [3]:
# Location of the edge table (feather file). The default is the path this
# notebook was originally written against; set the MCNS_EDGE_TABLE environment
# variable to run the notebook on another machine without editing this cell.
EDGE_TABLE_PATH = os.environ.get(
    'MCNS_EDGE_TABLE',
    '/Users/fkampf/Documents/mcns.network.analysis/mcns_fw_edge_comp.feather',
)

# Edge table; the cells below read its 'pre', 'post' and 'weight_m' columns.
df = pd.read_feather(EDGE_TABLE_PATH)

In [4]:
# Build a directed graph-tool graph from the edge table `df`.

# Every distinct node id that occurs as a source ('pre') or a target ('post').
labels = np.unique(np.concatenate([df['pre'].values, df['post'].values]))

# Empty directed graph, plus a string vertex property that stores each
# vertex's original id (registered on the graph under the key "label").
g = gt.Graph(directed=True)
v_label = g.new_vp("string")
g.vp["label"] = v_label

# One vertex per id. `label2v` maps an id to its vertex descriptor so the
# edge rows below can be translated from ids to vertices.
label2v = {}
for node_id in labels:
    vertex = g.add_vertex()
    v_label[vertex] = str(node_id)
    label2v[node_id] = vertex

# Float edge property (registered as "weight") filled from 'weight_m'.
e_weight = g.new_ep("float")
g.ep["weight"] = e_weight

# One (source vertex, target vertex, weight) row per table row, in table order.
edge_list = []
for pre_id, post_id, weight in df[['pre', 'post', 'weight_m']].itertuples(index=False):
    edge_list.append((label2v[pre_id], label2v[post_id], float(weight)))
g.add_edge_list(edge_list, eprops=[e_weight])

# Remove self-loops. The offending edges are collected into a list first so
# that the graph is not modified while its edge iterator is being consumed.
loops = list(filter(lambda edge: edge.source() == edge.target(), g.edges()))
for self_loop in loops:
    g.remove_edge(self_loop)

In [5]:
# Minimum edge weight (inclusive) for an edge of `g` to be kept in E.
WEIGHT_THRESHOLD = 5

# Vertices as plain integer indices 0 .. N-1.
V = list(range(g.num_vertices()))

# Look the edge-weight property map up once instead of once per edge.
edge_weight = g.ep['weight']

# Directed edges as (source index, target index) pairs whose weight reaches
# the threshold.
# NOTE(review): E is a set, so parallel edges between the same ordered pair
# collapse into one entry, and each parallel edge is compared against the
# threshold individually (weights are not summed) — confirm that the edge
# table has at most one row per (pre, post) pair.
E = {
    (int(e.source()), int(e.target()))
    for e in g.edges()
    if edge_weight[e] >= WEIGHT_THRESHOLD
}

In [None]:
# Cell-local imports. They are kept in this cell so that `motifs` is bound to
# graph_tool.clustering.motifs when the cell runs, whatever the earlier
# `from motif_counts import *` may have put under that name.
import graph_tool.all as gt
from graph_tool.all import Graph, GraphView
from graph_tool.clustering import motifs, motif_significance
from graph_tool.draw import graph_draw

# Boolean vertex mask: True for the first 500 vertices (by index), False elsewhere.
keep = g.new_vertex_property("bool")
keep.a[:] = False
keep.a[:500] = True

# Filtered view showing only the masked vertices, then a standalone pruned
# copy of that view.
# NOTE(review): despite its name, g100 is built from a 500-vertex mask, and it
# is not used again in this cell — the motif search below runs on the full
# graph `g`. Confirm which graph is intended.
view = GraphView(g, vfilt=keep)
g100 = Graph(view, prune=True)

# Enumerate 3-vertex motifs of `g`: the motif graphs, their counts, and
# (because return_maps=True) the vertex maps of the occurrences.
motifs_list, counts, maps = motifs(g, 3, return_maps=True)

# Report every motif and render it to "<index>.png" in the working directory.
for i, (motif, c) in enumerate(zip(motifs_list, counts)):
    # Edge list of this motif as (source index, target index) pairs.
    edges = []
    for arc in motif.edges():
        edges.append((int(arc.source()), int(arc.target())))
    print(f"Motif {i}: edges = {edges}, count = {c}")

    graph_draw(
        motif,
        vertex_fill_color=[0.5,0.5,0.5,1],
        output_size=(200,200),
        output=f"{i}.png"
    )

# Persist the three results, one pickle file each, using the most efficient
# pickle protocol available.
for filename, payload in (
    ('motifs_list.pkl', motifs_list),
    ('counts.pkl', counts),
    ('maps.pkl', maps),
):
    with open(filename, 'wb') as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

