In [None]:
import networkx as nx
import pandas as pd 
import os
import pickle

# Create an undirected weighted graph
csv_list = os.listdir('mind_adni1_bl_tiny/graphs')
for csv in csv_list:
    adj_df = pd.read_csv('mind_adni1_bl_tiny/graphs/' + csv)
    adj_df.index = adj_df.columns
    G = nx.from_pandas_adjacency(adj_df)
    # Save the graph
    with open ('mind_adni1_bl_tiny/graphs/' + csv.replace('.csv', '.pickle'), 'wb') as f:
        pickle.dump(G, f)
    # To load:
    with open('mind_adni1_bl_tiny/graphs/' + csv.replace('.csv', '.pickle'), 'rb') as f:
        G_loaded = pickle.load(f)
    break



FileNotFoundError: [Errno 2] No such file or directory: 'graph.pkl'

In [2]:
G = nx.from_pandas_adjacency(adj_df)

# If you want a directed graph:
# G = nx.from_pandas_adjacency(adj_df, create_using=nx.DiGraph)

# Check edge weights
print(G.edges(data=True))


[('lh_bankssts', 'lh_caudalanteriorcingulate', {'weight': 0.1140652944054903}), ('lh_bankssts', 'lh_caudalmiddlefrontal', {'weight': 0.1943389061058107}), ('lh_bankssts', 'lh_cuneus', {'weight': 0.1077427731896204}), ('lh_bankssts', 'lh_entorhinal', {'weight': 0.0789638080821287}), ('lh_bankssts', 'lh_fusiform', {'weight': 0.2088747937941824}), ('lh_bankssts', 'lh_inferiorparietal', {'weight': 0.2285128803882098}), ('lh_bankssts', 'lh_inferiortemporal', {'weight': 0.1831480868837595}), ('lh_bankssts', 'lh_isthmuscingulate', {'weight': 0.1515314237191808}), ('lh_bankssts', 'lh_lateraloccipital', {'weight': 0.1541637591731776}), ('lh_bankssts', 'lh_lateralorbitofrontal', {'weight': 0.1891519528225142}), ('lh_bankssts', 'lh_lingual', {'weight': 0.1911275256362577}), ('lh_bankssts', 'lh_medialorbitofrontal', {'weight': 0.152820899879148}), ('lh_bankssts', 'lh_middletemporal', {'weight': 0.1617093801558571}), ('lh_bankssts', 'lh_parahippocampal', {'weight': 0.1467279629899732}), ('lh_bankss

In [4]:
import networkx as nx
from collections import defaultdict
from tqdm import tqdm

def weisfeiler_lehman_step(graph, labels):
    new_labels = {}
    for node in graph.nodes():
        neighbors = sorted([labels[neighbor] for neighbor in graph.neighbors(node)])
        label_string = str(labels[node]) + "_" + "_".join(map(str, neighbors))
        new_labels[node] = hash(label_string)
    return new_labels

def extract_wl_features(graph, h=2):
    labels = {n: str(n) for n in graph.nodes()}
    doc = []

    for _ in range(h):
        labels = weisfeiler_lehman_step(graph, labels)
        doc.extend([str(v) for v in labels.values()])

    return doc  # WL "words" for this graph

# graph_list: list of networkx.Graph objects
graph_list = [G]  # your multiple graphs
corpus = [extract_wl_features(g, h=2) for g in tqdm(graph_list)]


100%|██████████| 1/1 [00:00<00:00, 332.80it/s]


In [5]:
from gensim.models import Word2Vec
import numpy as np

w2v = Word2Vec(sentences=corpus, vector_size=128, window=5, min_count=1, workers=4, sg=1, epochs=10)

# Get graph-level embeddings by averaging node labels' vectors
graph_embeddings = []
for doc in corpus:
    vectors = [w2v.wv[word] for word in doc if word in w2v.wv]
    graph_vec = np.mean(vectors, axis=0)
    graph_embeddings.append(graph_vec)

graph_embeddings = np.stack(graph_embeddings)


In [6]:
graph_embeddings[0]

array([ 1.2282131e-04, -9.1086840e-04,  5.4534670e-04,  5.1287189e-04,
        2.4829403e-04,  1.4577653e-04, -7.4820768e-04, -2.0984365e-04,
       -3.3801782e-04,  5.3939410e-04,  4.5695598e-04,  2.3204579e-04,
       -8.8847399e-04, -1.3335163e-04,  7.8835094e-04,  1.0657270e-03,
       -6.7646755e-04,  2.3761543e-04, -3.2187099e-04,  4.3915591e-04,
        3.2203467e-04,  5.4760405e-04, -8.2085218e-04, -8.0876733e-04,
       -9.3984714e-04,  3.1191590e-05, -6.5675682e-05,  7.2020572e-04,
        2.7980321e-04, -5.3326460e-04,  8.1682811e-07, -3.3918615e-05,
        9.9709789e-05,  3.7455643e-04, -2.1507163e-04,  4.7577184e-04,
        5.1081390e-04, -8.2224215e-06, -1.6088820e-04,  2.6137184e-04,
       -2.6642685e-04,  5.9640937e-04,  2.1392189e-04, -2.5320682e-04,
        5.4042978e-04,  4.1850106e-04, -5.1570451e-04,  3.2499322e-04,
       -5.1866827e-04,  6.5789866e-04,  9.8889199e-05,  3.7501601e-04,
        2.1622807e-04,  1.4999416e-04, -8.1646729e-05,  3.1268351e-05,
      