In [1]:
import networkx as nx
import random
import numpy as np
import itertools
from gensim.models import Word2Vec
from pecanpy import pecanpy
from scipy import sparse
import torch
import pickle
import time

RANDOM_SEED = 123456
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)

In [22]:
def gnp_random_connected_graph(n: int, p: float) -> nx.Graph:
    """
    Generates a random undirected graph, similarly to an Erdős-Rényi 
    graph, but enforcing that the resulting graph is conneted
    """
    edges = itertools.combinations(range(n), 2)
    G = nx.Graph()
    G.add_nodes_from(range(n))
    if p <= 0:
        return G
    if p >= 1:
        return nx.complete_graph(n, create_using=G)
    
    for _, node_edges in itertools.groupby(edges, key=lambda x: x[0]):
        node_edges = list(node_edges)
        random_edge = random.choice(node_edges)
        G.add_edge(*random_edge)
        for e in node_edges:
            if random.random() < p:
                G.add_edge(*e)

    for (u, v) in G.edges():
        G.edges[u,v]['weight'] = random.random()*100

    print('Generated')
    return G

def distance_sum(g):
    lengths = dict(nx.floyd_warshall(g, weight='weight'))
    
    total_sum=0
    
    for i in range(len(g.nodes)):
        node_lengths=lengths[i]
        node_sum=sum(np.array(list(node_lengths.values())))
        # print(node_sum)
        total_sum+=node_sum
    return total_sum

def get_node_embedding(graph,dimensions=128, pfilename="test.edg"):
    print("Edges: " + str(len(graph.edges())))
    
    # Pecanpy
    nx.write_weighted_edgelist(graph, pfilename,delimiter='\t')
    
    g = pecanpy.SparseOTF(p=1, q=2, workers=1, verbose=True, extend=True, gamma=0, random_state=RANDOM_SEED)
    print("Extend=True")
    g.read_edg(pfilename, weighted=True, directed=True)
    
    starttime2 = time.time()
    walks = g.simulate_walks(num_walks=10, walk_length=100)# use random walks to train embeddings
    print(walks)
    model = Word2Vec(walks, vector_size=dimensions, window=5, min_count=5, sg=1, workers=1, epochs=10, seed=RANDOM_SEED)
    endtime2 = time.time()
    duration = endtime2-starttime2
    
    print(f'Pecanpy {duration}')
    
    feature_matrix = (model.wv.vectors)
    
    feature_matrix_sparse = sparse.csr_matrix(feature_matrix)

    print(f'Feature matrix shape: {feature_matrix.shape}')
    
    return walks#feature_matrix_sparse


def sparse_mx_to_torch_sparse_tensor(sparse_mx) -> torch.FloatTensor:
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    
    sparse_mx = sparse_mx.tocoo().astype(float)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    # FOR UNWEIGHTED, data only contains ones!!!!
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def adjacency_feature_centrality(graph: nx.Graph, num_nodes: int, ndim: int = 256, ) -> nx.Graph:
    # start = time.time()
    centrality = distance_sum(graph)
    # end = time.time()

    feature_mat = get_node_embedding(graph, dimensions=ndim)

    feature_mat = sparse_mx_to_torch_sparse_tensor(feature_mat)

    edge_index: torch.Tensor = torch.tensor(list(graph.edges)).t().contiguous()
    edge_weight: torch.Tensor = torch.tensor([graph[u][v]['weight'] for u, v, in graph.edges])

    return edge_index, edge_weight, centrality, feature_mat

In [23]:
N_NODES = 100
PROB_EDGE = 1.2/(100-1)

graph = gnp_random_connected_graph(n=N_NODES, p=PROB_EDGE)

Generated


In [10]:
feature_mat0 = get_node_embedding(graph, dimensions=256)

Edges: 156
Extend=True


  data[indptr[i] : indptr[i + 1]].mean()
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


  0%|          | 0/1000 [00:00<?, ?it/s]

[['70', '99'], ['85', '87', '98', '99'], ['41', '75', '88', '89', '97', '99'], ['35', '79', '90', '96', '98', '99'], ['91', '99'], ['28', '57', '79', '90', '96', '98', '99'], ['68', '82', '90', '96', '98', '99'], ['49', '58', '75', '88', '89', '97', '99'], ['35', '80', '92', '95', '96', '98', '99'], ['35', '79', '90', '96', '98', '99'], ['22', '25', '35', '80', '92', '95', '96', '98', '99'], ['9', '62', '98', '99'], ['43', '94', '97', '99'], ['65', '83', '91', '99'], ['59', '67', '69', '84', '94', '97', '99'], ['95', '96', '98', '99'], ['55', '75', '88', '89', '97', '99'], ['51', '95', '96', '98', '99'], ['52', '98', '99'], ['65', '83', '91', '99'], ['23', '55', '75', '88', '89', '97', '99'], ['55', '75', '88', '89', '97', '99'], ['2', '37', '91', '99'], ['7', '8', '22', '25', '35', '79', '90', '96', '98', '99'], ['10', '48', '58', '75', '88', '89', '97', '99'], ['78', '99'], ['1', '33', '44', '57', '76', '82', '90', '96', '98', '99'], ['84', '94', '97', '99'], ['56', '85', '87', '98',

In [31]:
graph = gnp_random_connected_graph(n=N_NODES, p=PROB_EDGE)

pfilename = 'test.edg'

nx.write_weighted_edgelist(graph, pfilename, delimiter='\t')

    
g = pecanpy.SparseOTF(p=1, q=2, workers=1, gamma=0, random_state=0)

g.read_edg(pfilename, weighted=True, directed=True)

walks = g.simulate_walks(num_walks=10, walk_length=100)# use random walks to train embeddings
print(walks)

Generated
[['82', '96', '99'], ['26', '76', '91', '96', '99'], ['86', '97', '99'], ['99'], ['64', '72', '99'], ['48', '92', '94', '97', '99'], ['80', '94', '97', '99'], ['53', '72', '99'], ['2', '75', '88', '91', '96', '99'], ['2', '38', '48', '92', '94', '97', '99'], ['73', '85', '98', '99'], ['49', '78', '83', '86', '97', '99'], ['92', '94', '97', '99'], ['62', '94', '97', '99'], ['41', '70', '93', '94', '97', '99'], ['34', '74', '79', '84', '86', '97', '99'], ['2', '38', '48', '92', '94', '97', '99'], ['72', '99'], ['51', '60', '97', '99'], ['40', '42', '79', '84', '86', '97', '99'], ['53', '74', '79', '84', '86', '97', '99'], ['68', '76', '91', '96', '99'], ['44', '72', '99'], ['4', '51', '62', '94', '97', '99'], ['47', '66', '75', '88', '91', '96', '99'], ['96', '99'], ['77', '84', '86', '97', '99'], ['1', '69', '82', '96', '99'], ['84', '86', '97', '99'], ['54', '58', '79', '84', '86', '97', '99'], ['76', '91', '96', '99'], ['44', '72', '99'], ['51', '60', '97', '99'], ['57', '78

In [32]:
g = pecanpy.SparseOTF(p=1, q=2, workers=1, gamma=0, random_state=0)

g.read_edg(pfilename, weighted=True, directed=True)
walks2 = g.simulate_walks(num_walks=10, walk_length=100)# use random walks to train embeddings
print(walks == walks2)

False


In [34]:
for w1, w2 in zip(walks, walks2):
    print(w1 == w2, w1, w2)

True ['82', '96', '99'] ['82', '96', '99']
True ['26', '76', '91', '96', '99'] ['26', '76', '91', '96', '99']
True ['86', '97', '99'] ['86', '97', '99']
True ['99'] ['99']
True ['64', '72', '99'] ['64', '72', '99']
True ['48', '92', '94', '97', '99'] ['48', '92', '94', '97', '99']
True ['80', '94', '97', '99'] ['80', '94', '97', '99']
True ['53', '72', '99'] ['53', '72', '99']
True ['2', '75', '88', '91', '96', '99'] ['2', '75', '88', '91', '96', '99']
True ['2', '38', '48', '92', '94', '97', '99'] ['2', '38', '48', '92', '94', '97', '99']
True ['73', '85', '98', '99'] ['73', '85', '98', '99']
True ['49', '78', '83', '86', '97', '99'] ['49', '78', '83', '86', '97', '99']
True ['92', '94', '97', '99'] ['92', '94', '97', '99']
True ['62', '94', '97', '99'] ['62', '94', '97', '99']
True ['41', '70', '93', '94', '97', '99'] ['41', '70', '93', '94', '97', '99']
True ['34', '74', '79', '84', '86', '97', '99'] ['34', '74', '79', '84', '86', '97', '99']
True ['2', '38', '48', '92', '94', '97',