In [1]:
from numba import cuda
import time
import numpy as np
import networkx as nx
from tqdm import tqdm
import cupy
import random
import os
import grape
from gensim.models import Word2Vec
import math
import pickle
import torch
from scipy.sparse import csr_matrix

# os.environ['NUMBA_DEBUG'] = '1'
# os.environ['NUMBA_ENABLE_CUDASIM'] = '1'

import sys
# Make the parent directory importable before the graphcuda import below
# (presumably graphcuda lives one level above this notebook — TODO confirm).
sys.path.append('..')

from graphcuda import generate_random_adjacency_matrix, floyd_warshall_gpu, faq_align

# Bare expression: its value (numba's CUDA device list) is displayed as the cell output.
cuda.gpus

  from .autonotebook import tqdm as notebook_tqdm


<numba.cuda.cudadrv.devices._DeviceList at 0x1c7044fbe20>

In [2]:
def damage_graph(A: np.array, max_deleted_edges=10):
    """Return a copy of ``A`` in which a random set of edges is marked as removed.

    Between 0 and ``max_deleted_edges`` candidate edges are drawn without
    replacement from the non-zero entries of the strict upper triangle of
    ``A``. A candidate is only marked when the graph stays connected after
    zeroing that single edge. Marked edges are not set to 0: both symmetric
    entries are overwritten with the very large weight 1000000000.

    Args:
        A: Square adjacency matrix (asserted). Presumably symmetric, since only
            the upper triangle is sampled and both ``[x, y]`` and ``[y, x]``
            are written — TODO confirm against the caller.
        max_deleted_edges: Upper bound on how many candidate edges are drawn.

    Returns:
        A copy of ``A`` with the accepted edges overwritten; ``A`` itself is
        not modified.
    """
    assert A.shape[0] == A.shape[1]

    # Weight written over an edge that is treated as removed.
    removed_weight = 1000000000

    result = cupy.copy(A)

    # Upper triangle # entries: N(N−1)/2
    edges = cupy.nonzero(cupy.triu(A, 1))
    edges = list(map(cupy.asnumpy, edges))
    n_edges = edges[0].shape[0]
    if n_edges == 0:
        # Nothing to damage in an edge-free graph.
        return result

    # Clamp the draw to the number of existing edges: sampling without
    # replacement raises ValueError when the sample exceeds the population.
    n_candidates = random.randint(0, min(max_deleted_edges, n_edges))
    random_indices = np.random.choice(n_edges, n_candidates, replace=False)

    for i in random_indices:
        x = edges[0][i]
        y = edges[1][i]

        # Test connectivity on a scratch copy with only this edge zeroed.
        # NOTE(review): edges marked earlier carry removed_weight (non-zero) in
        # `result`, so they still count as present in this check — confirm
        # that this is the intended notion of "connected".
        temp = cupy.copy(result)
        temp[y, x] = 0
        temp[x, y] = 0
        G = nx.from_numpy_array(temp)
        if nx.is_connected(G):
            result[y, x] = removed_weight
            result[x, y] = removed_weight

    return result

def get_embedding(G: np.array, walk_len=100, num_walks=10, dimension_size=128, p=1, q=2):
    """Compute random-walk / Word2Vec node embeddings for the weighted graph ``G``.

    The positive entries of the strict upper triangle of ``G`` are written as a
    tab-separated edge list to ``tmp/tmp.tsv`` under the current working
    directory, loaded as an undirected weighted GRAPE graph, walked with
    ``complete_walks`` and fed to a skip-gram ``Word2Vec`` model.

    Args:
        G: Square adjacency matrix; entries > 0 are treated as edge weights.
        walk_len: Length of every random walk.
        num_walks: Value passed as ``iterations`` to ``complete_walks``.
        dimension_size: Dimensionality of the embedding vectors.
        p: Passed as ``return_weight`` to the walker.
        q: Passed as ``explore_weight`` to the walker.

    Returns:
        Array of shape ``(len(G), dimension_size)`` whose row ``i`` is the
        vector of node ``i``. Nodes that never occur in a walk (for example
        nodes without any edge) keep an all-zero row.
    """
    folder_path = os.path.join(os.getcwd(), 'tmp')
    os.makedirs(folder_path, exist_ok=True)
    temp_file_path = os.path.join(folder_path, 'tmp.tsv')
    with open(temp_file_path, 'w') as f:
        # Write edgelist
        for i, row in enumerate(G[:-1]):
            for j, col in enumerate(row[i+1:]):
                if col > 0:
                    # j is relative to the slice start, hence the j + i + 1.
                    f.write(f'{i}\t{j + i + 1}\t{col}\n')

    # GRAPE Model
    grape_model = grape.Graph.from_csv(
        # Edges related parameters

        ## The path to the edges list tsv
        edge_path=temp_file_path,
        ## Set the tab as the separator between values
        edge_list_separator="\t",
        ## The first rows should NOT be used as the columns names
        edge_list_header=False,
        ## The source nodes are in the first column
        sources_column_number=0,
        ## The destination nodes are in the second column
        destinations_column_number=1,
        ## Both source and destinations columns use numeric node_ids instead of node names
        edge_list_numeric_node_ids=True,
        ## The weights are in the third column
        weights_column_number=2,

        # Graph related parameters
        ## The graph is undirected
        directed=False,
        ## Name given to the temporary graph
        name="Temp Grape Graph",
        ## Display a progress bar, (this might be in the terminal and not in the notebook)
        verbose=True,
    )
    walks = grape_model.complete_walks(
        walk_length=walk_len,
        iterations=num_walks,
        return_weight=p, # p
        explore_weight=q # q
    )

    grape_word2vec = Word2Vec(
        walks.tolist(),
        vector_size=dimension_size,
        window=5,
        min_count=0,
        sg=1,
        workers=16,
        epochs=10,
        seed=123
    )

    # `wv.vectors` is ordered by Word2Vec's vocabulary index (by default sorted
    # by token frequency), not by node id. Scatter the rows back so that row i
    # belongs to node i.
    # Assumes the walk tokens are the numeric node ids written to the edge list
    # above — TODO confirm against GRAPE's numeric-node-id handling.
    keyed_vectors = grape_word2vec.wv
    embedding = np.zeros((len(G), keyed_vectors.vector_size), dtype=keyed_vectors.vectors.dtype)
    node_ids = np.asarray(keyed_vectors.index_to_key, dtype=np.int64)
    embedding[node_ids] = keyed_vectors.vectors

    return embedding

def distance_sum(G: np.ndarray):
    """Add up every entry of the matrix produced by ``floyd_warshall_gpu(G)``.

    Presumably that matrix holds all-pairs shortest-path distances (inferred
    from the helper's name — TODO confirm).
    """
    path_matrix = floyd_warshall_gpu(G)
    total = cupy.sum(path_matrix)
    return total

def get_edgelist(A: np.ndarray) -> list[tuple]:
    """List the ``(row, col)`` index pairs of positive entries above the diagonal.

    Only the strict upper triangle (``k=1``) is inspected, so each undirected
    edge is reported once with ``row < col``. Pairs come out in row-major
    order and contain plain Python ints.
    """
    # Vectorised lookup instead of visiting all N*N cells in a Python loop.
    rows, cols = np.nonzero(np.triu(A, k=1) > 0)
    return list(zip(rows.tolist(), cols.tolist()))

def get_weightlist(A: np.ndarray) -> list:
    """List the positive entries above the diagonal of ``A`` in row-major order.

    The order matches ``get_edgelist`` so the two lists line up index by
    index. Elements are NumPy scalars of the matrix dtype.
    """
    upper = np.triu(A, k=1)
    # Boolean masking walks the array in row-major order; list() keeps the
    # elements as NumPy scalars rather than converting them to Python numbers.
    return list(upper[upper > 0])

def get_edgelist_tensor(A: np.ndarray) -> torch.Tensor:
    """Return the edges of ``A`` as a contiguous int64 tensor of shape ``(2, E)``.

    Row 0 holds the source indices and row 1 the destination indices, as
    produced by ``get_edgelist``. A graph without edges yields an empty
    ``(2, 0)`` tensor.
    """
    edges = get_edgelist(A)
    # The explicit dtype and reshape keep the (2, E) int64 layout when
    # `edges` is empty; for non-empty input they are no-ops.
    pairs = torch.tensor(edges, dtype=torch.long).reshape(len(edges), 2)
    return pairs.t().contiguous()

def get_weightlist_tensor(A: np.ndarray) -> torch.Tensor:
    """Wrap the output of ``get_weightlist(A)`` in a 1-D torch tensor."""
    weights = get_weightlist(A)
    return torch.tensor(weights)

def sparse_mx_to_torch_sparse_tensor(sparse_mx) -> torch.Tensor:
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Args:
        sparse_mx: Any scipy sparse matrix; it is converted to COO format and
            its data is cast with ``astype(float)``.

    Returns:
        A sparse COO tensor with the same shape as ``sparse_mx``, int64
        indices and values in the dtype produced by that cast.
    """
    coo = sparse_mx.tocoo().astype(float)
    indices = torch.from_numpy(
        np.vstack((coo.row, coo.col)).astype(np.int64))
    # For an unweighted graph the data array only contains ones.
    values = torch.from_numpy(coo.data)
    shape = torch.Size(coo.shape)
    # torch.sparse_coo_tensor replaces the deprecated legacy constructor
    # torch.sparse.FloatTensor(indices, values, shape), which warns on use.
    return torch.sparse_coo_tensor(indices, values, shape)
    
class SGNNSample:
    """A base graph, its damaged and aligned counterpart, and features derived from both.

    Construction runs the whole pipeline: generate a random adjacency matrix,
    damage it with ``damage_graph``, pass both through ``faq_align``, then
    store edge lists, weight lists, sparse embeddings and the distance sums of
    both graphs together with their ratio.
    """

    def __init__(
        self,
        n_nodes: int,
        prob_edge: float,
        max_deleted_edges: int = 10,
        walk_len: int = 100,
        num_walks: int = 10,
        dimension_size: int = 256,
        p: int = 1,
        q: int = 2,
        seed: int = 42,
        max_iter_align: int = 30,
    ):
        self.seed = seed
        self.max_deleted_edges = max_deleted_edges

        # Graph pair: random base graph and its damaged version after alignment.
        self.G_base = generate_random_adjacency_matrix(n_nodes, prob_edge)
        damaged = damage_graph(self.G_base, max_deleted_edges=max_deleted_edges)
        self.G_modified = faq_align(self.G_base, damaged, seed=seed, max_iter=max_iter_align)

        # Edge indices and the matching edge weights for both graphs.
        self.edgelist_base = get_edgelist_tensor(self.G_base)
        self.edgelist_modified = get_edgelist_tensor(self.G_modified)

        self.weightlist_base = get_weightlist_tensor(self.G_base)
        self.weightlist_modified = get_weightlist_tensor(self.G_modified)

        # Both graphs are embedded with the same walk / Word2Vec settings.
        embedding_options = dict(
            walk_len=walk_len,
            num_walks=num_walks,
            dimension_size=dimension_size,
            p=p,
            q=q,
        )

        def sparse_embedding(adjacency):
            # Dense embedding -> scipy CSR -> torch sparse tensor.
            dense = get_embedding(adjacency, **embedding_options)
            return sparse_mx_to_torch_sparse_tensor(csr_matrix(dense))

        self.embedding_base = sparse_embedding(self.G_base)
        self.embedding_modified = sparse_embedding(self.G_modified)

        # Distance sums and the relative change between them.
        base_total = distance_sum(self.G_base)
        modified_total = distance_sum(self.G_modified)
        self.centrality_base = base_total
        self.centrality_modified = modified_total

        self.centrality_ratio = 1 - (modified_total - base_total) / base_total

In [12]:
# Dataset configuration: node count per graph, edge probability and sample count.
n_nodes   = 500
# NOTE(review): the denominator is the literal 100-1 although n_nodes is 500 —
# confirm whether it was meant to track n_nodes.
prob_edge = 1.2/(100-1)
# Number of SGNNSample objects to generate.
N = 1000

In [4]:
# Build N samples; every SGNNSample(...) call runs the full generate / damage /
# align / embed pipeline in its constructor, so this cell is the expensive one.
pairs = [SGNNSample(n_nodes, prob_edge) for _ in tqdm(range(N), position=0, desc='N', leave=False, ncols=100)]

N:   0%|                                                                   | 0/1000 [00:00<?, ?it/s]

  return torch.sparse.FloatTensor(indices, values, shape)
                                                                                                    

In [3]:
import pickle

# Reload a previously saved sample list; this rebinds `pairs`.
# NOTE: pickle.load can execute arbitrary code from the file — only load
# pickles you produced yourself.
# Presumably the file holds SGNNSample objects, in which case that class must
# already be defined in this kernel — TODO confirm.
training_file = 'graphs_500_normalized.pkl'
with open(training_file, 'rb') as f:
    pairs = pickle.load(f)

In [10]:
def normalize_centrality(pairs, verbose=True):
    """Rescale each pair's ``centrality_ratio`` relative to the smallest ratio.

    Sets ``pair.centrality_ratio_normalized`` on every element, in place, to
    ``(ratio - min_ratio) / (1 - min_ratio)``, so the smallest ratio maps to 0
    and a ratio of exactly 1 maps to 1.

    Args:
        pairs: Sequence of objects exposing ``centrality_ratio``. An empty
            sequence is a no-op.
        verbose: When true (the default), print each normalized value.

    Returns:
        None; the objects in ``pairs`` are modified in place.
    """
    if len(pairs) == 0:
        # min() would raise on an empty sequence; there is nothing to normalize.
        return

    centrality_min = min(pair.centrality_ratio for pair in pairs)
    span = 1 - centrality_min

    for pair in pairs:
        if span == 0:
            # The smallest ratio is exactly 1, so the rescaling range is empty.
            # Use 1.0, the value the formula assigns to a ratio of 1 elsewhere.
            pair.centrality_ratio_normalized = 1.0
        else:
            pair.centrality_ratio_normalized = (pair.centrality_ratio - centrality_min) / span
        if verbose:
            print(pair.centrality_ratio_normalized)

# Adds centrality_ratio_normalized to every element of `pairs` in place and prints each value.
normalize_centrality(pairs)

0.9930769095027077
0.9990849013927857
0.9918869656947992
1.0
0.995503728446777
0.9929600486773305
0.9920688550073532
0.9999563659964207
0.9999721983092563
0.9922183619065449
0.9911836708829203
0.9999999999999993
1.0
0.9833913230929359
1.0
0.9970524126560106
0.9958625296395556
0.927306138032624
0.9940223084671008
0.994447059403512
0.9999384829044686
0.9978629967072004
0.8200441513938806
0.9767608221016446
0.995118374309319
0.9942471261656272
0.6804437635269597
0.9994045679450434
1.0
0.9851530246358273
0.9931321373100411
0.9968511351550028
0.9964617701489459
1.0
0.9939110281684114
0.9978964538913979
0.977601783669695
0.9977824580586521
0.9909823594058212
0.994488138595845
0.5186228108470351
0.9833836382192701
1.0000000000000007
0.9936365781825406
0.9999284304476808
0.9957315643073426
0.9998778016936983
0.989651537454579
1.0
0.9878321404925952
0.9958016278805938
0.9882144045290862
0.9893313966287515
0.999710726423668
0.994741446264008
0.9980865336256608
0.9795839873677968
0.99786365014560

In [1]:
# Persist the sample list. The file name depends on n_nodes from the
# configuration cell; the NameError recorded below shows this cell being run
# in a kernel where that cell had not been executed.
# With n_nodes = 500 this writes 'graphs_500_normalized.pkl', the same file
# name the load cell reads.
with open(f'graphs_{n_nodes}_normalized.pkl', 'wb') as f:
    pickle.dump(pairs, f, protocol=pickle.HIGHEST_PROTOCOL)

NameError: name 'n_nodes' is not defined