In [1]:
import numpy as np
import random
import networkx as nx
import pickle
import time
from scipy import sparse
import torch
from tqdm.auto import trange
import grape
import os
from gensim.models import Word2Vec
import cupy
from graphcuda import floyd_warshall_gpu, generate_random_adjacency_matrix, faq_align
from xx_AllFunctions import gnp_random_connected_graph, get_adjacency_feature_centrality, get_adjacency_centrality, get_node_embedding, sparse_mx_to_torch_sparse_tensor

In [2]:
def distance_sum(G: np.ndarray):
    """Sum every entry of the matrix returned by ``floyd_warshall_gpu(G)``.

    Args:
        G: Matrix handed unchanged to ``floyd_warshall_gpu`` (callers in this
            notebook pass a dense adjacency matrix).

    Returns:
        The result of ``cupy.sum`` over the Floyd-Warshall output.
    """
    return cupy.sum(floyd_warshall_gpu(G))

def get_embedding(G: np.ndarray, walk_len=100, num_walks=10, dimension_size=128, p=1, q=2):
    """Compute node2vec-style node embeddings for a weighted adjacency matrix.

    The upper triangle of ``G`` is written as a weighted edge list to
    ``<cwd>/tmp/tmp.tsv``, loaded as an undirected GRAPE graph, random walks
    are generated with ``complete_walks`` and a skip-gram Word2Vec model is
    trained on those walks.

    Args:
        G: Square adjacency matrix; entries ``> 0`` above the diagonal are
            written as edges with that value as weight.
        walk_len: Length of each random walk.
        num_walks: Number of walk iterations passed to GRAPE.
        dimension_size: Embedding dimensionality.
        p: Return weight of the walks.
        q: Explore weight of the walks.

    Returns:
        ``scipy.sparse.csr_matrix`` of shape ``(G.shape[0], dimension_size)``
        whose row ``i`` is the embedding of node id ``i``. Nodes that never
        appear in a walk keep an all-zero row.
    """
    folder_path = os.path.join(os.getcwd(), 'tmp')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)
    temp_file_path = os.path.join(folder_path, 'tmp.tsv')
    with open(temp_file_path, 'w') as f:
        # Write edgelist: only the strict upper triangle, so every undirected
        # edge is emitted once as "<src>\t<dst>\t<weight>".
        for i, row in enumerate(G[:-1]):
            for j, col in enumerate(row[i+1:]):
                if col > 0:
                    f.write(f'{i}\t{j + i + 1}\t{col}\n')

    # GRAPE Model
    grape_model = grape.Graph.from_csv(
        # Edges related parameters

        ## The path to the edges list tsv
        edge_path=temp_file_path,
        ## Set the tab as the separator between values
        edge_list_separator="\t",
        ## The first row should NOT be used as the column names
        edge_list_header=False,
        ## The source nodes are in the first column
        sources_column_number=0,
        ## The destination nodes are in the second column
        destinations_column_number=1,
        ## Both source and destinations columns use numeric node_ids instead of node names
        edge_list_numeric_node_ids=True,
        ## The weights are in the third column
        weights_column_number=2,

        # Graph related parameters
        ## The graph is undirected
        directed=False,
        ## Name given to the temporary graph
        name="Temp Grape Graph",
        ## Display a progress bar, (this might be in the terminal and not in the notebook)
        verbose=True,
    )
    walks = grape_model.complete_walks(
        walk_length=walk_len,
        iterations=num_walks,
        return_weight=p, # p
        explore_weight=q # q
    )

    grape_word2vec = Word2Vec(
        walks.tolist(),
        vector_size=dimension_size,
        window=5,
        min_count=0,
        sg=1,
        workers=16,
        epochs=10,
        seed=123
    )

    # gensim stores vectors in vocabulary order (by default sorted by
    # descending token frequency), NOT in node-id order. Re-index the rows by
    # node id so that row i really is the embedding of node i.
    # NOTE(review): this assumes the ids appearing in the walks are the same
    # numeric ids written to the edge list -- confirm against GRAPE's handling
    # of edge_list_numeric_node_ids.
    word_vectors = grape_word2vec.wv
    node_features = np.zeros((G.shape[0], dimension_size), dtype=word_vectors.vectors.dtype)
    for node_id, vector_row in word_vectors.key_to_index.items():
        node_features[int(node_id)] = word_vectors.vectors[vector_row]

    feature_matrix = sparse.csr_matrix(node_features)

    return feature_matrix

In [3]:
def damage_graph(A: np.ndarray, max_deleted_edges=10):
    """Return a copy of ``A`` in which a few random edges are marked deleted.

    Between 0 and ``max_deleted_edges`` candidate edges are drawn from the
    strict upper triangle of ``A``. A candidate is only deleted when the graph
    stays connected after removing it *together with every edge already
    deleted by this call*. Deleted edges are not zeroed in the returned
    matrix; both symmetric entries are set to a very large weight instead.

    Args:
        A: Square adjacency matrix (asserted).
        max_deleted_edges: Upper bound on the number of candidate edges.

    Returns:
        A copy of ``A`` with the deleted edges set to the sentinel weight.
    """
    assert A.shape[0] == A.shape[1]

    # Sentinel weight written in place of a deleted edge.
    deleted_weight = 1000000000

    result = cupy.copy(A)
    # Working copy in which deleted edges are truly removed (0). Checking
    # connectivity on `result` would be wrong: the sentinel weight is
    # non-zero, so earlier deletions would still count as existing edges and
    # several deletions combined could disconnect the graph.
    remaining = cupy.copy(A)

    # Upper triangle # entries: N(N−1)/2
    edges = cupy.nonzero(cupy.triu(A, 1))
    edges = list(map(cupy.asnumpy, edges))
    n_edges = edges[0].shape[0]
    # Clamp the sample size: choice(..., replace=False) raises ValueError
    # when asked for more samples than there are edges.
    n_candidates = min(random.randint(0, max_deleted_edges), n_edges)
    random_indices = np.random.choice(n_edges, n_candidates, replace=False)

    for i in random_indices:
        x = edges[0][i]
        y = edges[1][i]

        # Tentatively remove the edge, then keep or undo the removal.
        remaining[y, x] = 0
        remaining[x, y] = 0
        if nx.is_connected(nx.from_numpy_array(remaining)):
            result[y, x] = deleted_weight
            result[x, y] = deleted_weight
        else:
            # Removing this edge would disconnect the graph: restore it.
            remaining[y, x] = A[y, x]
            remaining[x, y] = A[x, y]

    return result

def get_fm_centrality(graph: nx.Graph):
    """Build the edge tensors, the distance-sum score and node features of ``graph``.

    Args:
        graph: networkx graph whose edges all carry a ``'weight'`` attribute.

    Returns:
        Tuple ``(edge_index, edge_weight, centrality, feature_mat)``:
        ``edge_index`` is the transposed, contiguous tensor of ``graph.edges``;
        ``edge_weight`` holds the matching ``'weight'`` values;
        ``centrality`` is ``distance_sum`` of the dense adjacency matrix;
        ``feature_mat`` is the zero-padded embedding matrix after conversion
        by ``sparse_mx_to_torch_sparse_tensor``.
    """
    embedding_dim = 256

    # Score of the graph: sum over the Floyd-Warshall output.
    centrality = distance_sum(nx.to_numpy_array(graph))

    # USING GRAPE
    embedding = get_embedding(
        nx.to_numpy_array(graph),
        walk_len=50,
        num_walks=50,
        dimension_size=embedding_dim,
        p=1,
        q=2,
    ).toarray()

    # One row per node; rows are filled in the order get_embedding returns
    # them, any rows it did not provide stay zero.
    padded_features = np.zeros((graph.number_of_nodes(), embedding_dim))
    padded_features[:embedding.shape[0]] = embedding
    feature_mat = sparse_mx_to_torch_sparse_tensor(sparse.csr_matrix(padded_features))

    edge_list = list(graph.edges)
    edge_index = torch.tensor(edge_list).t().contiguous()
    edge_weight = torch.tensor([graph[src][dst]['weight'] for src, dst in edge_list])

    return edge_index, edge_weight, centrality, feature_mat

def get_centrality(graph: nx.Graph):
    """Build the edge tensors and the distance-sum score of ``graph``.

    Args:
        graph: networkx graph whose edges all carry a ``'weight'`` attribute.

    Returns:
        Tuple ``(edge_index, edge_weight, centrality)``: the transposed,
        contiguous tensor of ``graph.edges``, the matching ``'weight'``
        values, and ``distance_sum`` of the dense adjacency matrix.
    """
    # Score of the graph: sum over the Floyd-Warshall output.
    centrality = distance_sum(nx.to_numpy_array(graph))

    edge_list = list(graph.edges)
    edge_index = torch.tensor(edge_list).t().contiguous()
    edge_weight = torch.tensor([graph[src][dst]['weight'] for src, dst in edge_list])

    return edge_index, edge_weight, centrality

In [6]:
class GNNSample:
    """One training sample: a random graph and a damaged version of it.

    Attributes set by ``__init__``:
        base_graph: undirected networkx graph of the aligned random matrix.
        mod_graph: directed networkx graph of the damaged matrix.
        base_edge_index, base_edge_weight, base_centrality, base_fm:
            outputs of ``get_fm_centrality(base_graph)``.
        mod_edge_index, mod_edge_weight, mod_centrality, mod_fm:
            outputs of ``get_fm_centrality(mod_graph)``.
    """

    def __init__(self, reference_graph, n_nodes, prob_edge):
        # Draw a random adjacency matrix and align it to the reference graph.
        random_adjacency = generate_random_adjacency_matrix(n_nodes, prob_edge)
        aligned_adjacency = faq_align(reference_graph, random_adjacency, seed=123, max_iter=30)

        # Damaged variant of the aligned matrix.
        damaged_adjacency = damage_graph(aligned_adjacency)

        self.base_graph = nx.from_numpy_array(aligned_adjacency)
        # NOTE(review): only the damaged graph is converted to a directed
        # graph, so its edge list holds both directions while the base graph's
        # does not -- confirm this asymmetry is intended.
        self.mod_graph = nx.from_numpy_array(damaged_adjacency).to_directed()

        (
            self.base_edge_index,
            self.base_edge_weight,
            self.base_centrality,
            self.base_fm,
        ) = get_fm_centrality(self.base_graph)
        (
            self.mod_edge_index,
            self.mod_edge_weight,
            self.mod_centrality,
            self.mod_fm,
        ) = get_fm_centrality(self.mod_graph)

In [7]:
# Number of samples to generate.
n_train_g = 100

# Number of nodes per graph.
nnodes = 500

# Edge probability handed to generate_random_adjacency_matrix.
# NOTE(review): the denominator is hard-coded to 100 - 1 while nnodes is 500;
# confirm this is intended rather than 1.2/(nnodes-1).
prob_edge = 1.2/(100-1)

list_train_graph_orig = []
list_train_graph = list()

list_test_graph_orig  = []
list_test_graph= list()

# Seed the random generators right before the stochastic generation so that
# re-running this cell reproduces the same dataset. damage_graph draws from
# both `random` and `np.random`; cupy is seeded as well in case the graph
# generator samples on the GPU (harmless otherwise).
SEED = 123
random.seed(SEED)
np.random.seed(SEED)
cupy.random.seed(SEED)

reference_graph = generate_random_adjacency_matrix(nnodes, prob_edge)

graphs = [GNNSample(reference_graph, nnodes, prob_edge) for _ in trange(n_train_g)]

  0%|          | 0/100 [00:00<?, ?it/s]

  return torch.sparse.FloatTensor(indices, values, shape)


In [8]:
# ratio = 1 - relative increase of the distance sum caused by the damage
# (1 means the damaged graph scores exactly like the base graph).
for sample in graphs:
    sample.ratio = 1 - (sample.mod_centrality - sample.base_centrality) / sample.base_centrality

ratio_min = min(graphs, key=lambda sample: sample.ratio).ratio

# Rescale so that ratio_min maps to 0 and a ratio of 1 maps to 1.
ratio_span = 1 - ratio_min
# When the smallest ratio is already 1 the span is zero and the rescaling
# would divide by zero (NaN labels); keep the raw ratio in that case.
span_is_zero = bool(ratio_span == 0)

for sample in graphs:
    if span_is_zero:
        sample.ratio_normalized = sample.ratio
    else:
        sample.ratio_normalized = (sample.ratio - ratio_min) / ratio_span

In [9]:
# Serialize the generated samples with the highest pickle protocol available.
with open(r'./graph_data_train_multiple.pickle', 'wb') as handle:
    pickle.Pickler(handle, protocol=pickle.HIGHEST_PROTOCOL).dump(graphs)