# GraphRNN

In [None]:
# Clone only the `colab` branch of the GraphRNN fork into ./GraphRNN.
!git clone --single-branch --branch colab https://github.com/joaopedromattos/GraphRNN

Cloning into 'GraphRNN'...
remote: Enumerating objects: 99, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 99 (delta 12), reused 19 (delta 8), pack-reused 74[K
Unpacking objects: 100% (99/99), done.


In [None]:
!pip install gdown
!gdown --id 1RF_bIo5ndxPhu9SJw-T8HBcuHyaGQGL0 && tar -xzvf datasets.tar.gz

Downloading...
From: https://drive.google.com/uc?id=1RF_bIo5ndxPhu9SJw-T8HBcuHyaGQGL0
To: /content/datasets.tar.gz
22.7MB [00:00, 62.4MB/s]
datasets_runs/
datasets_runs/run_1_google_news_5w1h_graph_hin.nx
datasets_runs/run_6_40er_5w1h_graph_hin.nx
datasets_runs/run_4_bbc_5w1h_graph_hin.nx
datasets_runs/run_8_gold_standard_5w1h_graph_hin.nx
datasets_runs/run_5_bbc_5w1h_graph_hin.nx
datasets_runs/run_9_google_news_5w1h_graph_hin.nx
datasets_runs/run_5_gold_standard_5w1h_graph_hin.nx
datasets_runs/run_2_bbc_5w1h_graph_hin.nx
datasets_runs/run_9_news_cluster_5w1h_graph_hin.nx
datasets_runs/run_7_40er_5w1h_graph_hin.nx
datasets_runs/run_9_gold_standard_5w1h_graph_hin.nx
datasets_runs/run_8_google_news_5w1h_graph_hin.nx
datasets_runs/run_10_bbc_5w1h_graph_hin.nx
datasets_runs/run_8_news_cluster_5w1h_graph_hin.nx
datasets_runs/run_2_news_cluster_5w1h_graph_hin.nx
datasets_runs/run_8_40er_5w1h_graph_hin.nx
datasets_runs/run_6_bbc_5w1h_graph_hin.nx
datasets_runs/run_4_google_news_5w1h_graph_hin

In [None]:
# Move the contents of the cloned GraphRNN/ folder into the working directory, so the
# later cells can use relative paths (./dataset/, requirements.txt, main.py).
# NOTE(review): not idempotent -- running this cell a second time fails because
# GraphRNN/ no longer contains anything to move.
!mv GraphRNN/* .

In [None]:
!mkdir ./dataset/EVENT

## Preparing our graph

In [None]:
import networkx as nx
import numpy as np

# Pick the graph to train on and load it from its pickle file.
# NOTE: read_gpickle unpickles the file, so it must only be used on trusted data.
G = nx.read_gpickle(
    './datasets_runs/run_1_gold_standard_5w1h_graph_hin.nx'
)
# Cell output: (number of nodes, number of edges).
G.number_of_nodes(), G.number_of_edges()

(579, 803)

### Indicator file

In [None]:
# GraphRNN reads an indicator file whose i-th line holds the id of the graph
# that the i-th node belongs to.
# E.g.: the value 222 on line 85824 means that node 85824 belongs to
# graph number 222.
# There is a single graph here, so every line is written as 1.

# Position -> original node id, in G.nodes iteration order. The adjacency-matrix
# cell inverts this mapping to relabel the nodes with integers.
node_mapper = dict(enumerate(G.nodes))
np.savetxt("EVENT_graph_indicator.txt", np.ones(shape=len(G.nodes)), fmt='%d')

### Adjacency matrix (edge list)

In [None]:
# GraphRNN builds its adjacency matrix from an edge list; the file is read by the
# Graph_load_batch method in data.py.
# Nodes are renamed to their integer position by inverting node_mapper
# (position -> node becomes node -> position), then written as "u, v" pairs.
# NOTE(review): the ids written here are 0-based. The training log further down reports
# 578 nodes / 794 edges, versus the 579 / 803 loaded above -- check whether the loader
# expects 1-based ids (TU-dataset convention) and silently drops node 0. TODO confirm.
G_relabel = nx.relabel_nodes(
    G,
    dict(zip(node_mapper.values(), node_mapper.keys())),
)
nx.write_edgelist(G_relabel, "EVENT_A.txt", data=False, delimiter=', ')

### Node Labels

In [None]:
# Collect one label per node, in G.nodes order.
# Unlabeled nodes are manually labeled 'no_label'. A node counts as unlabeled when it
# has no 'label' attribute or when that attribute is NaN: NaN never compares equal to
# itself, which makes it an unreliable dictionary key, so it is folded into 'no_label'
# instead of becoming a class of its own.
labels = []
for node in G.nodes:
    label = G.nodes[node].get('label', 'no_label')
    if isinstance(label, float) and np.isnan(label):
        label = 'no_label'
    labels.append(label)

# Give a unique natural number to each label, in order of first appearance.
label_mapper = {}
for label in labels:
    label_mapper.setdefault(label, len(label_mapper))
print(label_mapper)

# Map the labels to their numbers and write them to a file, one per line.
node_labels_list = [label_mapper[label] for label in labels]
# Only a preview is printed; the complete list is in the file written below.
print(node_labels_list[:20], '...', len(node_labels_list), 'labels in total')
np.savetxt("EVENT_node_labels.txt", node_labels_list, fmt='%d')

{'toberone-gate': 0, 'no_label': 1, 'clinton blames comey': 2, 'china well': 3, 'cubs win championship': 4, 'seattle shooting': 5, 'gaga protests': 6, 'f1': 7, 'clinton clear': 8, 'consulate attack': 9, 'trump and obama': 10, 'tram': 11, 'tram victims': 12, nan: 13}
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 2, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 1, 1, 1, 1, 

In [None]:
# Move the generated files (EVENT_graph_indicator.txt, EVENT_A.txt,
# EVENT_node_labels.txt) into the dataset folder created earlier.
!mv EVENT_* dataset/EVENT/

## Running GraphRNN

In [None]:
# Install the packages listed in the repository's requirements.txt.
# NOTE(review): according to the install log this replaces networkx 2.5.1 with 1.11,
# while this kernel has already imported networkx in an earlier cell. An already
# imported module normally stays loaded until the runtime is restarted, so the notebook
# and the `main.py` subprocess may be running different networkx versions -- confirm.
!pip install -r requirements.txt

Collecting tensorboard-logger
  Downloading https://files.pythonhosted.org/packages/87/7a/ec0fd26dba69191f82eb8f38f5b401c124f45a207490a7ade6ea9717ecdb/tensorboard_logger-0.1.0-py2.py3-none-any.whl
Collecting networkx==1.11
[?25l  Downloading https://files.pythonhosted.org/packages/d3/2c/e473e54afc9fae58dfa97066ef6709a7e35a1dd1c28c5a3842989322be00/networkx-1.11-py2.py3-none-any.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 41.7MB/s 
[31mERROR: scikit-image 0.16.2 has requirement networkx>=2.0, but you'll have networkx 1.11 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
Installing collected packages: tensorboard-logger, networkx
  Found existing installation: networkx 2.5.1
    Uninstalling networkx-2.5.1:
      Successfully uninstalled networkx-2.5.1
Successfully installed networkx-1.11 tensorboard-logger-0.1.0


In [None]:
# Run the repository's main.py; per its log it loads the EVENT dataset prepared above,
# saves the train/test graphs under ./graphs/ and starts training.
!python main.py

2021-05-08 15:06:51.369565: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
1.8.1+cu101
Using CUDA: True
CUDA available: True
_CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15109MB, multi_processor_count=40)
CUDA 1
File name prefix GraphRNN_RNN_EVENT_4_128_
Loading graph dataset: EVENT
Loaded 1
[<networkx.classes.graph.Graph object at 0x7fade5894950>] [] [] [<networkx.classes.graph.Graph object at 0x7fade5894950>]
graph_validate_len 0.0
graph_test_len 578.0
total graph num: 1, training set: 1
max number node: 578
max/min number edge: 794; 794
max previous node: 230
train and test graphs saved at:  ./graphs/GraphRNN_RNN_EVENT_4_128_test_0.dat
  cpuset_checked))
  nn.init.xavier_uniform(param,gain=nn.init.calculate_gain('sigmoid'))
  nn.init.constant(param, 0.25)
  m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
Epoch: 1/10, train loss: 0.115169, graph type

## Converting our outputs

In [None]:
import pickle

# Load the list of graphs stored in the prediction file under ./graphs/.
# The file is opened in a `with` block so the handle is closed as soon as it is read.
# NOTE: pickle can execute arbitrary code while loading -- only open files produced by
# your own run, never files from an untrusted source.
with open("graphs/GraphRNN_RNN_EVENT_4_128_pred_10_1_4000_nodes.dat", "rb") as pred_file:
    G_pred_list = pickle.load(pred_file)
G_pred_list

[<networkx.classes.graph.Graph at 0x7f604987a090>]

In [None]:
# Rebuild every predicted graph as a weighted nx.DiGraph and pickle the resulting list.
# `.edge` and `.node` are read as mappings (node -> {neighbour -> attribute dict} and
# node -> attributes); presumably these are the networkx 1.x attribute names, matching
# the networkx==1.11 requirement installed above -- confirm.
graph_list = []
for pred_graph in G_pred_list:
    # One (source, target, weight) triple per adjacency entry. The loaded objects are
    # undirected Graphs, whose adjacency lists each edge under both endpoints, so every
    # edge appears once per direction.
    cur_graph_edges = [
        (source, target, attrs['weight'])
        for source, neighbours in pred_graph.edge.items()
        for target, attrs in neighbours.items()
    ]
    test_graph = nx.DiGraph()
    test_graph.add_nodes_from(pred_graph.node)
    test_graph.add_weighted_edges_from(cur_graph_edges)
    graph_list.append(test_graph)

    print("Nodes, edges:", len(pred_graph.node), len(cur_graph_edges))

# Write through a `with` block so the file is flushed and closed deterministically.
with open('graph_list.dat', 'wb') as out_file:
    pickle.dump(graph_list, out_file)

Nodes, edges: 4001 150140
