In [None]:
import csv
import json
import pickle
from functools import partial

import networkit as nk
import networkx as nx
import numpy as np
import pandas as pd
from networkit import linkprediction as lp, nxadapter
#lp for link-prediction, nxadapter for converting nx graph to networkit graph
from networkx.readwrite import json_graph

**Define local helper functions**

In [0]:
def assign_label(pair, graph):
  """Label a candidate node pair by whether `graph` contains that link.

  Args:
    pair: indexable holding the two node ids at positions 0 and 1.
    graph: object exposing `hasEdge(u, v)`.

  Returns:
    1 if `graph.hasEdge(pair[0], pair[1])` is truthy, otherwise 0.
  """
  link_present = graph.hasEdge(pair[0], pair[1])
  return int(link_present)

In [0]:
def concatenate(node_set, label):
  """Combine samples and their labels into a single DataFrame.

  Args:
    node_set: sequence of samples, stored in the 'nodes' column.
    label: sequence of labels, stored in the 'label' column.

  Returns:
    A pandas DataFrame with the columns 'nodes' and 'label'.
  """
  return pd.DataFrame({'nodes': node_set, 'label': label})

In [0]:
# Read the raw edge list; every CSV row is kept as a list of fields.
# The `with` block closes the file on exit, so no explicit close() is needed,
# and newline='' is what the csv module recommends for files given to csv.reader.
with open('./input/graph.txt', 'r', newline='') as f:
  reader = csv.reader(f)
  data = list(reader)

In [0]:
# Split the first field of every row into its whitespace-separated node ids.
# Empty rows (csv.reader yields [] for blank lines) are skipped instead of
# raising IndexError, and split() without an argument tolerates repeated or
# trailing whitespace that split(' ') would turn into empty-string fields.
graph = [element[0].split() for element in data if element]
np.shape(graph)

(230650, 2)

In [0]:
# Build the directed graph straight from the parsed edge list
valid = nx.from_edgelist(graph, create_using = nx.DiGraph())

In [0]:
# Display the adjacency view of the directed graph (each node mapped to its
# neighbors and their edge-attribute dicts); this renders every node at once.
valid.adj.items()

ItemsView(AdjacencyView({'9489': {'101': {}, '604': {}, '22': {}, '11191': {}, '766': {}, '628': {}, '2133': {}, '7923': {}, '456': {}}, '101': {'43386': {}, '1130': {}, '3675': {}, '283': {}, '5664': {}, '14688': {}, '22875': {}, '20423': {}, '47252': {}, '364': {}, '22': {}, '456': {}, '17088': {}, '766': {}, '3705': {}, '1258': {}, '8490': {}, '19141': {}, '21785': {}, '5': {}, '22817': {}, '10': {}, '2041': {}, '576': {}, '18': {}, '1512': {}, '10971': {}, '26300': {}, '14006': {}, '44403': {}, '146': {}, '2309': {}, '127': {}, '1': {}, '17780': {}, '12190': {}, '32748': {}, '25506': {}, '19896': {}, '15395': {}, '49337': {}, '8287': {}, '119': {}, '852': {}, '104': {}, '604': {}, '50948': {}, '704': {}, '18673': {}, '227': {}, '2133': {}, '33': {}, '350': {}, '9272': {}, '1318': {}, '20293': {}, '57': {}, '21582': {}, '7': {}, '20545': {}, '67': {}, '39474': {}, '495': {}, '5083': {}, '156': {}, '25': {}, '2062': {}, '26': {}, '4139': {}, '21959': {}, '19529': {}, '6123': {}, '522

**OPTIONAL: attach node embeddings to the graph**

In [0]:
# Load the node embeddings from disk; they are attached to the graph's nodes in a later cell.
# NOTE(review): pickle.load can execute arbitrary code — only load a trusted file.
with open('./input/embeddings.pickle', 'rb') as handle:
  embeddings = pickle.load(handle)

Add the embeddings to the graph as node attributes.

In [0]:
# Attach each embedding to its node as (node, attributes) pairs.
# assumes every value in `embeddings` is an attribute dict, which is what
# add_nodes_from expects as the second tuple item — TODO confirm
valid.add_nodes_from(embeddings.items())

In [0]:
# Preview a few nodes together with their attribute dicts. The adjacency view
# only lists edges, so it cannot show the node attributes added in the previous
# cell, and dumping it in full produced an output too large to render.
list(valid.nodes(data=True))[:5]

Output hidden; open in https://colab.research.google.com to view.

**CONTINUED**

In [0]:
# Seed networkit's random number generator so the sampled graphs (and every
# dataset derived from them) are reproducible on "Restart & Run All".
# The second argument (useThreadId) is False so the seed is not mixed with the thread id.
SEED = 42
nk.setSeed(SEED, False)

# Fractions of links kept by each sampling step
TEST_FRACTION = 0.9   # share of the full graph's links kept in the test graph
TRAIN_FRACTION = 0.7  # share of the test graph's links kept in the training graph

#Convert networkx graph to networkit graph
valid_it = nxadapter.nx2nk(valid)

#Training and Test graph generation: the test graph is sampled from the full
#graph, and the training graph is sampled from the test graph
test_it = lp.RandomLinkSampler.byPercentage(valid_it, TEST_FRACTION)
train_it = lp.RandomLinkSampler.byPercentage(test_it, TRAIN_FRACTION)

In [0]:
# Training and testing sets creation: candidate pairs are the missing links
# found at distance 2 in the respective graph
test_finder = lp.MissingLinksFinder(test_it)
train_finder = lp.MissingLinksFinder(train_it)
testing_set = test_finder.findAtDistance(2)
training_set = train_finder.findAtDistance(2)

In [0]:
# Label creation: training pairs are checked against the test graph,
# testing pairs are checked against the full (valid) graph
y_train = [assign_label(pair, graph = test_it) for pair in training_set]
y_test = [assign_label(pair, graph = valid_it) for pair in testing_set]

In [0]:
# Pair every candidate sample with its label, one DataFrame per split
test = concatenate(node_set = testing_set, label = y_test)
train = concatenate(node_set = training_set, label = y_train)

train.head()

Unnamed: 0,nodes,label
0,"(0, 2)",0
1,"(0, 10)",0
2,"(0, 26)",0
3,"(0, 43)",0
4,"(0, 57)",0


In [0]:
# Number of rows (candidate pairs) in the test set
test.shape[0]

1772801

In [0]:
# Number of rows (candidate pairs) in the training set
train.shape[0]

1348949

In [0]:
def split_node_pairs(frame):
  """Expand the 'nodes' column of `frame` into separate 'src' and 'tar' columns.

  Args:
    frame: DataFrame with a 'nodes' column whose entries are two-item pairs.

  Returns:
    A new DataFrame holding the remaining columns of `frame` followed by
    'src' and 'tar'; the input frame itself is not modified.
  """
  # Naming the columns explicitly keeps this working for an empty frame,
  # where the list of pairs carries no width information.
  pairs = pd.DataFrame(frame['nodes'].tolist(), columns = ['src', 'tar'], index = frame.index)
  return pd.concat([frame.drop(columns = ['nodes']), pairs], axis = 1)

# One shared helper instead of duplicated in-place edits for each split
train = split_node_pairs(train)
test = split_node_pairs(test)

In [0]:
train.head()

Unnamed: 0,label,src,tar
0,0,0,2
1,0,0,10
2,0,0,26
3,0,0,43
4,0,0,57


In [0]:
test.head()

Unnamed: 0,label,src,tar
0,0,0,2
1,0,0,7
2,0,0,10
3,0,0,26
4,0,0,43


In [0]:
#Rearranging columns in test and train: node ids first, label last

cols = ['src', 'tar', 'label']
train, test = train[cols], test[cols]

In [0]:
train.head()

Unnamed: 0,src,tar,label
0,0,2,0
1,0,10,0
2,0,26,0
3,0,43,0
4,0,57,0


In [0]:
test.head()

Unnamed: 0,src,tar,label
0,0,2,0
1,0,7,0
2,0,10,0
3,0,26,0
4,0,43,0


In [0]:
# Persist both splits as CSV with a header row and without the index column
for frame, path in ((train, './input/train.csv'), (test, './input/test.csv')):
  frame.to_csv(path, header = True, index = False)

In [0]:
# Convert the three networkit graphs back to networkx graphs
valid_nx, test_nx, train_nx = (
  nxadapter.nk2nx(nk_graph) for nk_graph in (valid_it, test_it, train_it)
)

In [0]:
# Serialize the three graphs with the standard pickle module.
# nx.write_gpickle was removed in NetworkX 3.0; dumping the graph object with
# pickle directly is the documented replacement and also works on NetworkX 2.x.
for nx_graph, path in (
  (valid_nx, "./input/valid_graph.gpickle"),
  (test_nx, "./input/test_graph.gpickle"),
  (train_nx, "./input/train_graph.gpickle"),
):
  with open(path, 'wb') as out_file:
    pickle.dump(nx_graph, out_file, pickle.HIGHEST_PROTOCOL)