In [58]:
import torch
import networkx as nx
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

In [24]:
# Load the karate club graph that ships with networkx.
G = nx.karate_club_graph()

# Adjacency matrix of the graph as a SciPy sparse array.
sparse_G = nx.to_scipy_sparse_array(G)

# The COO form exposes the coordinates of the stored entries as two parallel
# arrays (`adj.row`, `adj.col`); each one is turned into an int64 (long) tensor.
adj = sparse_G.tocoo()
row, col = (
    torch.from_numpy(coords.astype(np.int64))
    for coords in (adj.row, adj.col)
)

### Networkx graph class

Let's focus on the adjacency information of the graph. This information can be retrieved from the graph class through its "adj" attribute. For each element of this attribute, we find:

> 1) Whether or not a link exists between a pair of nodes, represented by the presence of an entry in the "adj" element dict
> 2) The weight of the link between the nodes

We can retrieve the information from the adjacencies as follows:

In [42]:
# Neighbours of node 0, each mapped to the attribute dict of the connecting edge.
G.adj[0]

AtlasView({1: {'weight': 4}, 2: {'weight': 5}, 3: {'weight': 3}, 4: {'weight': 3}, 5: {'weight': 3}, 6: {'weight': 3}, 7: {'weight': 2}, 8: {'weight': 2}, 10: {'weight': 2}, 11: {'weight': 3}, 12: {'weight': 1}, 13: {'weight': 3}, 17: {'weight': 2}, 19: {'weight': 2}, 21: {'weight': 2}, 31: {'weight': 2}})

In [43]:
# Dimensions of the adjacency matrix, read straight from the sparse array.
sparse_G.shape

(34, 34)

The graph has 34 nodes, and thus the adjacency object of the graph has 34 elements.

We can further summarize the information in all the edge elements using a sparse array. Each row and each column of the array represents a node in the graph, and each element represents both the existence of a link (0 / !=0) and the weight of the connection.

We use the "to_scipy_sparse_array" function to transform elements of the graph into arrays. In particular, this function transforms the "adj" objects into an array as follows:

In [47]:
# Dimensions of the adjacency matrix, read straight from the sparse array.
sparse_G.shape

(34, 34)

In [48]:
# Dense view of the sparse adjacency matrix.
sparse_G.toarray()

array([[0, 4, 5, ..., 2, 0, 0],
       [4, 0, 6, ..., 0, 0, 0],
       [5, 6, 0, ..., 0, 2, 0],
       ...,
       [2, 0, 0, ..., 0, 4, 4],
       [0, 0, 2, ..., 4, 0, 5],
       [0, 0, 0, ..., 4, 5, 0]])

In [88]:
# Densify once and reuse the same array for both extremes.
dense_weights = sparse_G.toarray()
print("Max. wegith value: ", dense_weights.max(), " -  Min. weight value: ", dense_weights.min())

Max. wegith value:  7  -  Min. weight value:  0


In [63]:
# Row 1 of the dense adjacency matrix.
sparse_G.toarray()[1]

array([4, 0, 6, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 2, 0, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0])

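We further transform the adjacency matrix into the COO format. The COO format stores the row and column index of every stored entry; stacking those two index arrays "melts" the adjacency matrix into a 2 x E matrix (E being the number of stored entries, 156 here) in which each column represents one edge in the graph, without the weights. We call this new stacked object "edge_index".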

In [71]:
# Summary repr of the COO-format adjacency array.
adj

<34x34 sparse array of type '<class 'numpy.int64'>'
	with 156 stored elements in COOrdinate format>

In [79]:
# Largest index found along each axis of the COO coordinates.
max_col, max_row = adj.col.max(), adj.row.max()
print("Max. adj col: ", max_col, " -  Max. adj row: ", max_row)

Max. adj col:  33  -  Max. adj row:  33


In [83]:
# Put the row indices on top of the column indices: one column per stored entry.
edge_index = torch.stack((row, col), dim=0)

# Tabular view of the stacked tensor for inspection.
pd.DataFrame(edge_index)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,146,147,148,149,150,151,152,153,154,155
0,0,0,0,0,0,0,0,0,0,0,...,33,33,33,33,33,33,33,33,33,33
1,1,2,3,4,5,6,7,8,10,11,...,20,22,23,26,27,28,29,30,31,32


In [84]:
# Binary label per node: 0 when the node's 'club' attribute is 'Mr. Hi', 1 otherwise.
labels = np.asarray(
    [attrs['club'] != 'Mr. Hi' for _, attrs in G.nodes(data=True)]
).astype(np.int64)

In [85]:
# Display the per-node label array.
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])