In [158]:
import networkx as nx
import numpy as np
import os
import pickle
import scipy.io as sio
from scipy import sparse
# print(nx.__version__)
# print(np.__version__)

## Construct graph paths for different datasets

In [159]:
# Dataset to process; it selects the sub-directory of `data/` that is read and written.
graph_name = 'synthetic'

# Base directory: resolve the literal name '__file__' against the current
# working directory, then climb three directory levels from that path.
graph_base_path = os.path.realpath('__file__')
for _level in range(3):
    graph_base_path = os.path.dirname(graph_base_path)

# Edge list of the original graph for the selected dataset.
graph_path = os.path.join(graph_base_path, 'data', graph_name, 'link_structure.edgelist')
print('Graph path :: ', graph_path)

Graph path ::  /Users/anirban/Documents/IISC-Study/Project/Repos/line2vec/data/synthetic/link_structure.edgelist


## Load graph from edgelist file and convert it to line graph

In [160]:
# Read the edge list as a directed graph whose node labels are parsed as integers.
G = nx.read_edgelist(graph_path, nodetype=int, create_using=nx.DiGraph())
print(G.number_of_edges())

# Line graph of G; its nodes are the edges of G, so the two printed counts should match.
L = nx.line_graph(G)
print(L.number_of_nodes())

30
30


## Build dictionary of modified edge weights for original graph

For each directed edge $e=(v_i, v_j) \in E(G)$, evaluate the edge weight $w_{ij}$ using the following equation: <br><br>&emsp;&emsp;&emsp;
$w_{ij} = \max\left(\log\frac{D}{d_i \cdot d_j} + \epsilon,\ \epsilon\right)$.
<br> 
<br>$D$ = Total in-degree of the graph (equal to its total out-degree, i.e. the number of edges $|E|$), 
<br>$d_i$ = Out-degree of the vertex $v_i$, 
<br>$d_j$ = In-degree of the vertex $v_j$

This assumes that the original graph $G=(V, E)$ is unweighted.

In [161]:
# Per-node in- and out-degrees of the directed graph G.
in_degree_dict = dict(G.in_degree())
out_degree_dict = dict(G.out_degree())

# Lower bound applied to every edge weight, so no weight is zero or negative.
epsilon = 0.00001

# D: each edge adds one to some in-degree and one to some out-degree,
# so the two totals must agree.
total_degree = np.sum(list(in_degree_dict.values()))
assert total_degree == np.sum(list(out_degree_dict.values()))

# w_ij = max(log(D / (d_i * d_j)) + epsilon, epsilon), where d_i is the
# out-degree of the source vertex and d_j the in-degree of the target vertex.
edge_weight_dict = {}
for src_node, dst_node in G.edges():
    degree_product = out_degree_dict[src_node] * in_degree_dict[dst_node]
    log_ratio = np.log(float(total_degree) / degree_product)
    edge_weight_dict[(src_node, dst_node)] = max(log_ratio + epsilon, epsilon)

## Build dictionary of weighted degree of nodes 
The weighted degree of a node is computed from the edge weights obtained in the previous step. For any node $v_i \in V(G)$, let $e_{ik}$ denote its outgoing edges, i.e. those $k$ for which $e_{ik} \in E(G)$; the weighted (out-)degree is then: <br>

&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;
$d^{'}_{i} = \sum_{k}{w_{ik}}$

In [162]:
# d'_i: for every node, add up the weights of the edges (node, neighbor)
# reported by G.neighbors(node). A node with no such edges keeps the value 0.
weighted_out_degree_dict = {
    node: sum(edge_weight_dict[(node, neighbor)] for neighbor in G.neighbors(node))
    for node in G.nodes()
}

## Build weighted line graph 

In the original graph, two directed edges $e_{ij}$ and $e_{jk}$ are adjacent if and only if $v_j$ is the destination vertex of $e_{ij}$ and the source vertex of $e_{jk}$. For each such pair, the line graph $L(G)$ contains a directed edge $e^{L}_{ik}$ from $e_{ij}$ to $e_{jk}$, whose weight we define as follows (taking $w_{jl} = 0$ whenever $e_{jl} \notin E(G)$): <br>

&emsp;&emsp;&emsp;&emsp;&emsp;
$ w^L_{ik} = \frac{w_{jk}} {\sum_{l=1}^{|V|}{w_{jl}}} = \frac{w_{jk}}{d^{'}_{j}} $

In [163]:
# Weight of each line-graph edge (e_ij -> e_jk): the weight of the destination
# edge e_jk divided by the weighted out-degree of the shared vertex v_j.
#
# L is the line graph of the directed graph G, so for every line-graph edge
# the head of the source edge is the tail of the destination edge. That shared
# vertex is therefore read directly from the source edge; this also covers
# self-loops and 2-cycles, which the previous set-based detection had to
# special-case before falling back to the very same value.
line_graph_edge_weight_dict = {}
for line_graph_edge in L.edges():
    original_graph_edge_src, original_graph_edge_dest = line_graph_edge
    common_vertex = original_graph_edge_src[1]
    assert common_vertex == original_graph_edge_dest[0], \
        'line-graph edge does not join two head-to-tail adjacent edges'

    weight_dest_edge = edge_weight_dict[original_graph_edge_dest]
    # The destination edge leaves common_vertex and every edge weight is at
    # least epsilon, so this denominator is strictly positive.
    weighted_degree_common_vertex = weighted_out_degree_dict[common_vertex]
    line_graph_edge_weight_dict[line_graph_edge] = (
        float(weight_dest_edge) / weighted_degree_common_vertex
    )

## Define path for the line graph

In [7]:
# Destination file for the weighted line-graph ("dual") edge list of the selected dataset.
line_graph_path = os.path.join(graph_base_path, 'data', graph_name, 'dual_link_structure.edgelist')
# print() call form, matching the graph-path cell, so this cell is valid Python 3.
print('Line graph path :: ', line_graph_path)

Line graph path ::  /storage/home1/e0202-6/edge-to-vec/data/cora/dual_link_structure.edgelist


## Map graph edges to unique integer index - useful for line graph

In [8]:
# Give every edge of G a unique integer id (0, 1, 2, ...) in G.edges()
# iteration order. The ids become the node labels of the re-indexed line graph.
print(len(G.edges()))
edge_dict = {tuple(edge): index for index, edge in enumerate(G.edges())}

# Should equal the edge count printed above (one id per edge).
print(len(edge_dict))

5429
5429


## Store line graph edges based on the previously constructed map

In [9]:
# Rebuild the line graph with integer node labels: each line-graph node (an
# edge of G) is replaced by its id from edge_dict, and each line-graph edge
# carries its previously computed weight.
L_new = nx.DiGraph()
# Register every id up front so line-graph nodes without any incident
# line-graph edge are still present in L_new.
L_new.add_nodes_from(edge_dict.values())
for line_graph_edge in L.edges():
    src_edge, dst_edge = line_graph_edge
    L_new.add_edge(
        edge_dict[src_edge],
        edge_dict[dst_edge],
        weight=line_graph_edge_weight_dict[line_graph_edge],
    )
print(len(L_new.nodes()))

5429


In [10]:
# Persist the re-indexed line graph as an edge list. data=True is passed so the
# edge attributes (the 'weight' set when L_new was built) are written as well.
nx.write_edgelist(L_new, line_graph_path, data=True)

## Save the map to a pickle file

In [11]:
# Pickle the edge -> integer-id map into the dataset directory, using the
# highest pickle protocol available.
edge_to_node_id_dict_filename = os.path.join(
    graph_base_path, 'data', graph_name, 'edge_to_node_id_dict.pkl'
)
with open(edge_to_node_id_dict_filename, 'wb') as pickle_out:
    pickle.dump(edge_dict, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
# Read the pickled edge -> id map back from disk.
# NOTE: pickle.load can execute arbitrary code; only use it on files this
# notebook itself produced.
with open(edge_to_node_id_dict_filename, 'rb') as pickle_in:
    loaded_edge_dict = pickle.load(pickle_in)

## Store the line graph into adjacency matrix format

In [13]:
# Adjacency matrix of the re-indexed line graph L_new. Despite its name, this
# variable holds the matrix that is saved to the .mat file, not a graph.
# presumably rows/columns follow L_new's node order, i.e. the ids from edge_dict — verify
G_from_adjacency_mat = nx.adjacency_matrix(L_new)
# print(G_from_adjacency_mat.todense())

In [14]:
# Save the line-graph adjacency matrix, converted to CSR, into a MATLAB .mat
# file under the key 'graph_sparse'.
mat_file_name = os.path.join(graph_base_path, 'data', graph_name, 'line_graph_directed.mat')
line_graph_csr = sparse.csr_matrix(G_from_adjacency_mat)
sio.savemat(mat_file_name, {'graph_sparse': line_graph_csr})

## Test the line graph from edgelist and adjacency matrix

In [None]:
# Read the saved line-graph edge list back in for a round-trip check.
# The file was written from a directed graph with integer node ids, so it is
# read the same way. Without create_using the result is an undirected graph,
# in which the two arcs of a reciprocal pair collapse into a single edge and
# one of their weights is lost; without nodetype the ids come back as strings.
L_test = nx.read_edgelist(line_graph_path, create_using=nx.DiGraph(), nodetype=int, data=True)

In [None]:
# Load the sparse adjacency matrix back from the .mat file and rebuild a graph from it.
line_graph_scipy_adj_mat = sio.loadmat(mat_file_name)['graph_sparse']
# The saved matrix describes a directed graph (entry (i, j) need not equal
# entry (j, i)), so build a DiGraph instead of the default undirected graph.
# NOTE(review): from_scipy_sparse_matrix was removed in NetworkX 3.0 in favour
# of from_scipy_sparse_array — confirm against the NetworkX version in use.
L_test_adj_mat = nx.from_scipy_sparse_matrix(line_graph_scipy_adj_mat, create_using=nx.DiGraph())

## Convert the dual graph edgelist into edgelist format suitable for node2vec

In [None]:
# Destination file for the copy of the line-graph edge list intended for node2vec.
line_graph_path_for_node2vec = os.path.join(graph_base_path, 'data', graph_name, 'dual_link_structure_node2vec.edgelist')
# Print the path defined in this cell (the earlier line_graph_path was printed
# here by mistake), using the print() call form so the cell is valid Python 3.
print('Line graph path :: ', line_graph_path_for_node2vec)

In [None]:
# Write the graph that was read back from the line-graph edge list to the node2vec path.
# NOTE(review): no `data` argument is passed, so NetworkX's default edge-data
# format is used — confirm that it matches what the node2vec reader expects.
nx.write_edgelist(L_test, line_graph_path_for_node2vec)