In [7]:
import networkx as nx
import numpy as np
import os
import pickle
import scipy.io as sio
from scipy import sparse

## Construct graph paths for different datasets

In [8]:
# A notebook has no __file__, so the literal string '__file__' is resolved
# against the current working directory; stripping three path components from
# that result yields the base directory used for all data paths below.
notebook_dir = os.path.dirname(os.path.realpath('__file__'))
graph_base_path = os.path.dirname(os.path.dirname(notebook_dir))
graph_name = 'synthetic'
graph_path = os.path.join(graph_base_path, 'data', graph_name, 'link_structure.edgelist')
print('Graph path :: ', graph_path)

Graph path ::  /Users/anirban/Documents/IISC-Study/Project/Repos/line2vec/data/synthetic/link_structure.edgelist


## Load graph from edgelist file and convert it to line graph

In [19]:
# Read the original graph with integer node ids and show its edges.
G = nx.read_edgelist(graph_path, nodetype=int)
print(G.number_of_edges())
print(G.edges)

# Build the line graph of G and show its node count and edges.
L = nx.line_graph(G)
print(L.number_of_nodes())
print(L.edges)

30
[(0, 1), (0, 3), (0, 4), (0, 5), (0, 11), (1, 2), (1, 3), (1, 4), (3, 2), (3, 10), (4, 2), (5, 8), (5, 6), (5, 7), (11, 10), (11, 12), (11, 13), (11, 14), (10, 12), (10, 13), (10, 14), (8, 6), (8, 7), (8, 9), (6, 9), (6, 7), (7, 9), (12, 13), (12, 14), (13, 14)]
30
[((5, 7), (6, 7)), ((5, 7), (7, 8)), ((5, 7), (5, 8)), ((5, 7), (5, 6)), ((5, 7), (0, 5)), ((5, 7), (7, 9)), ((6, 7), (6, 9)), ((6, 7), (7, 9)), ((6, 7), (7, 8)), ((6, 7), (6, 8)), ((6, 7), (5, 6)), ((3, 10), (10, 14)), ((3, 10), (10, 12)), ((3, 10), (10, 11)), ((3, 10), (2, 3)), ((3, 10), (0, 3)), ((3, 10), (1, 3)), ((3, 10), (10, 13)), ((10, 14), (10, 12)), ((10, 14), (12, 14)), ((10, 14), (11, 14)), ((10, 14), (13, 14)), ((10, 14), (10, 11)), ((10, 14), (10, 13)), ((7, 8), (7, 9)), ((7, 8), (6, 8)), ((7, 8), (5, 8)), ((7, 8), (8, 9)), ((10, 12), (10, 11)), ((10, 12), (11, 12)), ((10, 12), (12, 13)), ((10, 12), (10, 13)), ((10, 12), (12, 14)), ((10, 13), (11, 13)), ((10, 13), (12, 13)), ((10, 13), (13, 14)), ((10, 13), 

## Build dictionary of modified edge weights for original graph

For each undirected edge $e=(v_i, v_j) \in E(G)$, evaluate the edge weight $w_{ij}$ using the following equation: <br><br>&emsp;&emsp;&emsp;
$w_{ij} = \max\left(\log\frac{D}{d_i \cdot d_j} + \epsilon,\ \epsilon\right)$.
<br> 
<br>$D$ = Total degree of the graph = $2*|E|$, 
<br>$d_i$ = Degree of the vertex $v_i$, 
<br>$d_j$ = Degree of the vertex $v_j$

Here the assumption is that the original graph $G=(V, E)$ is unweighted.

In [23]:
# Hyper-parameter of the model: added to every log term and used as the
# lower bound of each edge weight.
epsilon = 0.00001
degree_dict = dict(G.degree())
total_degree = np.sum(list(degree_dict.values()))
print(total_degree)

# Weight of edge (u, v): max(log(D / (d_u * d_v)) + epsilon, epsilon),
# stored under the sorted vertex pair so lookups have one canonical key.
edge_weight_dict = {}
for u, v in G.edges():
    degree_ratio = float(total_degree) / (degree_dict[u] * degree_dict[v])
    edge_weight_dict[tuple(sorted((u, v)))] = max(np.log(degree_ratio) + epsilon, epsilon)
print(edge_weight_dict)

60
{(0, 1): 1.0986222886681098, (0, 3): 1.0986222886681098, (0, 4): 1.3863043611198906, (0, 5): 1.0986222886681098, (0, 11): 0.8754787373538998, (1, 2): 1.6094479124341003, (1, 3): 1.3217658399823196, (1, 4): 1.6094479124341003, (2, 3): 1.6094479124341003, (3, 10): 1.0986222886681098, (2, 4): 1.8971299848858814, (5, 8): 1.3217658399823196, (5, 6): 1.3217658399823196, (5, 7): 1.3217658399823196, (10, 11): 0.8754787373538998, (11, 12): 1.0986222886681098, (11, 13): 1.0986222886681098, (11, 14): 1.0986222886681098, (10, 12): 1.0986222886681098, (10, 13): 1.0986222886681098, (10, 14): 1.0986222886681098, (6, 8): 1.3217658399823196, (7, 8): 1.3217658399823196, (8, 9): 1.6094479124341003, (6, 9): 1.6094479124341003, (6, 7): 1.3217658399823196, (7, 9): 1.6094479124341003, (12, 13): 1.3217658399823196, (12, 14): 1.3217658399823196, (13, 14): 1.3217658399823196}


## Build dictionary of weighted degree of nodes 
This is measured based on the weights of edges associated with a node of the original graph calculated previously. For any node $v_i \in V(G)$, if the outgoing edges associated with it are denoted by $e_{ik}$ for those $k$ such that $e_{ik} \in E(G)$, then the weighted degree is as follows: <br>

&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;
$d^{'}_{i} = \sum_{k}{w_{ik}}$

In [24]:
# Weighted degree of a node = sum of the weights of its incident edges.
weighted_degree_dict = {}
for node in G.nodes():
    incident_weight = 0
    for neighbor in G.neighbors(node):
        # The weight may be stored under either orientation of the pair.
        forward_key = (node, neighbor)
        key = forward_key if forward_key in edge_weight_dict else (neighbor, node)
        incident_weight += edge_weight_dict[key]
    weighted_degree_dict[node] = incident_weight

## Build weighted line graph 

In the original graph, two undirected edges $e_{ij}$ and $e_{jk}$ are adjacent if and only if $v_j$ is the common vertex of $e_{ij}$ and $e_{jk}$. We define the edge weight $w^{L}_{\{i,k\}}$ of the undirected edge $e^{L}_{ik}$ of the line graph $L(G)$ as the average of the weights of the two directed edges $e^{L}_{ik}$ and $e^{L}_{ki}$, as follows: <br>

&emsp;&emsp;&emsp;&emsp;&emsp;
$ w^L_{ik} = \frac{d_i}{d_i + d_j} \times \frac{w_{jk}} {\sum_{l=1}^{|V|}{w_{jl}} - w_{ij}} = \frac{d_i}{d_i + d_j} \times \frac{w_{jk}} {{d^{'}_{j}} - w_{ij}}$

&emsp;&emsp;&emsp;&emsp;&emsp;
$ w^L_{ki} = \frac{d_k}{d_k + d_j} \times \frac{w_{ji}} {\sum_{l=1}^{|V|}{w_{jl}} - w_{kj}} = \frac{d_k}{d_j + d_k} \times \frac{w_{ji}} {{d^{'}_{j}} - w_{kj}}$

&emsp;&emsp;&emsp;&emsp;&emsp;
$ w^{L}_{\{i,k\}} = \frac{w^L_{ik} + w^L_{ki}}{2}$

In [31]:
def _directed_line_weight(outer_vertex, common_vertex, src_edge, dest_edge):
    """Weight of the directed line-graph edge going from src_edge to dest_edge.

    Implements  d_outer / (d_outer + d_common)  *  w_dest / (d'_common - w_src),
    where d is the plain degree, d' the weighted degree and w the edge weight
    of the original graph.

    outer_vertex  -- endpoint of src_edge that is not shared with dest_edge
    common_vertex -- vertex shared by src_edge and dest_edge
    src_edge      -- original-graph edge (vertex pair) the walk leaves from
    dest_edge     -- original-graph edge (vertex pair) the walk arrives at
    """
    outer_degree = degree_dict[outer_vertex]
    if outer_degree == 1:
        # A pendant outer vertex contributes the full share.
        degree_share = 1
    else:
        degree_share = float(outer_degree) / (outer_degree + degree_dict[common_vertex])

    # edge_weight_dict is keyed by sorted vertex pairs, so canonicalise the
    # line-graph node labels before looking them up.
    src_weight = edge_weight_dict[tuple(sorted(src_edge))]
    dest_weight = edge_weight_dict[tuple(sorted(dest_edge))]

    remaining_weight = weighted_degree_dict[common_vertex] - src_weight
    if remaining_weight == 0:
        print('In impossible case!')
        transition_share = 0
    else:
        transition_share = float(dest_weight) / remaining_weight
    return degree_share * transition_share


line_graph_edge_weight_dict = {}
for line_graph_edge in L.edges():
    original_graph_edge_1, original_graph_edge_2 = line_graph_edge
    shared_vertices = set(original_graph_edge_1) & set(original_graph_edge_2)
    start_candidates = set(original_graph_edge_1) - shared_vertices
    end_candidates = set(original_graph_edge_2) - shared_vertices
    if len(shared_vertices) == 1 and start_candidates and end_candidates:
        common_vertex = next(iter(shared_vertices))
        start_vertex = next(iter(start_candidates))
        end_vertex = next(iter(end_candidates))
    else:
        # Handle the odd case of self-loops or parallel-edges
        common_vertex = original_graph_edge_1[1]
        start_vertex = original_graph_edge_1[0]
        end_vertex = original_graph_edge_2[1]

    # Average the two directed weights to get the undirected line-graph weight.
    line_graph_edge_weight_1 = _directed_line_weight(
        start_vertex, common_vertex, original_graph_edge_1, original_graph_edge_2)
    line_graph_edge_weight_2 = _directed_line_weight(
        end_vertex, common_vertex, original_graph_edge_2, original_graph_edge_1)
    line_graph_edge_weight_dict[line_graph_edge] = (line_graph_edge_weight_1 + line_graph_edge_weight_2)/2

## Define path for the line graph

In [7]:
# Destination of the weighted line graph, written later as an edge list.
line_graph_path = os.path.join(graph_base_path, 'data', graph_name, 'dual_link_structure.edgelist')
# print() call instead of the Python 2 print statement, matching the other cells.
print('Line graph path :: ', line_graph_path)
# nx.write_edgelist(L, line_graph_path)

Line graph path ::  /storage/home1/e0202-6/edge-to-vec/data/synthetic_4cluster/dual_link_structure.edgelist


## Map graph edges to unique integer index - useful for line graph

In [8]:
# Give every edge of G a consecutive integer id, in G.edges() iteration order.
# Keys are the edge tuples exactly as G.edges() yields them.
edge_dict = {}
print(len(G.edges()))
for index, edge in enumerate(G.edges()):
    edge_dict[edge] = index

# print() call instead of the Python 2 print statement, matching the other cells.
print(len(edge_dict))

112
112


## Store line graph edges based on the previously constructed map

In [9]:
def _lookup_either_orientation(mapping, pair):
    """Return mapping[pair], falling back to the reversed pair.

    The dictionaries built from G.edges() and L.edges() are keyed by 2-tuples
    whose element order is not guaranteed to match the labels produced by
    nx.line_graph, so both orientations are tried. A KeyError is raised only
    if neither orientation is present.
    """
    if pair in mapping:
        return mapping[pair]
    return mapping[(pair[1], pair[0])]


# Rebuild the line graph with integer node ids (taken from edge_dict) and the
# previously computed weights attached to each edge.
L_new = nx.Graph()
for edge in L.edges():
    start_vertex, end_vertex = edge
    start_vertex_index_line_graph_edge = _lookup_either_orientation(edge_dict, start_vertex)
    end_vertex_index_line_graph_edge = _lookup_either_orientation(edge_dict, end_vertex)
    line_graph_edge_weight = _lookup_either_orientation(line_graph_edge_weight_dict, edge)
    L_new.add_edge(start_vertex_index_line_graph_edge, end_vertex_index_line_graph_edge, weight=line_graph_edge_weight)

In [10]:
# np.savetxt(line_graph_path, line_graph_edgelist_mat, fmt="%.4e")
# Write the integer-indexed line graph to line_graph_path; data=True asks
# write_edgelist to include each edge's attributes (here the 'weight').
nx.write_edgelist(L_new, line_graph_path, data=True)

## Save the map to a pickle file

In [11]:
# Persist the edge -> integer id map next to the other artefacts of this graph.
edge_to_node_id_dict_filename = os.path.join(graph_base_path, 'data', graph_name, 'edge_to_node_id_dict.pkl')
with open(edge_to_node_id_dict_filename, 'wb') as pickle_out:
    pickle.dump(edge_dict, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
# Read the map back from disk. NOTE: pickle.load executes arbitrary code from
# the file, so only use it on files produced by this notebook.
with open(edge_to_node_id_dict_filename, 'rb') as pickle_in:
    loaded_edge_dict = pickle.load(pickle_in)

## Store the line graph into adjacency matrix format

In [13]:
# Without an explicit nodelist the rows/columns follow L_new's node insertion
# order, which depends on edge iteration order and cannot be recovered from the
# saved matrix. Ordering by ascending node id makes the layout deterministic;
# row i matches line-graph node id i as long as every id 0..n-1 occurs in L_new
# (TODO confirm for graphs whose line graph has isolated nodes).
G_from_adjacency_mat = nx.adjacency_matrix(L_new, nodelist=sorted(L_new.nodes()))
# print(G_from_adjacency_mat.todense())

In [14]:
# Store the adjacency matrix as CSR under the MATLAB variable 'graph_sparse'.
mat_file_name = os.path.join(graph_base_path, 'data', graph_name, 'line_graph_undirected.mat')
graph_sparse_csr = sparse.csr_matrix(G_from_adjacency_mat)
sio.savemat(mat_file_name, {'graph_sparse': graph_sparse_csr})

## Test the line graph from edgelist and adjacency matrix

In [None]:
# Round-trip check: reload the edge list previously written to line_graph_path
# and print its edges with their attributes. No nodetype is passed, so node
# labels come back in read_edgelist's default type rather than as int.
L_test = nx.read_edgelist(line_graph_path)
print(L_test.edges(data=True))

In [None]:
# Round-trip check: reload the saved sparse adjacency matrix and rebuild a graph.
line_graph_scipy_adj_mat = sio.loadmat(mat_file_name)['graph_sparse']
# nx.from_scipy_sparse_matrix was removed in networkx 3.0; prefer its
# replacement when the installed version provides it and fall back otherwise.
_from_scipy_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
L_test_adj_mat = _from_scipy_sparse(line_graph_scipy_adj_mat)
print(L_test_adj_mat.edges(data=True))

In [105]:
# Scratch cell: format an iteration counter with %-style interpolation.
i = 10
print('Iter %s' % i)

Iter 10
