In [1]:
import networkx as nx
import numpy as np
import pickle
import random

In [2]:
def translate_data_into_graph(datapath,weight_idx,time_idx):
    """Build an undirected multigraph from a whitespace-separated edge list.

    Every data line must hold the two endpoint ids in columns 0 and 1 and a
    timestamp in column ``time_idx``; all of them are parsed with ``int``.
    Blank lines and lines whose first token starts with ``%`` or ``#`` are
    skipped instead of aborting the load.

    Args:
        datapath: Path of the edge-list text file.
        weight_idx: Column index of the integer edge weight. A falsy value
            (``None`` or ``0``) means "no weight column"; edges then only
            carry a ``'time'`` attribute.
        time_idx: Column index of the integer timestamp.

    Returns:
        An ``nx.MultiGraph`` with one edge per data line. Each edge has a
        ``'time'`` attribute and, when ``weight_idx`` is truthy, a
        ``'weight'`` attribute.

    Raises:
        IndexError: A data line has fewer columns than the requested indices.
        ValueError: A required column is not an integer literal.
    """
    edges = []
    with open(datapath) as f:
        for line in f:
            tokens = line.split()
            # Blank lines and comment headers carry no edge; skipping them
            # avoids an IndexError / ValueError on otherwise valid files.
            if not tokens or tokens[0].startswith(('%', '#')):
                continue

            u = int(tokens[0])
            v = int(tokens[1])
            attrs = {'time': int(tokens[time_idx])}

            # Truthiness (not ``is not None``) is kept on purpose so existing
            # callers passing 0/False for "unweighted" behave as before.
            if weight_idx:
                attrs['weight'] = int(tokens[weight_idx])

            edges.append((u, v, attrs))

    # MultiGraph is already undirected, so no to_undirected() copy is needed.
    g = nx.MultiGraph()
    g.add_edges_from(edges)
    return g

In [3]:
def get_negative_edge(g,first_node=None):
    """Sample a node pair that is not connected in ``g`` (a negative edge).

    Args:
        g: Graph-like object exposing ``nodes()`` and ``neighbors(node)``.
        first_node: Source endpoint of the negative edge. When ``None`` a
            node is drawn uniformly at random from ``g``.

    Returns:
        A tuple ``(first_node, second_node, {'weight': 1, 'time': None})``
        where ``second_node`` is neither ``first_node`` itself nor one of its
        neighbors.

    Raises:
        ValueError: ``g`` has no nodes, or ``first_node`` is adjacent to every
            other node so no negative partner exists.
    """
    # Materialise once: np.random.choice needs a real 1-D sequence, and the
    # same list is reused to build the candidate pool below.
    all_nodes = list(g.nodes())

    if first_node is None:
        first_node = np.random.choice(all_nodes)

    excluded = set(g.neighbors(first_node))
    excluded.add(first_node)

    # Keep graph node order so the pool does not depend on set iteration order.
    candidates = [node for node in all_nodes if node not in excluded]
    if not candidates:
        raise ValueError(
            'no negative edge available: node %r is adjacent to every other node'
            % (first_node,)
        )

    second_node = np.random.choice(candidates)

    return (first_node,second_node,{'weight':1,'time':None})

In [4]:
def create_data(g,total=0.5,train=0.8):
    """Split the earliest edges of every node into train / test samples.

    For each node, the edges returned by ``g.edges(node, data=True)`` are
    sorted by their ``'time'`` attribute and the earliest ``total`` fraction
    is kept. The kept edges are shuffled; the first ``train`` fraction goes
    to the training positives and the rest to the test positives. For every
    positive edge one negative edge is drawn with ``get_negative_edge(g)``.

    NOTE(review): on an undirected graph an edge is presumably visited from
    both of its endpoints, so the same edge may end up in more than one
    output list - confirm this is intended.

    Args:
        g: Graph whose edges carry a ``'time'`` attribute.
        total: Fraction of each node's time-ordered edges to keep.
        train: Fraction of the kept edges assigned to the training set.

    Returns:
        ``(train_edges, train_neg_edges, pos_edges, neg_edges)``, each a list
        of ``(u, v, data)`` tuples.
    """
    train_edges, train_neg_edges = [], []
    pos_edges, neg_edges = [], []

    for node in g.nodes():
        # Edges reported for this node as (u, v, data) triples, ordered by
        # their timestamp attribute.
        incident = sorted(g.edges(node, data=True), key=lambda edge: edge[2]['time'])

        keep_count = int(len(incident) * total)
        kept = incident[:keep_count]
        random.shuffle(kept)

        train_count = int(train * keep_count)
        train_edges.extend(kept[:train_count])
        pos_edges.extend(kept[train_count:])

        # Test negatives are drawn before training negatives; the order of
        # these draws determines the random stream, so keep it.
        neg_edges.extend(get_negative_edge(g) for _ in range(keep_count - train_count))
        train_neg_edges.extend(get_negative_edge(g) for _ in range(train_count))

    return train_edges,train_neg_edges,pos_edges,neg_edges

In [5]:
# Load the temporal edge list: column 2 is the weight, column 3 the timestamp.
datapath = './ia-conact/ia-contact.edges'
g = translate_data_into_graph(datapath,weight_idx=2,time_idx=3)

# Keep the earliest half of each node's edges and use 80% of those for training.
train_edges,train_neg_edges, pos_edges, neg_edges = create_data(g,total=0.5,train=0.8)

# Pickle every split next to the source data, one file per split.
save_path = './ia-conact/'
for split_name, split_edges in (
    ('train_edges', train_edges),
    ('train_neg_edges', train_neg_edges),
    ('pos_edges', pos_edges),
    ('neg_edges', neg_edges),
):
    with open(save_path + split_name, 'wb') as f:
        pickle.dump(split_edges, f)
    


In [6]:
# Number of negative edges sampled for the held-out (non-training) positives.
len(neg_edges)

5766

In [7]:
# Number of positive edges assigned to the training split.
len(train_edges)

22408