# IMPORT FUNCFIONS

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import itertools
import pickle
import os
import random
from sklearn.model_selection import train_test_split

from sklearn.metrics import average_precision_score
from sklearn import metrics

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from ge import LINE

# READ DATA

In [2]:
def get_edges_dict(traindata):
    if os.path.isfile("edgesdic.pkl"):
        f = open("edgesdic.pkl","rb")
        edges = pickle.load(f)
        f.close()
        return edges
    else:
        edges = dict()
        for edge in range(len(traindata)): 
            edges[(traindata.loc[[edge]]['id_1'].values[0],traindata.loc[[edge]]['id_2'].values[0])] = 1 
            edges[(traindata.loc[[edge]]['id_2'].values[0],traindata.loc[[edge]]['id_1'].values[0])] = 1  
        f = open("edgesdic.pkl","wb")
        pickle.dump(edges,f)
        f.close()
        return edges

def get_negative_edges(traindata,g):
    edges = get_edges_dict(traindata)
    # for missing edges.
    negative_edges = set([])
    maxNodenum = max(max(traindata['id_1']),max(traindata['id_2']))
    #產生與原圖同edge數目的negative edges
    while (len(negative_edges)<len(traindata)):
        node1=random.randint(0, maxNodenum) 
        node2=random.randint(0, maxNodenum) 
        tmp = edges.get((node1,node2),0) #edge不在graph裡標為0
        if tmp == 0 and node1!=node2 and g.has_node(node1) and g.has_node(node2): # if edge不在graph裡才要做處理
            negative_edges.add((node1,node2))
        else:
            continue
    return negative_edges

# MODELS

In [3]:
def combine_embedding(data,embeddings):
    i=0
    X = []
    for node1,node2 in data:
        X.append(np.concatenate((data[i],embeddings[int(node1)],embeddings[int(node2)])))
   # print(embeddings[str(int(data[0]))])
        i+=1
    return X

In [4]:
all_ap = []
all_auc = []

filename = 'citeseer'

for i in range(10):    
    ori_df = pd.read_csv('data/'+filename+'.txt', header=None, sep=' ')
    ori_df.columns = ['id_1', 'id_2']
    
    nodes_number = 3327
    ori_graph_np = np.zeros((nodes_number, nodes_number))
    for i in range(ori_df.shape[0]):
        ori_graph_np[ori_df.iloc[i, 0], ori_df.iloc[i, 1]] = 1
        ori_graph_np[ori_df.iloc[i, 1], ori_df.iloc[i, 0]] = 1

    ori_G = nx.from_numpy_matrix(ori_graph_np)
    #print(nx.info(ori_G))
    
    negative_edges = get_negative_edges(ori_df, ori_G)
    #print(len(negative_edges))

    train_df, test_df = train_test_split(ori_df, test_size=0.1)
    
    nodes_number = len(ori_G.nodes)
    graph_np = np.zeros((nodes_number, nodes_number))
    for i in range(train_df.shape[0]):
        graph_np[train_df.iloc[i, 0], train_df.iloc[i, 1]] = 1
        graph_np[train_df.iloc[i, 1], train_df.iloc[i, 0]] = 1

    train_G = nx.from_numpy_matrix(graph_np)
    #print(nx.info(train_G))
    
    df_neg = pd.DataFrame(list(negative_edges), columns=['id_1', 'id_2'])
    
    train_df = train_df.values
    test_pos_df = test_df.values
    df_neg = df_neg.values
    
    model = LINE(train_G, embedding_size=12, order='all')
    model.train(batch_size=128, epochs=50, verbose=2)
    
    LINE_embeddings = model.get_embeddings()
    
    train_df = combine_embedding(train_df, LINE_embeddings)
    test_pos_df = combine_embedding(test_pos_df, LINE_embeddings)
    df_neg = combine_embedding(df_neg, LINE_embeddings)
    
    X_train_neg, X_test_neg = train_test_split(df_neg, test_size=0.1)
    
    y_train_pos = np.ones(len(train_df))
    y_train_neg = np.zeros(len(X_train_neg))
    y_test_pos = np.ones(len(test_pos_df))
    y_test_neg = np.zeros(len(X_test_neg))
    
    X_train = np.concatenate((train_df, X_train_neg))
    y_train = np.concatenate((y_train_pos, y_train_neg))
    X_test = np.concatenate((test_pos_df, X_test_neg))
    y_test = np.concatenate((y_test_pos, y_test_neg))
    
    clf = RandomForestClassifier(n_estimators=400)
    clf.fit(X_train, y_train)
    
    predict_Y = clf.predict(X_test)
    
    ap = average_precision_score(y_test, predict_Y)
    print("AP： ", ap)
    fpr, tpr, _ = metrics.roc_curve(y_test, predict_Y, pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print("AUC SCORE: ",auc)
    
    all_ap.append(ap)
    all_auc.append(auc)

print("####################### SUM UP ############################")
print("AP MEAN : ", np.array(all_ap).mean())
print("AP STD : ", np.array(all_ap).std())
print("AUC MEAN : ", np.array(all_auc).mean())
print("AUC STD : ", np.array(all_auc).std())

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/50
212/212 - 1s - loss: 1.3863 - first_order_loss: 0.6932 - second_order_loss: 0.6931
Epoch 2/50
212/212 - 0s - loss: 1.3857 - first_order_loss: 0.6929 - second_order_loss: 0.6928
Epoch 3/50
212/212 - 0s - loss: 1.3837 - first_order_loss: 0.6925 - second_order_loss: 0.6913
Epoch 4/50
212/212 - 0s - loss: 1.3742 - first_order_loss: 0.6918 - second_order_loss: 0.6824
Epoch 5/50
212/212 - 0s - loss: 1.3423 - first_order_loss: 0.6910 - second_order_loss: 0.6514
Epoch 6/50
212/212 - 0s - loss: 1.2818 - first_order_loss: 0.6895 - second_order_loss: 0.5923
Epoch 7/50
212/212 - 0s - loss: 1.2072 - first_order_loss: 0.6875 - second_order_loss: 0.5197
Epoch 8/50
212/212 - 0s - loss: 1.1372 - first_order_loss: 0.6855 - se

Epoch 26/50
212/212 - 1s - loss: 0.8204 - first_order_loss: 0.6440 - second_order_loss: 0.1764
Epoch 27/50
212/212 - 1s - loss: 0.8133 - first_order_loss: 0.6417 - second_order_loss: 0.1716
Epoch 28/50
212/212 - 1s - loss: 0.8096 - first_order_loss: 0.6390 - second_order_loss: 0.1706
Epoch 29/50
212/212 - 1s - loss: 0.8009 - first_order_loss: 0.6394 - second_order_loss: 0.1615
Epoch 30/50
212/212 - 1s - loss: 0.8018 - first_order_loss: 0.6390 - second_order_loss: 0.1628
Epoch 31/50
212/212 - 1s - loss: 0.7932 - first_order_loss: 0.6355 - second_order_loss: 0.1578
Epoch 32/50
212/212 - 1s - loss: 0.7886 - first_order_loss: 0.6377 - second_order_loss: 0.1510
Epoch 33/50
212/212 - 1s - loss: 0.7790 - first_order_loss: 0.6352 - second_order_loss: 0.1438
Epoch 34/50
212/212 - 1s - loss: 0.7820 - first_order_loss: 0.6336 - second_order_loss: 0.1484
Epoch 35/50
212/212 - 1s - loss: 0.7729 - first_order_loss: 0.6329 - second_order_loss: 0.1401
Epoch 36/50
212/212 - 1s - loss: 0.7679 - first_or

Epoch 12/50
212/212 - 1s - loss: 0.9660 - first_order_loss: 0.6752 - second_order_loss: 0.2908
Epoch 13/50
212/212 - 1s - loss: 0.9453 - first_order_loss: 0.6712 - second_order_loss: 0.2741
Epoch 14/50
212/212 - 1s - loss: 0.9274 - first_order_loss: 0.6696 - second_order_loss: 0.2579
Epoch 15/50
212/212 - 1s - loss: 0.9123 - first_order_loss: 0.6664 - second_order_loss: 0.2459
Epoch 16/50
212/212 - 1s - loss: 0.9048 - first_order_loss: 0.6628 - second_order_loss: 0.2420
Epoch 17/50
212/212 - 1s - loss: 0.8919 - first_order_loss: 0.6612 - second_order_loss: 0.2307
Epoch 18/50
212/212 - 1s - loss: 0.8747 - first_order_loss: 0.6581 - second_order_loss: 0.2166
Epoch 19/50
212/212 - 1s - loss: 0.8741 - first_order_loss: 0.6554 - second_order_loss: 0.2186
Epoch 20/50
212/212 - 1s - loss: 0.8640 - first_order_loss: 0.6539 - second_order_loss: 0.2101
Epoch 21/50
212/212 - 1s - loss: 0.8569 - first_order_loss: 0.6525 - second_order_loss: 0.2044
Epoch 22/50
212/212 - 1s - loss: 0.8502 - first_or

Epoch 48/50
211/211 - 1s - loss: 0.7286 - first_order_loss: 0.6253 - second_order_loss: 0.1032
Epoch 49/50
211/211 - 1s - loss: 0.7253 - first_order_loss: 0.6211 - second_order_loss: 0.1042
Epoch 50/50
211/211 - 1s - loss: 0.7209 - first_order_loss: 0.6230 - second_order_loss: 0.0979
AP：  0.8197659777255908
AUC SCORE:  0.8682766190998903
Epoch 1/50
212/212 - 1s - loss: 1.3863 - first_order_loss: 0.6932 - second_order_loss: 0.6931
Epoch 2/50
212/212 - 1s - loss: 1.3857 - first_order_loss: 0.6929 - second_order_loss: 0.6928
Epoch 3/50
212/212 - 1s - loss: 1.3839 - first_order_loss: 0.6925 - second_order_loss: 0.6914
Epoch 4/50
212/212 - 1s - loss: 1.3750 - first_order_loss: 0.6919 - second_order_loss: 0.6831
Epoch 5/50
212/212 - 1s - loss: 1.3432 - first_order_loss: 0.6910 - second_order_loss: 0.6523
Epoch 6/50
212/212 - 1s - loss: 1.2814 - first_order_loss: 0.6896 - second_order_loss: 0.5918
Epoch 7/50
212/212 - 1s - loss: 1.2083 - first_order_loss: 0.6878 - second_order_loss: 0.5205
Ep

Epoch 34/50
212/212 - 1s - loss: 0.7864 - first_order_loss: 0.6326 - second_order_loss: 0.1539
Epoch 35/50
212/212 - 1s - loss: 0.7799 - first_order_loss: 0.6334 - second_order_loss: 0.1465
Epoch 36/50
212/212 - 1s - loss: 0.7738 - first_order_loss: 0.6304 - second_order_loss: 0.1434
Epoch 37/50
212/212 - 1s - loss: 0.7727 - first_order_loss: 0.6304 - second_order_loss: 0.1423
Epoch 38/50
212/212 - 1s - loss: 0.7678 - first_order_loss: 0.6308 - second_order_loss: 0.1369
Epoch 39/50
212/212 - 1s - loss: 0.7624 - first_order_loss: 0.6310 - second_order_loss: 0.1314
Epoch 40/50
212/212 - 1s - loss: 0.7570 - first_order_loss: 0.6252 - second_order_loss: 0.1318
Epoch 41/50
212/212 - 1s - loss: 0.7533 - first_order_loss: 0.6288 - second_order_loss: 0.1245
Epoch 42/50
212/212 - 1s - loss: 0.7489 - first_order_loss: 0.6263 - second_order_loss: 0.1226
Epoch 43/50
212/212 - 1s - loss: 0.7446 - first_order_loss: 0.6260 - second_order_loss: 0.1186
Epoch 44/50
212/212 - 1s - loss: 0.7444 - first_or

Epoch 20/50
212/212 - 1s - loss: 0.8610 - first_order_loss: 0.6543 - second_order_loss: 0.2067
Epoch 21/50
212/212 - 1s - loss: 0.8527 - first_order_loss: 0.6517 - second_order_loss: 0.2010
Epoch 22/50
212/212 - 1s - loss: 0.8416 - first_order_loss: 0.6484 - second_order_loss: 0.1932
Epoch 23/50
212/212 - 1s - loss: 0.8343 - first_order_loss: 0.6482 - second_order_loss: 0.1861
Epoch 24/50
212/212 - 1s - loss: 0.8253 - first_order_loss: 0.6460 - second_order_loss: 0.1793
Epoch 25/50
212/212 - 1s - loss: 0.8185 - first_order_loss: 0.6432 - second_order_loss: 0.1753
Epoch 26/50
212/212 - 1s - loss: 0.8130 - first_order_loss: 0.6434 - second_order_loss: 0.1696
Epoch 27/50
212/212 - 1s - loss: 0.8039 - first_order_loss: 0.6420 - second_order_loss: 0.1619
Epoch 28/50
212/212 - 1s - loss: 0.7973 - first_order_loss: 0.6377 - second_order_loss: 0.1596
Epoch 29/50
212/212 - 1s - loss: 0.7914 - first_order_loss: 0.6386 - second_order_loss: 0.1528
Epoch 30/50
212/212 - 1s - loss: 0.7855 - first_or