In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import itertools
import pickle
import os
import random
from sklearn.model_selection import train_test_split

from sklearn.metrics import average_precision_score
from sklearn import metrics

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from ge import LINE

In [2]:
def get_edges_dict(traindata):
    if os.path.isfile("edgesdic.pkl"):
        f = open("edgesdic.pkl","rb")
        edges = pickle.load(f)
        f.close()
        return edges
    else:
        edges = dict()
        for edge in range(len(traindata)): 
            edges[(traindata.loc[[edge]]['id_1'].values[0],traindata.loc[[edge]]['id_2'].values[0])] = 1 
            edges[(traindata.loc[[edge]]['id_2'].values[0],traindata.loc[[edge]]['id_1'].values[0])] = 1  
        f = open("edgesdic.pkl","wb")
        pickle.dump(edges,f)
        f.close()
        return edges

def get_negative_edges(traindata,g):
    edges = get_edges_dict(traindata)
    # for missing edges.
    negative_edges = set([])
    maxNodenum = max(max(traindata['id_1']),max(traindata['id_2']))
    #產生與原圖同edge數目的negative edges
    while (len(negative_edges)<len(traindata)*2):
        node1=random.randint(0, maxNodenum) 
        node2=random.randint(0, maxNodenum) 
        tmp = edges.get((node1,node2),0) #edge不在graph裡標為0
        if tmp == 0 and node1!=node2 and g.has_node(node1) and g.has_node(node2): # if edge不在graph裡才要做處理
            negative_edges.add((node1,node2))
        else:
            continue
    return negative_edges

In [3]:
def combine_embedding(data,embeddings):
    i=0
    X = []
    for node1,node2 in data:
        X.append(np.concatenate((data[i],embeddings[int(node1)],embeddings[int(node2)])))
   # print(embeddings[str(int(data[0]))])
        i+=1
    return X

In [4]:
ori_df = pd.read_csv('data/karate_edges.txt', header=None, sep=' ')
ori_df.columns = ['id_1', 'id_2']

ori_G = nx.from_pandas_edgelist(ori_df, 'id_1', 'id_2')

In [5]:
model = LINE(ori_G, embedding_size=8, order='all')
model.train(batch_size=12, epochs=300, verbose=2)
LINE_embeddings = model.get_embeddings()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/300
39/39 - 0s - loss: 1.3862 - first_order_loss: 0.6933 - second_order_loss: 0.6929
Epoch 2/300
39/39 - 0s - loss: 1.3853 - first_order_loss: 0.6931 - second_order_loss: 0.6922
Epoch 3/300
39/39 - 0s - loss: 1.3835 - first_order_loss: 0.6931 - second_order_loss: 0.6904
Epoch 4/300
39/39 - 0s - loss: 1.3790 - first_order_loss: 0.6932 - second_order_loss: 0.6858
Epoch 5/300
39/39 - 0s - loss: 1.3714 - first_order_loss: 0.6929 - second_order_loss: 0.6784
Epoch 6/300
39/39 - 0s - loss: 1.3568 - first_order_loss: 0.6929 - second_order_loss: 0.6639
Epoch 7/300
39/39 - 0s - loss: 1.3399 - first_order_loss: 0.6928 - second_order_loss: 0.6471
Epoch 8/300
39/39 - 0s - loss: 1.3157 - first_order_loss: 0.6930 - second_ord

DeepCTR version 0.9.3 detected. Your version is 0.9.2.
Use `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.3


39/39 - 0s - loss: 1.0445 - first_order_loss: 0.6900 - second_order_loss: 0.3547
Epoch 19/300
39/39 - 0s - loss: 1.0706 - first_order_loss: 0.6900 - second_order_loss: 0.3807
Epoch 20/300
39/39 - 0s - loss: 0.9966 - first_order_loss: 0.6879 - second_order_loss: 0.3085
Epoch 21/300
39/39 - 0s - loss: 1.0460 - first_order_loss: 0.6890 - second_order_loss: 0.3572
Epoch 22/300
39/39 - 0s - loss: 1.0279 - first_order_loss: 0.6895 - second_order_loss: 0.3384
Epoch 23/300
39/39 - 0s - loss: 1.0175 - first_order_loss: 0.6840 - second_order_loss: 0.3335
Epoch 24/300
39/39 - 0s - loss: 0.9755 - first_order_loss: 0.6876 - second_order_loss: 0.2878
Epoch 25/300
39/39 - 0s - loss: 1.0082 - first_order_loss: 0.6836 - second_order_loss: 0.3245
Epoch 26/300
39/39 - 0s - loss: 0.9956 - first_order_loss: 0.6850 - second_order_loss: 0.3106
Epoch 27/300
39/39 - 0s - loss: 0.9836 - first_order_loss: 0.6796 - second_order_loss: 0.3040
Epoch 28/300
39/39 - 0s - loss: 0.9771 - first_order_loss: 0.6845 - secon

Epoch 106/300
39/39 - 0s - loss: 0.9084 - first_order_loss: 0.6652 - second_order_loss: 0.2433
Epoch 107/300
39/39 - 0s - loss: 0.9185 - first_order_loss: 0.6542 - second_order_loss: 0.2643
Epoch 108/300
39/39 - 0s - loss: 0.8804 - first_order_loss: 0.6589 - second_order_loss: 0.2213
Epoch 109/300
39/39 - 0s - loss: 0.9118 - first_order_loss: 0.6639 - second_order_loss: 0.2477
Epoch 110/300
39/39 - 0s - loss: 0.8852 - first_order_loss: 0.6609 - second_order_loss: 0.2244
Epoch 111/300
39/39 - 0s - loss: 0.8990 - first_order_loss: 0.6605 - second_order_loss: 0.2383
Epoch 112/300
39/39 - 0s - loss: 0.9120 - first_order_loss: 0.6747 - second_order_loss: 0.2372
Epoch 113/300
39/39 - 0s - loss: 0.9116 - first_order_loss: 0.6600 - second_order_loss: 0.2517
Epoch 114/300
39/39 - 0s - loss: 0.9016 - first_order_loss: 0.6644 - second_order_loss: 0.2372
Epoch 115/300
39/39 - 0s - loss: 0.8958 - first_order_loss: 0.6525 - second_order_loss: 0.2433
Epoch 116/300
39/39 - 0s - loss: 0.9022 - first_or

Epoch 193/300
39/39 - 0s - loss: 0.8295 - first_order_loss: 0.6490 - second_order_loss: 0.1805
Epoch 194/300
39/39 - 0s - loss: 0.8684 - first_order_loss: 0.6627 - second_order_loss: 0.2060
Epoch 195/300
39/39 - 0s - loss: 0.8643 - first_order_loss: 0.6485 - second_order_loss: 0.2157
Epoch 196/300
39/39 - 0s - loss: 0.8268 - first_order_loss: 0.6440 - second_order_loss: 0.1829
Epoch 197/300
39/39 - 0s - loss: 0.8572 - first_order_loss: 0.6438 - second_order_loss: 0.2137
Epoch 198/300
39/39 - 0s - loss: 0.8425 - first_order_loss: 0.6565 - second_order_loss: 0.1860
Epoch 199/300
39/39 - 0s - loss: 0.8334 - first_order_loss: 0.6509 - second_order_loss: 0.1823
Epoch 200/300
39/39 - 0s - loss: 0.8281 - first_order_loss: 0.6504 - second_order_loss: 0.1776
Epoch 201/300
39/39 - 0s - loss: 0.8549 - first_order_loss: 0.6397 - second_order_loss: 0.2153
Epoch 202/300
39/39 - 0s - loss: 0.8518 - first_order_loss: 0.6513 - second_order_loss: 0.2005
Epoch 203/300
39/39 - 0s - loss: 0.8416 - first_or

Epoch 280/300
39/39 - 0s - loss: 0.8152 - first_order_loss: 0.6448 - second_order_loss: 0.1705
Epoch 281/300
39/39 - 0s - loss: 0.8357 - first_order_loss: 0.6450 - second_order_loss: 0.1906
Epoch 282/300
39/39 - 0s - loss: 0.8161 - first_order_loss: 0.6528 - second_order_loss: 0.1632
Epoch 283/300
39/39 - 0s - loss: 0.8286 - first_order_loss: 0.6446 - second_order_loss: 0.1839
Epoch 284/300
39/39 - 0s - loss: 0.8473 - first_order_loss: 0.6584 - second_order_loss: 0.1889
Epoch 285/300
39/39 - 0s - loss: 0.8085 - first_order_loss: 0.6347 - second_order_loss: 0.1737
Epoch 286/300
39/39 - 0s - loss: 0.7996 - first_order_loss: 0.6441 - second_order_loss: 0.1553
Epoch 287/300
39/39 - 0s - loss: 0.8237 - first_order_loss: 0.6355 - second_order_loss: 0.1882
Epoch 288/300
39/39 - 0s - loss: 0.8316 - first_order_loss: 0.6501 - second_order_loss: 0.1815
Epoch 289/300
39/39 - 0s - loss: 0.7983 - first_order_loss: 0.6350 - second_order_loss: 0.1634
Epoch 290/300
39/39 - 0s - loss: 0.8503 - first_or

In [6]:
negative_edges = get_negative_edges(ori_df, ori_G)
train_df = ori_df

df_neg = pd.DataFrame(list(negative_edges), columns=['id_1', 'id_2'])
train_df = train_df.values
df_neg = df_neg.values

train_df = combine_embedding(train_df, LINE_embeddings)
df_neg = combine_embedding(df_neg, LINE_embeddings)

X_train_neg = df_neg

y_train_pos = np.ones(len(train_df))
y_train_neg = np.zeros(len(X_train_neg))

In [7]:
X_train = np.concatenate((train_df, X_train_neg))
y_train = np.concatenate((y_train_pos, y_train_neg))

In [8]:
clf = RandomForestClassifier(n_estimators=400)
clf.fit(X_train, y_train)

RandomForestClassifier(n_estimators=400)

In [9]:
all_edges = []
for i in range(34):
    for j in range(34):
        all_edges.append([i, j])
        
all_edges = pd.DataFrame(all_edges, columns=['id1', 'id2'])

all_df = all_edges.values
all_df = combine_embedding(all_df, LINE_embeddings)

all_df = np.array([list(all_df[i]) for i in range(len(all_df))])

predict_Y = clf.predict(all_df)

adj_rec = predict_Y.reshape(34, 34)

In [10]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

for i in range(34):
    for j in range(34):
        if i>=j :
            adj_rec[i, j] = 0

adj_rec = adj_rec + adj_rec.T

In [11]:
predict_edges = adj_rec.reshape(adj_rec.shape[0]*adj_rec.shape[0])

adj_true = nx.adjacency_matrix(ori_G).todense()
adj_true = np.array(adj_true)
true_edges = adj_true.reshape(34*34)

print("edges： ",predict_edges.sum())
print("ACC： ",accuracy_score(true_edges, predict_edges))
print("AP： ",precision_score(true_edges, predict_edges, average='macro'))
print("RECALL： ",recall_score(true_edges, predict_edges, average='macro'))
print("F1 SCORE： ",f1_score(true_edges, predict_edges, average='macro'))

edges：  204.0
ACC：  0.8062283737024222
AP：  0.6204481792717087
RECALL：  0.649948717948718
F1 SCORE：  0.6315118397085611


In [12]:
predict_graph = predict_edges.reshape(34, 34)
pd.DataFrame(predict_graph).to_csv('LINE_recon_karate.txt', header=None, index=False, sep=' ')