In [1]:
import random

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
import pickle
import sklearn.metrics as metrics

### functions

In [2]:
def edge_features(node_emb_1, node_emb_2, operator):
    """Combine the embeddings of an edge's two endpoints into one feature vector.

    Parameters
    ----------
    node_emb_1, node_emb_2 : array-like of numbers
        Embeddings of the two endpoint nodes; both are converted to float arrays.
    operator : str
        'Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2' or 'Concat'.
        Any other value prints a notice and falls back to 'Weighted-L1'.

    Returns
    -------
    list or numpy.ndarray
        A list with one value per zipped pair of components for the
        element-wise operators, or a flattened ndarray holding both embeddings
        back to back for 'Concat'.
    """
    # np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is
    # the documented replacement and produces the same float64 array.
    node_emb_1 = np.asarray(node_emb_1, dtype=float)
    node_emb_2 = np.asarray(node_emb_2, dtype=float)

    # 'Concat' is the only operator that is not applied component by component.
    if operator == 'Concat':
        return np.concatenate((node_emb_1, node_emb_2), axis=None)

    # combine two nodes' embeddings with the specified operator
    elementwise = {
        'Average': lambda x, y: (x + y) / 2.0,
        'Hadamard': lambda x, y: x * y,
        'Weighted-L1': lambda x, y: abs(x - y),
        'Weighted-L2': lambda x, y: abs(x - y)**2,
    }

    if operator not in elementwise:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        operator = 'Weighted-L1'

    combine = elementwise[operator]
    return [combine(x, y) for x, y in zip(node_emb_1, node_emb_2)]
def generate_edge_features(edge_list, node_embeddings, operator):
    """Return one feature vector per edge, in the order the edges are given.

    Each edge is a pair of node ids. The embedding of a node is read from
    ``node_embeddings[node_id - 1]`` and the two endpoint embeddings are
    combined by ``edge_features`` using ``operator``.

    NOTE(review): the ``- 1`` shift suggests node ids start at 1; an id of 0
    would index position -1 — confirm against the graph's node labelling.
    """
    return [
        edge_features(node_embeddings[first - 1], node_embeddings[second - 1], operator)
        for first, second in edge_list
    ]

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    """Sample a balanced training set of existing and non-existing edges.

    Parameters
    ----------
    graph_train : graph exposing ``edges`` and ``number_of_nodes()``
        Snapshot the positive edges are drawn from.
    num_edge_sample : int
        Number of positive edges (drawn with replacement) and, separately,
        number of negative node pairs to produce.
    node_embeddings : indexable
        Embedding table passed through to ``generate_edge_features``.
    edge_operator : str
        Operator name passed through to ``generate_edge_features``.

    Returns
    -------
    tuple
        ``(train_edges, train_edges_features_mtx, train_edges_labels)`` with
        the positives first (label 1) followed by the negatives (label 0).
    """
    edge_list = list(graph_train.edges)
    # Hash-based lookup: avoids scanning the whole edge list per candidate.
    existing_edges = set(edge_list)
    num_nodes = graph_train.number_of_nodes()

    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample

    random.seed(0)

    # sample edges with label 1 (true edges)
    for _ in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        train_edges.append(edge_list[rand_index])

    # sample edges with label 0 (non-exist edges)
    # random.seed above only controls the positives; the negatives used to come
    # from the unseeded global NumPy generator and changed on every run. A
    # private seeded generator makes them reproducible without touching
    # NumPy's global random state.
    # NOTE(review): candidates are drawn from 0..num_nodes-1 while
    # generate_edge_features reads embeddings at node_id - 1 — confirm the
    # node id range.
    negative_rng = np.random.RandomState(0)
    non_edge_num = 0
    while non_edge_num < num_edge_sample:
        rand_nodes = tuple(negative_rng.randint(low=0, high=num_nodes, size=2))

        if rand_nodes not in existing_edges:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)

    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    """Build a balanced test set from every edge of ``graph_test``.

    Parameters
    ----------
    graph_test : graph exposing ``edges`` and ``number_of_nodes()``
        Snapshot whose edges are the positive examples.
    node_embeddings : indexable
        Embedding table passed through to ``generate_edge_features``.
    edge_operator : str
        Operator name passed through to ``generate_edge_features``.

    Returns
    -------
    tuple
        ``(test_edges, test_edges_features_mtx, test_edges_labels)`` with the
        sampled non-edges first (label 0) followed by every edge of the graph
        (label 1).
    """
    edge_list = graph_test.edges
    num_nodes = graph_test.number_of_nodes()

    test_edges = []
    test_edges_labels = []

    # one negative pair per existing edge
    num_edge_sample = len(edge_list)

    # sample edges with label 0 (non-exist edges)
    # The negatives used to come from the unseeded global NumPy generator, so
    # every call (one per edge operator in the notebook) was scored on a
    # different test set. A private seeded generator keeps the negatives
    # identical across calls without touching NumPy's global random state.
    negative_rng = np.random.RandomState(1)
    non_edge_num = 0
    while non_edge_num < num_edge_sample:
        rand_nodes = tuple(negative_rng.randint(low=0, high=num_nodes, size=2))

        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1

    # every existing edge is a positive example
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)

    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)

    return test_edges, test_edges_features_mtx, test_edges_labels

def build_clf(feature_mtx, response_vec):
    """Fit a liblinear logistic-regression classifier and return it.

    ``feature_mtx`` holds one feature vector per sample and ``response_vec``
    the matching labels. ``verbose=1`` is kept, so the solver writes its own
    progress output while fitting.
    """
    return LogisticRegression(
        solver='liblinear',
        tol=1e-6,
        max_iter=5000,
        random_state=0,
        verbose=1,
    ).fit(feature_mtx, response_vec)

def pred_links(feature_mtx, LR_clf):
    """Return the labels ``LR_clf.predict`` assigns to the rows of ``feature_mtx``."""
    return LR_clf.predict(feature_mtx)

def precision_recall(predict_labels, true_labels):
    """Print evaluation metrics and return precision and recall for label 1.

    Parameters
    ----------
    predict_labels : sequence
        Predicted labels, compared against 0 and 1.
    true_labels : sequence
        Ground-truth labels, aligned with ``predict_labels``.

    Returns
    -------
    tuple of float
        ``(precision, recall)`` of the positive class. Either value is 0.0
        when its denominator is zero (no positive predictions, or no positive
        ground-truth labels) instead of raising ZeroDivisionError.
    """
    true_positive = false_positive = 0
    true_negative = false_negative = 0

    # Only samples whose true label is exactly 0 or 1 are counted.
    for p_label, true_label in zip(predict_labels, true_labels):
        if true_label == 1:
            if p_label == true_label:
                true_positive += 1
            else:
                false_negative += 1
        elif true_label == 0:
            if p_label == true_label:
                true_negative += 1
            else:
                false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)

    # Guard both ratios: a classifier that never predicts 1, or a label set
    # without positives, previously crashed here before anything was reported.
    predicted_positive = true_positive + false_positive
    actual_positive = true_positive + false_negative
    precision = true_positive / predicted_positive if predicted_positive else 0.0
    recall = true_positive / actual_positive if actual_positive else 0.0

    if (precision + recall) != 0.0:
        f1 = 2 * (precision * recall) / (precision + recall)
        print("F1: {}".format(f1))
    else:
        print("F1: Divide-by-zero")

    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))

    # NOTE(review): the notebook passes the output of LR_clf.predict here, so
    # the two ranking metrics below are computed from labels rather than
    # scores — confirm that is intended.
    mean_avg_precision = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(mean_avg_precision))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))

    return precision, recall

### Amazon-Food - Monthly
### load data

In [3]:
# Read the pickled sequence of graph snapshots and keep only the last 20.
# NOTE: pickle.load can execute code embedded in the file — only load trusted data.
with open('../data/amazon_food/amazon_food_monthly_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)[-20:]

# The second-to-last snapshot is used for training, the last one for testing.
graph_train, graph_test = graphs[-2], graphs[-1]

# parameters: value handed to generate_train_set as num_edge_sample
num_edge_sample = 400

In [4]:
# Number of snapshots held in `graphs`; the bare name on the last line makes
# the notebook display the value.
num_snapshots = len(graphs)
num_snapshots

20

In [5]:
# Load one saved embedding array per snapshot; files are numbered
# 0 .. num_snapshots - 1 and kept in that order in emb_list.
emb_list = []
for i in range(num_snapshots):
    file = '../data/amazon_food/DW_Embeddings/amazon_month/amazon_month_' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
# Sanity check: number of embedding arrays loaded.
print(len(emb_list))

20


### independent / no combination

In [6]:
# Use a single snapshot's embeddings without combining: third-from-last for
# training, second-from-last for testing.
# NOTE(review): presumably emb_list is aligned with `graphs`, making each one
# the snapshot just before graph_train / graph_test — confirm.
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [7]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1280
TN:  1536
FP:  1249
FN:  1505
F1: 0.48174633044787357
[[1536 1249]
 [1505 1280]]
              precision    recall  f1-score   support

           0       0.51      0.55      0.53      2785
           1       0.51      0.46      0.48      2785

   micro avg       0.51      0.51      0.51      5570
   macro avg       0.51      0.51      0.50      5570
weighted avg       0.51      0.51      0.50      5570

Mean Average Precision: 0.5028168754121846
Area Under ROC Curve: 0.5055655296229803
Precision:  0.5061289047054172
Recall:  0.45960502692998206
[LibLinear]Edge Operator: Hadamard
TP:  1380
TN:  1422
FP:  1363
FN:  1405
F1: 0.4992764109985528
[[1422 1363]
 [1405 1380]]
              precision    recall  f1-score   support

           0       0.50      0.51      0.51      2785
           1       0.50      0.50      0.50      2785

   micro avg       0.50      0.50      0.50      5570
   macro avg       0.50      0.50      0.50      5570
weighte

### sum

In [8]:
# Convert every snapshot embedding to a float array.
# np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is the
# documented replacement and gives the same result.
for i in range(len(emb_list)):
    emb_list[i] = np.asarray(emb_list[i], dtype=float)
# Element-wise sum over snapshots: all but the last two for training,
# all but the last one for testing.
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]), axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]), axis=0)

In [9]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1329
TN:  1493
FP:  1292
FN:  1456
F1: 0.4916759156492786
[[1493 1292]
 [1456 1329]]
              precision    recall  f1-score   support

           0       0.51      0.54      0.52      2785
           1       0.51      0.48      0.49      2785

   micro avg       0.51      0.51      0.51      5570
   macro avg       0.51      0.51      0.51      5570
weighted avg       0.51      0.51      0.51      5570

Mean Average Precision: 0.5033682513218398
Area Under ROC Curve: 0.5066427289048474
Precision:  0.507058374666158
Recall:  0.47719928186714544
[LibLinear]Edge Operator: Hadamard
TP:  1464
TN:  1391
FP:  1394
FN:  1321
F1: 0.5188729399255715
[[1391 1394]
 [1321 1464]]
              precision    recall  f1-score   support

           0       0.51      0.50      0.51      2785
           1       0.51      0.53      0.52      2785

   micro avg       0.51      0.51      0.51      5570
   macro avg       0.51      0.51      0.51      5570
weighted 

### expdecay

In [16]:
# Weights exp(-0.3 * i) for i = 1..20, i.e. a strictly decreasing sequence.
exps = [np.exp(-i * 0.3) for i in range(1,21)]
# Training embedding: weighted sum of every snapshot except the last two.
# zip stops at the shorter sequence, so only as many weights as snapshots are used.
# NOTE(review): weights shrink as the snapshot index grows, so emb_list[0]
# contributes the most and higher-index snapshots the least — confirm this
# direction of decay is intended.
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c
# Sanity check: number of snapshots and number of weights.
print(len(emb_list))
print(len(exps))

20
20


In [17]:
# Testing embedding: weighted sum of every snapshot except the last one,
# reusing the `exps` weights computed in the previous cell.
# NOTE(review): weights shrink as the snapshot index grows, so emb_list[0]
# contributes the most — confirm this direction of decay is intended.
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [18]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1225
TN:  1528
FP:  1257
FN:  1560
F1: 0.4651604328839947
[[1528 1257]
 [1560 1225]]
              precision    recall  f1-score   support

           0       0.49      0.55      0.52      2785
           1       0.49      0.44      0.47      2785

   micro avg       0.49      0.49      0.49      5570
   macro avg       0.49      0.49      0.49      5570
weighted avg       0.49      0.49      0.49      5570

Mean Average Precision: 0.49716450363623477
Area Under ROC Curve: 0.4942549371633752
Precision:  0.4935535858178888
Recall:  0.4398563734290844
[LibLinear]Edge Operator: Hadamard
TP:  1447
TN:  1365
FP:  1420
FN:  1338
F1: 0.5120311394196745
[[1365 1420]
 [1338 1447]]
              precision    recall  f1-score   support

           0       0.50      0.49      0.50      2785
           1       0.50      0.52      0.51      2785

   micro avg       0.50      0.50      0.50      5570
   macro avg       0.50      0.50      0.50      5570
weighted

### Amazon-Food - Weekly
### load data

In [19]:
# Read the pickled sequence of graph snapshots and keep only the last 20.
# NOTE: pickle.load can execute code embedded in the file — only load trusted data.
with open('../data/amazon_food/amazon_food_weekly_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)[-20:]

# The second-to-last snapshot is used for training, the last one for testing.
graph_train, graph_test = graphs[-2], graphs[-1]

# parameters: value handed to generate_train_set as num_edge_sample
num_edge_sample = 400

In [20]:
# Number of snapshots held in `graphs`; the bare name on the last line makes
# the notebook display the value.
num_snapshots = len(graphs)
num_snapshots

20

In [21]:
# Load one saved embedding array per snapshot; files are numbered
# 0 .. num_snapshots - 1 and kept in that order in emb_list.
emb_list = []
for i in range(num_snapshots):
    file = '../data/amazon_food/DW_Embeddings/amazon_week/amazon_week_' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
# Sanity check: number of embedding arrays loaded.
print(len(emb_list))

20


### independent / no combination

In [22]:
# Use a single snapshot's embeddings without combining: third-from-last for
# training, second-from-last for testing.
# NOTE(review): presumably emb_list is aligned with `graphs`, making each one
# the snapshot just before graph_train / graph_test — confirm.
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [23]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  580
TN:  730
FP:  548
FN:  698
F1: 0.48212801330008315
[[730 548]
 [698 580]]
              precision    recall  f1-score   support

           0       0.51      0.57      0.54      1278
           1       0.51      0.45      0.48      1278

   micro avg       0.51      0.51      0.51      2556
   macro avg       0.51      0.51      0.51      2556
weighted avg       0.51      0.51      0.51      2556

Mean Average Precision: 0.5064373633447652
Area Under ROC Curve: 0.5125195618153364
Precision:  0.5141843971631206
Recall:  0.4538341158059468
[LibLinear]Edge Operator: Hadamard
TP:  641
TN:  609
FP:  669
FN:  637
F1: 0.49536321483771256
[[609 669]
 [637 641]]
              precision    recall  f1-score   support

           0       0.49      0.48      0.48      1278
           1       0.49      0.50      0.50      1278

   micro avg       0.49      0.49      0.49      2556
   macro avg       0.49      0.49      0.49      2556
weighted avg       0.49

### sum

In [24]:
# Convert every snapshot embedding to a float array.
# np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is the
# documented replacement and gives the same result.
for i in range(len(emb_list)):
    emb_list[i] = np.asarray(emb_list[i], dtype=float)
# Element-wise sum over snapshots: all but the last two for training,
# all but the last one for testing.
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]), axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]), axis=0)

In [25]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  569
TN:  723
FP:  555
FN:  709
F1: 0.47377185678601164
[[723 555]
 [709 569]]
              precision    recall  f1-score   support

           0       0.50      0.57      0.53      1278
           1       0.51      0.45      0.47      1278

   micro avg       0.51      0.51      0.51      2556
   macro avg       0.51      0.51      0.50      2556
weighted avg       0.51      0.51      0.50      2556

Mean Average Precision: 0.5027727654976916
Area Under ROC Curve: 0.5054773082942097
Precision:  0.5062277580071174
Recall:  0.44522691705790296
[LibLinear]Edge Operator: Hadamard
TP:  572
TN:  711
FP:  567
FN:  706
F1: 0.4733140256516342
[[711 567]
 [706 572]]
              precision    recall  f1-score   support

           0       0.50      0.56      0.53      1278
           1       0.50      0.45      0.47      1278

   micro avg       0.50      0.50      0.50      2556
   macro avg       0.50      0.50      0.50      2556
weighted avg       0.50

### expdecay

In [30]:
# Weights exp(-0.3 * i) for i = 1..20, i.e. a strictly decreasing sequence.
exps = [np.exp(-i * 0.3) for i in range(1,21)]
# Training embedding: weighted sum of every snapshot except the last two.
# zip stops at the shorter sequence, so only as many weights as snapshots are used.
# NOTE(review): weights shrink as the snapshot index grows, so emb_list[0]
# contributes the most and higher-index snapshots the least — confirm this
# direction of decay is intended.
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
# Testing embedding: same weighting over every snapshot except the last one.
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 
# Sanity check: number of snapshots and number of weights.
print(len(emb_list))
print(len(exps))

20
20


In [31]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  539
TN:  766
FP:  512
FN:  739
F1: 0.4628595963933018
[[766 512]
 [739 539]]
              precision    recall  f1-score   support

           0       0.51      0.60      0.55      1278
           1       0.51      0.42      0.46      1278

   micro avg       0.51      0.51      0.51      2556
   macro avg       0.51      0.51      0.51      2556
weighted avg       0.51      0.51      0.51      2556

Mean Average Precision: 0.5054173758057383
Area Under ROC Curve: 0.5105633802816901
Precision:  0.5128449096098954
Recall:  0.4217527386541471
[LibLinear]Edge Operator: Hadamard
TP:  628
TN:  648
FP:  630
FN:  650
F1: 0.49526813880126186
[[648 630]
 [650 628]]
              precision    recall  f1-score   support

           0       0.50      0.51      0.50      1278
           1       0.50      0.49      0.50      1278

   micro avg       0.50      0.50      0.50      2556
   macro avg       0.50      0.50      0.50      2556
weighted avg       0.50 

### Amazon-Food - Equal Monthly
### load data

In [32]:
# Read the pickled sequence of graph snapshots and keep only the last 20.
# NOTE: pickle.load can execute code embedded in the file — only load trusted data.
with open('../data/amazon_food/amazon_food_equal_monthly_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)[-20:]

# The second-to-last snapshot is used for training, the last one for testing.
graph_train, graph_test = graphs[-2], graphs[-1]

# parameters: value handed to generate_train_set as num_edge_sample
num_edge_sample = 400

In [33]:
# Number of snapshots held in `graphs`; the bare name on the last line makes
# the notebook display the value.
num_snapshots = len(graphs)
num_snapshots

20

In [34]:
# Load one saved embedding array per snapshot; files are numbered
# 0 .. num_snapshots - 1 and kept in that order in emb_list.
emb_list = []
for i in range(num_snapshots):
    file = '../data/amazon_food/DW_Embeddings/amazon_equal_month/amazon_equal_month_' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
# Sanity check: number of embedding arrays loaded.
print(len(emb_list))

20


### independent / no combination

In [35]:
# Use a single snapshot's embeddings without combining: third-from-last for
# training, second-from-last for testing.
# NOTE(review): presumably emb_list is aligned with `graphs`, making each one
# the snapshot just before graph_train / graph_test — confirm.
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [36]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  298
TN:  395
FP:  235
FN:  332
F1: 0.5124677558039553
[[395 235]
 [332 298]]
              precision    recall  f1-score   support

           0       0.54      0.63      0.58       630
           1       0.56      0.47      0.51       630

   micro avg       0.55      0.55      0.55      1260
   macro avg       0.55      0.55      0.55      1260
weighted avg       0.55      0.55      0.55      1260

Mean Average Precision: 0.5279549718574108
Area Under ROC Curve: 0.55
Precision:  0.5590994371482176
Recall:  0.473015873015873
[LibLinear]Edge Operator: Hadamard
TP:  339
TN:  322
FP:  308
FN:  291
F1: 0.5309318715740016
[[322 308]
 [291 339]]
              precision    recall  f1-score   support

           0       0.53      0.51      0.52       630
           1       0.52      0.54      0.53       630

   micro avg       0.52      0.52      0.52      1260
   macro avg       0.52      0.52      0.52      1260
weighted avg       0.52      0.52      0

### sum

In [37]:
# Convert every snapshot embedding to a float array.
# np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is the
# documented replacement and gives the same result.
for i in range(len(emb_list)):
    emb_list[i] = np.asarray(emb_list[i], dtype=float)
# Element-wise sum over snapshots: all but the last two for training,
# all but the last one for testing.
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]), axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]), axis=0)

In [38]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  306
TN:  371
FP:  259
FN:  324
F1: 0.5121338912133891
[[371 259]
 [324 306]]
              precision    recall  f1-score   support

           0       0.53      0.59      0.56       630
           1       0.54      0.49      0.51       630

   micro avg       0.54      0.54      0.54      1260
   macro avg       0.54      0.54      0.54      1260
weighted avg       0.54      0.54      0.54      1260

Mean Average Precision: 0.5202022756005057
Area Under ROC Curve: 0.5373015873015874
Precision:  0.5415929203539823
Recall:  0.4857142857142857
[LibLinear]Edge Operator: Hadamard
TP:  275
TN:  346
FP:  284
FN:  355
F1: 0.46257359125315395
[[346 284]
 [355 275]]
              precision    recall  f1-score   support

           0       0.49      0.55      0.52       630
           1       0.49      0.44      0.46       630

   micro avg       0.49      0.49      0.49      1260
   macro avg       0.49      0.49      0.49      1260
weighted avg       0.49 

### expdecay

In [43]:
# Weights exp(-0.3 * i) for i = 1..20, i.e. a strictly decreasing sequence.
exps = [np.exp(-i * 0.3) for i in range(1,21)]
# Training embedding: weighted sum of every snapshot except the last two.
# zip stops at the shorter sequence, so only as many weights as snapshots are used.
# NOTE(review): weights shrink as the snapshot index grows, so emb_list[0]
# contributes the most and higher-index snapshots the least — confirm this
# direction of decay is intended.
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
# Testing embedding: same weighting over every snapshot except the last one.
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 
# Sanity check: number of snapshots and number of weights.
print(len(emb_list))
print(len(exps))

20
20


In [44]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  318
TN:  411
FP:  219
FN:  312
F1: 0.544987146529563
[[411 219]
 [312 318]]
              precision    recall  f1-score   support

           0       0.57      0.65      0.61       630
           1       0.59      0.50      0.54       630

   micro avg       0.58      0.58      0.58      1260
   macro avg       0.58      0.58      0.58      1260
weighted avg       0.58      0.58      0.58      1260

Mean Average Precision: 0.5465283320031924
Area Under ROC Curve: 0.5785714285714286
Precision:  0.5921787709497207
Recall:  0.5047619047619047
[LibLinear]Edge Operator: Hadamard
TP:  323
TN:  299
FP:  331
FN:  307
F1: 0.5031152647975078
[[299 331]
 [307 323]]
              precision    recall  f1-score   support

           0       0.49      0.47      0.48       630
           1       0.49      0.51      0.50       630

   micro avg       0.49      0.49      0.49      1260
   macro avg       0.49      0.49      0.49      1260
weighted avg       0.49   

### Amazon-Food - Equal Weekly
### load data

In [45]:
# Read the pickled sequence of graph snapshots and keep only the last 20.
# NOTE: pickle.load can execute code embedded in the file — only load trusted data.
with open('../data/amazon_food/amazon_food_equal_weekly_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)[-20:]

# The second-to-last snapshot is used for training, the last one for testing.
graph_train, graph_test = graphs[-2], graphs[-1]

# parameters: value handed to generate_train_set as num_edge_sample
num_edge_sample = 400

In [46]:
# Number of snapshots held in `graphs`; the bare name on the last line makes
# the notebook display the value.
num_snapshots = len(graphs)
num_snapshots

20

In [47]:
# Load one saved embedding array per snapshot; files are numbered
# 0 .. num_snapshots - 1 and kept in that order in emb_list.
emb_list = []
for i in range(num_snapshots):
    file = '../data/amazon_food/DW_Embeddings/amazon_equal_week/amazon_equal_week_' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
# Sanity check: number of embedding arrays loaded.
print(len(emb_list))

20


### independent / no combination

In [48]:
# Use a single snapshot's embeddings without combining: third-from-last for
# training, second-from-last for testing.
# NOTE(review): presumably emb_list is aligned with `graphs`, making each one
# the snapshot just before graph_train / graph_test — confirm.
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [49]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  143
TN:  196
FP:  124
FN:  177
F1: 0.48722316865417375
[[196 124]
 [177 143]]
              precision    recall  f1-score   support

           0       0.53      0.61      0.57       320
           1       0.54      0.45      0.49       320

   micro avg       0.53      0.53      0.53       640
   macro avg       0.53      0.53      0.53       640
weighted avg       0.53      0.53      0.53       640

Mean Average Precision: 0.5159000468164794
Area Under ROC Curve: 0.5296875
Precision:  0.5355805243445693
Recall:  0.446875
[LibLinear]Edge Operator: Hadamard
TP:  129
TN:  193
FP:  127
FN:  191
F1: 0.44791666666666674
[[193 127]
 [191 129]]
              precision    recall  f1-score   support

           0       0.50      0.60      0.55       320
           1       0.50      0.40      0.45       320

   micro avg       0.50      0.50      0.50       640
   macro avg       0.50      0.50      0.50       640
weighted avg       0.50      0.50      0.5

### sum

In [50]:
# Convert every snapshot embedding to a float array.
# np.asfarray was removed in NumPy 2.0; np.asarray with a float dtype is the
# documented replacement and gives the same result.
for i in range(len(emb_list)):
    emb_list[i] = np.asarray(emb_list[i], dtype=float)
# Element-wise sum over snapshots: all but the last two for training,
# all but the last one for testing.
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]), axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]), axis=0)

In [51]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  136
TN:  208
FP:  112
FN:  184
F1: 0.47887323943661964
[[208 112]
 [184 136]]
              precision    recall  f1-score   support

           0       0.53      0.65      0.58       320
           1       0.55      0.42      0.48       320

   micro avg       0.54      0.54      0.54       640
   macro avg       0.54      0.54      0.53       640
weighted avg       0.54      0.54      0.53       640

Mean Average Precision: 0.5205645161290322
Area Under ROC Curve: 0.5375
Precision:  0.5483870967741935
Recall:  0.425
[LibLinear]Edge Operator: Hadamard
TP:  130
TN:  189
FP:  131
FN:  190
F1: 0.44750430292598964
[[189 131]
 [190 130]]
              precision    recall  f1-score   support

           0       0.50      0.59      0.54       320
           1       0.50      0.41      0.45       320

   micro avg       0.50      0.50      0.50       640
   macro avg       0.50      0.50      0.49       640
weighted avg       0.50      0.50      0.49     

### expdecay

In [56]:
# Weights exp(-0.3 * i) for i = 1..20, i.e. a strictly decreasing sequence.
exps = [np.exp(-i * 0.3) for i in range(1,21)]
# Training embedding: weighted sum of every snapshot except the last two.
# zip stops at the shorter sequence, so only as many weights as snapshots are used.
# NOTE(review): weights shrink as the snapshot index grows, so emb_list[0]
# contributes the most and higher-index snapshots the least — confirm this
# direction of decay is intended.
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
# Testing embedding: same weighting over every snapshot except the last one.
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 
# Sanity check: number of weights and number of snapshots.
print(len(exps))
print(len(emb_list))

20
20


In [57]:
for edge_operator in ('Average', 'Hadamard', 'Weighted-L1', 'Weighted-L2', 'Concat'):
    # Edge samples, features and labels for the training snapshot.
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(
        graph_train, num_edge_sample, node_embeddings_training, edge_operator
    )
    # Edge samples, features and labels for the test snapshot.
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(
        graph_test, node_embeddings_testing, edge_operator
    )

    # Fit before printing the operator name so the output order is unchanged.
    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)
    print("Edge Operator: {}".format(edge_operator))

    # Predict on the test edges and report the metrics.
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(
        list(predict_edges_labels), list(test_edges_labels)
    )
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  156
TN:  180
FP:  140
FN:  164
F1: 0.5064935064935064
[[180 140]
 [164 156]]
              precision    recall  f1-score   support

           0       0.52      0.56      0.54       320
           1       0.53      0.49      0.51       320

   micro avg       0.53      0.53      0.53       640
   macro avg       0.53      0.53      0.52       640
weighted avg       0.53      0.53      0.52       640

Mean Average Precision: 0.5131756756756756
Area Under ROC Curve: 0.525
Precision:  0.527027027027027
Recall:  0.4875
[LibLinear]Edge Operator: Hadamard
TP:  140
TN:  185
FP:  135
FN:  180
F1: 0.47058823529411764
[[185 135]
 [180 140]]
              precision    recall  f1-score   support

           0       0.51      0.58      0.54       320
           1       0.51      0.44      0.47       320

   micro avg       0.51      0.51      0.51       640
   macro avg       0.51      0.51      0.51       640
weighted avg       0.51      0.51      0.51       