In [3]:
import random

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
import pickle
import sklearn.metrics as metrics

### functions

In [41]:
def edge_features(node_emb_1, node_emb_2, operator):
    node_emb_1 = np.asfarray(node_emb_1,float)
    node_emb_2 = np.asfarray(node_emb_2, float)
    # combine two nodes' embeddings with specificed operator
    if operator == 'Average':
        edge = [((x + y) / 2.0) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Hadamard':
        edge = [(x * y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L1':
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L2':
        edge = [abs(x - y)**2 for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Concat':
        edge = np.concatenate((node_emb_1, node_emb_2), axis=None) 
    else:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
        
    return edge
def generate_edge_features(edge_list, node_embeddings, operator):
    edge_features_mtx = []
    
    # generate features for each edge in the list
    for node_index_1, node_index_2 in edge_list:
        node_emb_1 = node_embeddings[node_index_1-1]
        node_emb_2 = node_embeddings[node_index_2-1]
        
        edge_features_mtx.append(edge_features(node_emb_1, node_emb_2, operator))
        
    return edge_features_mtx

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    edge_list = list(graph_train.edges)
    num_nodes = graph_train.number_of_nodes()
    
    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample
    
    random.seed(0)
    
    # sample edges with label 1 (true edges)
    for edge_num in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        
        #train_edges.append(tuple(edge_list[rand_index]))
        train_edges.append(edge_list[rand_index])
    non_edge_num = 0
    
    # sample edges with label 0 (non-exist edges)
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)
            
    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    num_edge_sample = len(edge_list)
    non_edge_num = 0 
    # sample edges with label 0 (non-exist edges)
    
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1
        
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)
    
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels

def build_clf(feature_mtx, response_vec):
   
    logistic_regression_model = LogisticRegression(random_state = 0,max_iter=5000,solver='liblinear',verbose=1,tol=1e-6)
    binary_clf = logistic_regression_model.fit(feature_mtx, response_vec)
    
    return binary_clf

def pred_links(feature_mtx, LR_clf):
    predict_edges_labels = LR_clf.predict(feature_mtx)
    
    return predict_edges_labels

def precision_recall(predict_labels, true_labels):
    true_positive  = false_positive = 0
    true_negative =  false_negative = 0
    
    for p_label, true_label in zip(predict_labels, true_labels):
        
        #print(p_label,true_label)
        if p_label == true_label and true_label == 1:
            true_positive += 1
        elif p_label == true_label and true_label == 0:
            true_negative += 1
        elif p_label != true_label and true_label == 1:
            false_negative += 1
        elif p_label != true_label and true_label == 0:
            false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)
    
    precision = recall = 0
    try:
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        if (precision + recall) != 0.0:
            f1 = 2 * (precision * recall) / (precision + recall)
            print("F1: {}".format(f1))            
    except:
        print("F1: Divide-by-zero")
    
    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))
    map = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(map))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))
    
    return precision, recall

### RM - Monthly
### load data

In [5]:
# load the training and testing graph
with open('../RM_monthly.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400

In [7]:
num_snapshots = len(graphs)
num_snapshots

11

In [16]:
emb_list = []
for i in range(num_snapshots):
    file = '../RM_emb/em-vc-month/em-vc-month-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

11


### independent / no combination

In [17]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [18]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  4
TN:  221
FP:  1
FN:  218
F1: 0.035242290748898675
[[221   1]
 [218   4]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67       222
           1       0.80      0.02      0.04       222

    accuracy                           0.51       444
   macro avg       0.65      0.51      0.35       444
weighted avg       0.65      0.51      0.35       444

Mean Average Precision: 0.5054054054054054
Area Under ROC Curve: 0.5067567567567568
Precision:  0.8
Recall:  0.018018018018018018
[LibLinear]Edge Operator: Hadamard
TP:  17
TN:  218
FP:  4
FN:  205
F1: 0.13991769547325103
[[218   4]
 [205  17]]
              precision    recall  f1-score   support

           0       0.52      0.98      0.68       222
           1       0.81      0.08      0.14       222

    accuracy                           0.53       444
   macro avg       0.66      0.53      0.41       444
weighted avg       0.66      0.53      0.4

### sum

In [19]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [20]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  204
TN:  218
FP:  4
FN:  18
F1: 0.9488372093023255
[[218   4]
 [ 18 204]]
              precision    recall  f1-score   support

           0       0.92      0.98      0.95       222
           1       0.98      0.92      0.95       222

    accuracy                           0.95       444
   macro avg       0.95      0.95      0.95       444
weighted avg       0.95      0.95      0.95       444

Mean Average Precision: 0.9417879417879418
Area Under ROC Curve: 0.9504504504504504
Precision:  0.9807692307692307
Recall:  0.918918918918919
[LibLinear]Edge Operator: Hadamard
TP:  100
TN:  177
FP:  45
FN:  122
F1: 0.5449591280653951
[[177  45]
 [122 100]]
              precision    recall  f1-score   support

           0       0.59      0.80      0.68       222
           1       0.69      0.45      0.54       222

    accuracy                           0.62       444
   macro avg       0.64      0.62      0.61       444
weighted avg       0.64      0

### expdecay

In [27]:
exps = [np.exp(-i * 0.9) for i in range(1,17)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c

In [28]:
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [29]:
for edge_operator in [ 'Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  119
TN:  219
FP:  3
FN:  103
F1: 0.6918604651162791
[[219   3]
 [103 119]]
              precision    recall  f1-score   support

           0       0.68      0.99      0.81       222
           1       0.98      0.54      0.69       222

    accuracy                           0.76       444
   macro avg       0.83      0.76      0.75       444
weighted avg       0.83      0.76      0.75       444

Mean Average Precision: 0.7548368040171318
Area Under ROC Curve: 0.7612612612612614
Precision:  0.9754098360655737
Recall:  0.536036036036036
[LibLinear]Edge Operator: Hadamard
TP:  61
TN:  212
FP:  10
FN:  161
F1: 0.416382252559727
[[212  10]
 [161  61]]
              precision    recall  f1-score   support

           0       0.57      0.95      0.71       222
           1       0.86      0.27      0.42       222

    accuracy                           0.61       444
   macro avg       0.71      0.61      0.56       444
weighted avg       0.71      0.

### RM - Weekly
### load data

In [21]:
# load the training and testing graph
with open('../.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400

In [22]:
num_snapshots = len(graphs)
num_snapshots

74

In [23]:
emb_list = []
for i in range(num_snapshots):
    file = '../data/email_EU/embeddings/em-email-week-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

74


### independent / no combination

In [24]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [25]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  758
TN:  1905
FP:  780
FN:  1927
F1: 0.35898650248638403
[[1905  780]
 [1927  758]]
              precision    recall  f1-score   support

           0       0.50      0.71      0.58      2685
           1       0.49      0.28      0.36      2685

   micro avg       0.50      0.50      0.50      5370
   macro avg       0.49      0.50      0.47      5370
weighted avg       0.49      0.50      0.47      5370

Mean Average Precision: 0.49798088402312124
Area Under ROC Curve: 0.4959031657355679
Precision:  0.4928478543563069
Recall:  0.2823091247672253
[LibLinear]Edge Operator: Hadamard
TP:  1765
TN:  1024
FP:  1661
FN:  920
F1: 0.5776468663066602
[[1024 1661]
 [ 920 1765]]
              precision    recall  f1-score   support

           0       0.53      0.38      0.44      2685
           1       0.52      0.66      0.58      2685

   micro avg       0.52      0.52      0.52      5370
   macro avg       0.52      0.52      0.51      5370
weighted a

### sum

In [26]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [27]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1672
TN:  1563
FP:  1122
FN:  1013
F1: 0.610330352254061
[[1563 1122]
 [1013 1672]]
              precision    recall  f1-score   support

           0       0.61      0.58      0.59      2685
           1       0.60      0.62      0.61      2685

   micro avg       0.60      0.60      0.60      5370
   macro avg       0.60      0.60      0.60      5370
weighted avg       0.60      0.60      0.60      5370

Mean Average Precision: 0.5612912212789044
Area Under ROC Curve: 0.6024208566108008
Precision:  0.5984251968503937
Recall:  0.6227188081936685
[LibLinear]Edge Operator: Hadamard
TP:  1382
TN:  1425
FP:  1260
FN:  1303
F1: 0.5188661535573493
[[1425 1260]
 [1303 1382]]
              precision    recall  f1-score   support

           0       0.52      0.53      0.53      2685
           1       0.52      0.51      0.52      2685

   micro avg       0.52      0.52      0.52      5370
   macro avg       0.52      0.52      0.52      5370
weighted a

### expdecay

In [41]:
exps = [np.exp(-i * 0.3) for i in range(1,74)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [42]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1378
TN:  1756
FP:  929
FN:  1307
F1: 0.5520833333333333
[[1756  929]
 [1307 1378]]
              precision    recall  f1-score   support

           0       0.57      0.65      0.61      2685
           1       0.60      0.51      0.55      2685

   micro avg       0.58      0.58      0.58      5370
   macro avg       0.59      0.58      0.58      5370
weighted avg       0.59      0.58      0.58      5370

Mean Average Precision: 0.5499428909988949
Area Under ROC Curve: 0.5836126629422719
Precision:  0.5973125270914608
Recall:  0.5132216014897579
[LibLinear]Edge Operator: Hadamard
TP:  1947
TN:  878
FP:  1807
FN:  738
F1: 0.604752290728374
[[ 878 1807]
 [ 738 1947]]
              precision    recall  f1-score   support

           0       0.54      0.33      0.41      2685
           1       0.52      0.73      0.60      2685

   micro avg       0.53      0.53      0.53      5370
   macro avg       0.53      0.53      0.51      5370
weighted avg 

### Email-EU - Equal Monthly
### load data

In [30]:
# load the training and testing graph
with open('../RM_weekly.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400

In [31]:
num_snapshots = len(graphs)
num_snapshots

51

In [32]:
emb_list = []
for i in range(num_snapshots):
    file = '../RM_emb/em-vc-week/em-vc-week-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

51


### independent / no combination

In [33]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [43]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  0
TN:  14
FP:  0
FN:  14
F1: Divide-by-zero
[[14  0]
 [14  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        14
           1       0.00      0.00      0.00        14

    accuracy                           0.50        28
   macro avg       0.25      0.50      0.33        28
weighted avg       0.25      0.50      0.33        28

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0
[LibLinear]Edge Operator: Hadamard
TP:  3
TN:  14
FP:  0
FN:  11
F1: 0.35294117647058826
[[14  0]
 [11  3]]
              precision    recall  f1-score   support

           0       0.56      1.00      0.72        14
           1       1.00      0.21      0.35        14

    accuracy                           0.61        28
   macro avg       0.78      0.61      0.54        28
weighted avg       0.78      0.61      0.54        28

Mean Average Precision: 0.6071428571428571
Area Under ROC

  'precision', 'predicted', average, warn_for)


[LibLinear]Edge Operator: Weighted-L1
TP:  0
TN:  14
FP:  0
FN:  14
F1: Divide-by-zero
[[14  0]
 [14  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        14
           1       0.00      0.00      0.00        14

    accuracy                           0.50        28
   macro avg       0.25      0.50      0.33        28
weighted avg       0.25      0.50      0.33        28

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0


  'precision', 'predicted', average, warn_for)


[LibLinear]Edge Operator: Weighted-L2
TP:  0
TN:  14
FP:  0
FN:  14
F1: Divide-by-zero
[[14  0]
 [14  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        14
           1       0.00      0.00      0.00        14

    accuracy                           0.50        28
   macro avg       0.25      0.50      0.33        28
weighted avg       0.25      0.50      0.33        28

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0
[LibLinear]Edge Operator: Concat
TP:  0
TN:  14
FP:  0
FN:  14
F1: Divide-by-zero
[[14  0]
 [14  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        14
           1       0.00      0.00      0.00        14

    accuracy                           0.50        28
   macro avg       0.25      0.50      0.33        28
weighted avg       0.25      0.50      0.33        28

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precis

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


### sum

In [44]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [45]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  14
TN:  14
FP:  0
FN:  0
F1: 1.0
[[14  0]
 [ 0 14]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14

    accuracy                           1.00        28
   macro avg       1.00      1.00      1.00        28
weighted avg       1.00      1.00      1.00        28

Mean Average Precision: 1.0
Area Under ROC Curve: 1.0
Precision:  1.0
Recall:  1.0
[LibLinear]Edge Operator: Hadamard
TP:  9
TN:  13
FP:  1
FN:  5
F1: 0.75
[[13  1]
 [ 5  9]]
              precision    recall  f1-score   support

           0       0.72      0.93      0.81        14
           1       0.90      0.64      0.75        14

    accuracy                           0.79        28
   macro avg       0.81      0.79      0.78        28
weighted avg       0.81      0.79      0.78        28

Mean Average Precision: 0.7571428571428571
Area Under ROC Curve: 0.7857142857142

### expdecay

In [50]:
exps = [np.exp(-i * 0.9) for i in range(1,17)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [51]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  14
TN:  14
FP:  0
FN:  0
F1: 1.0
[[14  0]
 [ 0 14]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14

    accuracy                           1.00        28
   macro avg       1.00      1.00      1.00        28
weighted avg       1.00      1.00      1.00        28

Mean Average Precision: 1.0
Area Under ROC Curve: 1.0
Precision:  1.0
Recall:  1.0
[LibLinear]Edge Operator: Hadamard
TP:  10
TN:  13
FP:  1
FN:  4
F1: 0.8
[[13  1]
 [ 4 10]]
              precision    recall  f1-score   support

           0       0.76      0.93      0.84        14
           1       0.91      0.71      0.80        14

    accuracy                           0.82        28
   macro avg       0.84      0.82      0.82        28
weighted avg       0.84      0.82      0.82        28

Mean Average Precision: 0.7922077922077921
Area Under ROC Curve: 0.8214285714285

### Email-EU - Equal Weekly
### load data

In [56]:
# load the training and testing graph
with open('../data/email_EU/email_equal_weekly_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400

In [57]:
num_snapshots = len(graphs)
num_snapshots

74

In [59]:
emb_list = []
for i in range(num_snapshots):
    file = '../data/email_EU/embeddings/em-email-equal-week-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

74


In [60]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [61]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  242
TN:  1948
FP:  197
FN:  1903
F1: 0.1873065015479876
[[1948  197]
 [1903  242]]
              precision    recall  f1-score   support

           0       0.51      0.91      0.65      2145
           1       0.55      0.11      0.19      2145

   micro avg       0.51      0.51      0.51      4290
   macro avg       0.53      0.51      0.42      4290
weighted avg       0.53      0.51      0.42      4290

Mean Average Precision: 0.5057823725249693
Area Under ROC Curve: 0.5104895104895105
Precision:  0.55125284738041
Recall:  0.11282051282051282
[LibLinear]Edge Operator: Hadamard
TP:  1673
TN:  427
FP:  1718
FN:  472
F1: 0.604407514450867
[[ 427 1718]
 [ 472 1673]]
              precision    recall  f1-score   support

           0       0.47      0.20      0.28      2145
           1       0.49      0.78      0.60      2145

   micro avg       0.49      0.49      0.49      4290
   macro avg       0.48      0.49      0.44      4290
weighted avg   

### sum

In [62]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [63]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1281
TN:  1369
FP:  776
FN:  864
F1: 0.609709662065683
[[1369  776]
 [ 864 1281]]
              precision    recall  f1-score   support

           0       0.61      0.64      0.63      2145
           1       0.62      0.60      0.61      2145

   micro avg       0.62      0.62      0.62      4290
   macro avg       0.62      0.62      0.62      4290
weighted avg       0.62      0.62      0.62      4290

Mean Average Precision: 0.5733075869196433
Area Under ROC Curve: 0.6177156177156178
Precision:  0.6227515799708313
Recall:  0.5972027972027972
[LibLinear]Edge Operator: Hadamard
TP:  1237
TN:  1017
FP:  1128
FN:  908
F1: 0.5485587583148559
[[1017 1128]
 [ 908 1237]]
              precision    recall  f1-score   support

           0       0.53      0.47      0.50      2145
           1       0.52      0.58      0.55      2145

   micro avg       0.53      0.53      0.53      4290
   macro avg       0.53      0.53      0.52      4290
weighted avg 

### expdecay

In [68]:
exps = [np.exp(-i * 0.3) for i in range(1,28)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [69]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  1306
TN:  1276
FP:  869
FN:  839
F1: 0.6046296296296296
[[1276  869]
 [ 839 1306]]
              precision    recall  f1-score   support

           0       0.60      0.59      0.60      2145
           1       0.60      0.61      0.60      2145

   micro avg       0.60      0.60      0.60      4290
   macro avg       0.60      0.60      0.60      4290
weighted avg       0.60      0.60      0.60      4290

Mean Average Precision: 0.5611657155105432
Area Under ROC Curve: 0.6018648018648018
Precision:  0.6004597701149426
Recall:  0.6088578088578088
[LibLinear]Edge Operator: Hadamard
TP:  1529
TN:  696
FP:  1449
FN:  616
F1: 0.5969158696076519
[[ 696 1449]
 [ 616 1529]]
              precision    recall  f1-score   support

           0       0.53      0.32      0.40      2145
           1       0.51      0.71      0.60      2145

   micro avg       0.52      0.52      0.52      4290
   macro avg       0.52      0.52      0.50      4290
weighted avg 