In [1]:
import random

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
import pickle
import sklearn.metrics as metrics

### functions

In [6]:
def edge_features(node_emb_1, node_emb_2, operator):
    node_emb_1 = np.asfarray(node_emb_1,float)
    node_emb_2 = np.asfarray(node_emb_2, float)
    # combine two nodes' embeddings with specificed operator
    if operator == 'Average':
        edge = [((x + y) / 2.0) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Hadamard':
        edge = [(x * y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L1':
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L2':
        edge = [abs(x - y)**2 for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Concat':
        edge = np.concatenate((node_emb_1, node_emb_2), axis=None) 
    else:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
        
    return edge
def generate_edge_features(edge_list, node_embeddings, operator):
    edge_features_mtx = []
    
    # generate features for each edge in the list
    for node_index_1, node_index_2 in edge_list:
        node_emb_1 = node_embeddings[node_index_1-1]
        node_emb_2 = node_embeddings[node_index_2-1]
        
        edge_features_mtx.append(edge_features(node_emb_1, node_emb_2, operator))
        
    return edge_features_mtx

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    edge_list = list(graph_train.edges)
    num_nodes = graph_train.number_of_nodes()
    
    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample
    
    random.seed(0)
    
    # sample edges with label 1 (true edges)
    for edge_num in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        
        #train_edges.append(tuple(edge_list[rand_index]))
        train_edges.append(edge_list[rand_index])
    non_edge_num = 0
    
    # sample edges with label 0 (non-exist edges)
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)
            
    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    num_edge_sample = len(edge_list)
    non_edge_num = 0 
    # sample edges with label 0 (non-exist edges)
    
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1
        
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)
    
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels

def build_clf(feature_mtx, response_vec):
   
    logistic_regression_model = LogisticRegression(random_state = 0,max_iter=5000,solver='liblinear',verbose=1,tol=1e-6)
    binary_clf = logistic_regression_model.fit(feature_mtx, response_vec)
    
    return binary_clf

def pred_links(feature_mtx, LR_clf):
    predict_edges_labels = LR_clf.predict(feature_mtx)
    
    return predict_edges_labels

def precision_recall(predict_labels, true_labels):
    true_positive  = false_positive = 0
    true_negative =  false_negative = 0
    
    for p_label, true_label in zip(predict_labels, true_labels):
        
        #print(p_label,true_label)
        if p_label == true_label and true_label == 1:
            true_positive += 1
        elif p_label == true_label and true_label == 0:
            true_negative += 1
        elif p_label != true_label and true_label == 1:
            false_negative += 1
        elif p_label != true_label and true_label == 0:
            false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)
    
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    if (precision + recall) != 0.0:
        f1 = 2 * (precision * recall) / (precision + recall)
        print("F1: {}".format(f1))
    else:
        print("F1: Divide-by-zero")
    
    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))
    map = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(map))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))
    
    return precision, recall

### College Message - Monthly
### load data

In [3]:
# load the training and testing graph
with open('./graphs/msg_1_month_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400
edge_operator = 'Weighted-L2'
num_snapshots = 7

In [4]:
emb_list = []
for i in range(num_snapshots):
    file = './LINE/msg_month/em-msg-month-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

7


### independent / no combination

In [5]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [7]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  68
TN:  504
FP:  86
FN:  522
F1: 0.1827956989247312
[[504  86]
 [522  68]]
              precision    recall  f1-score   support

           0       0.49      0.85      0.62       590
           1       0.44      0.12      0.18       590

    accuracy                           0.48      1180
   macro avg       0.47      0.48      0.40      1180
weighted avg       0.47      0.48      0.40      1180

Mean Average Precision: 0.4932643627558882
Area Under ROC Curve: 0.48474576271186437
Precision:  0.44155844155844154
Recall:  0.1152542372881356
[LibLinear]Edge Operator: Hadamard
TP:  400
TN:  206
FP:  384
FN:  190
F1: 0.5822416302765648
[[206 384]
 [190 400]]
              precision    recall  f1-score   support

           0       0.52      0.35      0.42       590
           1       0.51      0.68      0.58       590

    accuracy                           0.51      1180
   macro avg       0.52      0.51      0.50      1180
weighted avg       0.52  

### sum

In [9]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [10]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  360
TN:  396
FP:  194
FN:  230
F1: 0.6293706293706294
[[396 194]
 [230 360]]
              precision    recall  f1-score   support

           0       0.63      0.67      0.65       590
           1       0.65      0.61      0.63       590

    accuracy                           0.64      1180
   macro avg       0.64      0.64      0.64      1180
weighted avg       0.64      0.64      0.64      1180

Mean Average Precision: 0.5914152848314262
Area Under ROC Curve: 0.640677966101695
Precision:  0.6498194945848376
Recall:  0.6101694915254238
[LibLinear]Edge Operator: Hadamard
TP:  402
TN:  286
FP:  304
FN:  188
F1: 0.6203703703703703
[[286 304]
 [188 402]]
              precision    recall  f1-score   support

           0       0.60      0.48      0.54       590
           1       0.57      0.68      0.62       590

    accuracy                           0.58      1180
   macro avg       0.59      0.58      0.58      1180
weighted avg       0.59   

### expdecay

In [20]:
exps = [np.exp(-i * 0.3) for i in range(1,8)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c

In [21]:
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [22]:
for edge_operator in [ 'Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  338
TN:  419
FP:  171
FN:  252
F1: 0.6151046405823476
[[419 171]
 [252 338]]
              precision    recall  f1-score   support

           0       0.62      0.71      0.66       590
           1       0.66      0.57      0.62       590

    accuracy                           0.64      1180
   macro avg       0.64      0.64      0.64      1180
weighted avg       0.64      0.64      0.64      1180

Mean Average Precision: 0.593979554460391
Area Under ROC Curve: 0.6415254237288136
Precision:  0.6640471512770137
Recall:  0.5728813559322034
[LibLinear]Edge Operator: Hadamard
TP:  470
TN:  155
FP:  435
FN:  120
F1: 0.6287625418060201
[[155 435]
 [120 470]]
              precision    recall  f1-score   support

           0       0.56      0.26      0.36       590
           1       0.52      0.80      0.63       590

    accuracy                           0.53      1180
   macro avg       0.54      0.53      0.49      1180
weighted avg       0.54   

### College Message - Weekly
### load data

In [23]:
# load the training and testing graph
with open('./graphs/msg_1_week_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400
num_snapshots = 28

In [25]:
emb_list = []
for i in range(num_snapshots):
    file = './LINE/msg_week/em-msg-week-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

28


### independent / no combination

In [26]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [28]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  2
TN:  78
FP:  8
FN:  84
F1: 0.041666666666666664
[[78  8]
 [84  2]]
              precision    recall  f1-score   support

           0       0.48      0.91      0.63        86
           1       0.20      0.02      0.04        86

    accuracy                           0.47       172
   macro avg       0.34      0.47      0.34       172
weighted avg       0.34      0.47      0.34       172

Mean Average Precision: 0.49302325581395345
Area Under ROC Curve: 0.4651162790697675
Precision:  0.2
Recall:  0.023255813953488372
[LibLinear]Edge Operator: Hadamard
TP:  22
TN:  73
FP:  13
FN:  64
F1: 0.36363636363636365
[[73 13]
 [64 22]]
              precision    recall  f1-score   support

           0       0.53      0.85      0.65        86
           1       0.63      0.26      0.36        86

    accuracy                           0.55       172
   macro avg       0.58      0.55      0.51       172
weighted avg       0.58      0.55      0.51       17

### sum

In [29]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [30]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  32
TN:  73
FP:  13
FN:  54
F1: 0.4885496183206108
[[73 13]
 [54 32]]
              precision    recall  f1-score   support

           0       0.57      0.85      0.69        86
           1       0.71      0.37      0.49        86

    accuracy                           0.61       172
   macro avg       0.64      0.61      0.59       172
weighted avg       0.64      0.61      0.59       172

Mean Average Precision: 0.5785529715762274
Area Under ROC Curve: 0.6104651162790697
Precision:  0.7111111111111111
Recall:  0.37209302325581395
[LibLinear]Edge Operator: Hadamard
TP:  28
TN:  66
FP:  20
FN:  58
F1: 0.41791044776119407
[[66 20]
 [58 28]]
              precision    recall  f1-score   support

           0       0.53      0.77      0.63        86
           1       0.58      0.33      0.42        86

    accuracy                           0.55       172
   macro avg       0.56      0.55      0.52       172
weighted avg       0.56      0.55      

### expdecay

In [35]:
exps = [np.exp(-i * 0.3) for i in range(1,29)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [36]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  32
TN:  64
FP:  22
FN:  54
F1: 0.45714285714285713
[[64 22]
 [54 32]]
              precision    recall  f1-score   support

           0       0.54      0.74      0.63        86
           1       0.59      0.37      0.46        86

    accuracy                           0.56       172
   macro avg       0.57      0.56      0.54       172
weighted avg       0.57      0.56      0.54       172

Mean Average Precision: 0.5344530577088716
Area Under ROC Curve: 0.5581395348837209
Precision:  0.5925925925925926
Recall:  0.37209302325581395
[LibLinear]Edge Operator: Hadamard
TP:  74
TN:  38
FP:  48
FN:  12
F1: 0.7115384615384615
[[38 48]
 [12 74]]
              precision    recall  f1-score   support

           0       0.76      0.44      0.56        86
           1       0.61      0.86      0.71        86

    accuracy                           0.65       172
   macro avg       0.68      0.65      0.64       172
weighted avg       0.68      0.65      

### RM -  Equal Edges
### load data

In [7]:
# load the training and testing graph
with open('../data/RM_sparse/graphs/RM_equal_num_edges_snapshots.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400
edge_operator = 'Weighted-L2'
num_snapshots = 51

In [8]:
emb_list = []
for i in range(num_snapshots):
    file = '../data/RM_sparse/embeddings/em-equal_edge-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

51


### independent / no combination

In [9]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [10]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  3
TN:  317
FP:  5
FN:  319
F1: 0.01818181818181818
[[317   5]
 [319   3]]
              precision    recall  f1-score   support

           0       0.50      0.98      0.66       322
           1       0.38      0.01      0.02       322

   micro avg       0.50      0.50      0.50       644
   macro avg       0.44      0.50      0.34       644
weighted avg       0.44      0.50      0.34       644

Mean Average Precision: 0.4988354037267081
Area Under ROC Curve: 0.49689440993788825
Precision:  0.375
Recall:  0.009316770186335404
[LibLinear]Edge Operator: Hadamard
TP:  47
TN:  269
FP:  53
FN:  275
F1: 0.22274881516587677
[[269  53]
 [275  47]]
              precision    recall  f1-score   support

           0       0.49      0.84      0.62       322
           1       0.47      0.15      0.22       322

   micro avg       0.49      0.49      0.49       644
   macro avg       0.48      0.49      0.42       644
weighted avg       0.48      0.49      

### sum

In [15]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [16]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  306
TN:  317
FP:  5
FN:  16
F1: 0.9668246445497631
[[317   5]
 [ 16 306]]
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       322
           1       0.98      0.95      0.97       322

   micro avg       0.97      0.97      0.97       644
   macro avg       0.97      0.97      0.97       644
weighted avg       0.97      0.97      0.97       644

Mean Average Precision: 0.9598769746959319
Area Under ROC Curve: 0.967391304347826
Precision:  0.9839228295819936
Recall:  0.9503105590062112
[LibLinear]Edge Operator: Hadamard
TP:  118
TN:  256
FP:  66
FN:  204
F1: 0.46640316205533605
[[256  66]
 [204 118]]
              precision    recall  f1-score   support

           0       0.56      0.80      0.65       322
           1       0.64      0.37      0.47       322

   micro avg       0.58      0.58      0.58       644
   macro avg       0.60      0.58      0.56       644
weighted avg       0.60      

### expdecay

In [21]:
exps = [np.exp(-i * 0.3) for i in range(1,51)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [22]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  218
TN:  318
FP:  4
FN:  104
F1: 0.801470588235294
[[318   4]
 [104 218]]
              precision    recall  f1-score   support

           0       0.75      0.99      0.85       322
           1       0.98      0.68      0.80       322

   micro avg       0.83      0.83      0.83       644
   macro avg       0.87      0.83      0.83       644
weighted avg       0.87      0.83      0.83       644

Mean Average Precision: 0.826310782832522
Area Under ROC Curve: 0.8322981366459626
Precision:  0.9819819819819819
Recall:  0.6770186335403726
[LibLinear]Edge Operator: Hadamard
TP:  75
TN:  314
FP:  8
FN:  247
F1: 0.3703703703703704
[[314   8]
 [247  75]]
              precision    recall  f1-score   support

           0       0.56      0.98      0.71       322
           1       0.90      0.23      0.37       322

   micro avg       0.60      0.60      0.60       644
   macro avg       0.73      0.60      0.54       644
weighted avg       0.73      0.6

### College Message - Equal Edges
### load data

In [27]:
# load the training and testing graph
with open('../data/CollegeMsg/graphs/msg_equal_edge_undir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

# parameters
num_edge_sample = 400
edge_operator = 'Weighted-L2'
num_snapshots = 28

In [28]:
emb_list = []
for i in range(num_snapshots):
    file = '../data/CollegeMsg/embeddings/em-msg-equal-edge-' + str(i) + '.npy'
    node_embedding = np.load(file)
    emb_list.append(node_embedding)
print(len(emb_list))

28


In [29]:
node_embeddings_training = emb_list[-3]
node_embeddings_testing = emb_list[-2]

In [30]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  195
TN:  352
FP:  193
FN:  350
F1: 0.4180064308681672
[[352 193]
 [350 195]]
              precision    recall  f1-score   support

           0       0.50      0.65      0.56       545
           1       0.50      0.36      0.42       545

   micro avg       0.50      0.50      0.50      1090
   macro avg       0.50      0.50      0.49      1090
weighted avg       0.50      0.50      0.49      1090

Mean Average Precision: 0.5009221602194268
Area Under ROC Curve: 0.5018348623853212
Precision:  0.5025773195876289
Recall:  0.3577981651376147
[LibLinear]Edge Operator: Hadamard
TP:  448
TN:  101
FP:  444
FN:  97
F1: 0.6235212247738343
[[101 444]
 [ 97 448]]
              precision    recall  f1-score   support

           0       0.51      0.19      0.27       545
           1       0.50      0.82      0.62       545

   micro avg       0.50      0.50      0.50      1090
   macro avg       0.51      0.50      0.45      1090
weighted avg       0.51   

### sum

In [31]:
for i in range(len(emb_list)):
    emb_list[i] = np.asfarray(emb_list[i],float)
node_embeddings_training = np.sum(np.asarray(emb_list[0:-2]),axis=0)
node_embeddings_testing = np.sum(np.asarray(emb_list[0:-1]),axis=0)

In [32]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  340
TN:  364
FP:  181
FN:  205
F1: 0.6378986866791745
[[364 181]
 [205 340]]
              precision    recall  f1-score   support

           0       0.64      0.67      0.65       545
           1       0.65      0.62      0.64       545

   micro avg       0.65      0.65      0.65      1090
   macro avg       0.65      0.65      0.65      1090
weighted avg       0.65      0.65      0.65      1090

Mean Average Precision: 0.5951944918910352
Area Under ROC Curve: 0.6458715596330276
Precision:  0.6525911708253359
Recall:  0.6238532110091743
[LibLinear]Edge Operator: Hadamard
TP:  316
TN:  296
FP:  249
FN:  229
F1: 0.5693693693693694
[[296 249]
 [229 316]]
              precision    recall  f1-score   support

           0       0.56      0.54      0.55       545
           1       0.56      0.58      0.57       545

   micro avg       0.56      0.56      0.56      1090
   macro avg       0.56      0.56      0.56      1090
weighted avg       0.56  

### expdecay

In [38]:
exps = [np.exp(-i * 0.3) for i in range(1,28)]
node_embeddings_training = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-2],exps[:-1]):
    node_embeddings_training += e * c 
node_embeddings_testing = np.zeros((emb_list[0]).shape) 
for c,e in zip(emb_list[0:-1],exps[:-1]):
    node_embeddings_testing += e * c 

In [39]:
for edge_operator in ['Average', 'Hadamard','Weighted-L1','Weighted-L2', 'Concat']:
    # generate the training set
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, node_embeddings_training, edge_operator)
    # generate the testing set
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, node_embeddings_testing, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  288
TN:  432
FP:  113
FN:  257
F1: 0.6088794926004227
[[432 113]
 [257 288]]
              precision    recall  f1-score   support

           0       0.63      0.79      0.70       545
           1       0.72      0.53      0.61       545

   micro avg       0.66      0.66      0.66      1090
   macro avg       0.67      0.66      0.65      1090
weighted avg       0.67      0.66      0.65      1090

Mean Average Precision: 0.615308060124917
Area Under ROC Curve: 0.6605504587155964
Precision:  0.7182044887780549
Recall:  0.5284403669724771
[LibLinear]Edge Operator: Hadamard
TP:  359
TN:  219
FP:  326
FN:  186
F1: 0.583739837398374
[[219 326]
 [186 359]]
              precision    recall  f1-score   support

           0       0.54      0.40      0.46       545
           1       0.52      0.66      0.58       545

   micro avg       0.53      0.53      0.53      1090
   macro avg       0.53      0.53      0.52      1090
weighted avg       0.53    