<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#CollegeMsg-Monthly-w/-Graphwave" data-toc-modified-id="CollegeMsg-Monthly-w/-Graphwave-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>CollegeMsg Monthly w/ Graphwave</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-embedding" data-toc-modified-id="Using-only-T-1-embedding-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Using only T-1 embedding</a></span></li><li><span><a href="#Use-0...T-Embeddings" data-toc-modified-id="Use-0...T-Embeddings-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Use 0...T Embeddings</a></span></li><li><span><a href="#Expotential-Sum" data-toc-modified-id="Expotential-Sum-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Expotential Sum</a></span></li></ul></li><li><span><a href="#CollegeMsg-Weekly" data-toc-modified-id="CollegeMsg-Weekly-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>CollegeMsg Weekly</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li><li><span><a href="#CollegeMsg-Equal-(by-Month)" data-toc-modified-id="CollegeMsg-Equal-(by-Month)-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>CollegeMsg Equal (by Month)</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li></ul></div>

In [7]:
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
import graphwave
from graphwave.shapes import build_graph
from graphwave.graphwave import *
import pickle

import random
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as metrics
import seaborn as sns
sns.set_style('darkgrid')

%load_ext autoreload
%autoreload 2
np.random.seed(123)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
def edge_features(node_emb_1, node_emb_2, operator):
    
    # combine two nodes' embeddings with specificed operator
    if operator == 'Average':
        edge = [((x + y) / 2.0) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Hadamard':
        edge = [(x * y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L1':
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L2':
        edge = [abs(x - y)**2 for x,y in zip(node_emb_1, node_emb_2)]
    else:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
        
    return edge
def generate_edge_features(edge_list, node_embeddings, operator):
    edge_features_mtx = []
    
    # generate features for each edge in the list
    for node_index_1, node_index_2 in edge_list:
        node_emb_1 = node_embeddings[node_index_1]
        node_emb_2 = node_embeddings[node_index_2]
        
        edge_features_mtx.append(edge_features(node_emb_1, node_emb_2, operator))
        
    return edge_features_mtx

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    edge_list = list(graph_train.edges)
    num_nodes = graph_train.number_of_nodes()
    
    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample
    
    random.seed(0)
    
    # sample edges with label 1 (true edges)
    for edge_num in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        
        #train_edges.append(tuple(edge_list[rand_index]))
        train_edges.append(edge_list[rand_index])
    non_edge_num = 0
    
    # sample edges with label 0 (non-exist edges)
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)
            
    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    num_edge_sample = len(edge_list)
    non_edge_num = 0 
    # sample edges with label 0 (non-exist edges)
    
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1
        
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)
    
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels

'''
def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    # generate all possible edges for each node with at least one edge (assume undirected edges)
    for node_1 in nodes_with_edge:
        for node_2 in range(num_nodes):
            test_edges.append((node_1, node_2))
            
            if (node_1, node_2) in edge_list:
                test_edges_labels.append(1)
            else:
                test_edges_labels.append(0)
            
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels
'''
def build_clf(feature_mtx, response_vec):
   
    logistic_regression_model = LogisticRegression(random_state = 0,max_iter=5000,solver='liblinear',verbose=1,tol=1e-6)
    binary_clf = logistic_regression_model.fit(feature_mtx, response_vec)
    
    return binary_clf

def pred_links(feature_mtx, LR_clf):
    predict_edges_labels = LR_clf.predict(feature_mtx)
    
    return predict_edges_labels

def precision_recall(predict_labels, true_labels):
    true_positive  = false_positive = 0
    true_negative =  false_negative = 0
    
    for p_label, true_label in zip(predict_labels, true_labels):
        
        #print(p_label,true_label)
        if p_label == true_label and true_label == 1:
            true_positive += 1
        elif p_label == true_label and true_label == 0:
            true_negative += 1
        elif p_label != true_label and true_label == 1:
            false_negative += 1
        elif p_label != true_label and true_label == 0:
            false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)
    
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    f1 = 2 * (precision * recall) / (precision + recall)
    print("F1: {}".format(f1))
    
    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))
    map = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(map))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))
    
    return precision, recall

## CollegeMsg Monthly w/ Graphwave

In [34]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Month_MSG/msg_1_month_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [4]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/6
Completed: 1/6
Completed: 2/6
Completed: 3/6
Completed: 4/6
Completed: 5/6


### Using only T-1 embedding

In [5]:
num_edge_sample = 400
edge_operator = 'Average' #'Average', 'Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

In [9]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  314
TN:  548
FP:  42
FN:  276
F1: 0.6638477801268499
[[548  42]
 [276 314]]
              precision    recall  f1-score   support

           0       0.67      0.93      0.78       590
           1       0.88      0.53      0.66       590

    accuracy                           0.73      1180
   macro avg       0.77      0.73      0.72      1180
weighted avg       0.77      0.73      0.72      1180

Mean Average Precision: 0.703313654541992
Area Under ROC Curve: 0.7305084745762712
Precision:  0.8820224719101124
Recall:  0.5322033898305085
[LibLinear]Edge Operator: Hadamard
TP:  288
TN:  561
FP:  29
FN:  302
F1: 0.6350606394707828
[[561  29]
 [302 288]]
              precision    recall  f1-score   support

           0       0.65      0.95      0.77       590
           1       0.91      0.49      0.64       590

    accuracy                           0.72      1180
   macro avg       0.78      0.72      0.70      1180
weighted avg       0.78     

### Use 0...T Embeddings

In [10]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [11]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  376
TN:  519
FP:  71
FN:  214
F1: 0.7251687560270009
[[519  71]
 [214 376]]
              precision    recall  f1-score   support

           0       0.71      0.88      0.78       590
           1       0.84      0.64      0.73       590

    accuracy                           0.76      1180
   macro avg       0.77      0.76      0.75      1180
weighted avg       0.77      0.76      0.75      1180

Mean Average Precision: 0.7174193303757631
Area Under ROC Curve: 0.7584745762711864
Precision:  0.8411633109619687
Recall:  0.6372881355932203
[LibLinear]Edge Operator: Hadamard
TP:  392
TN:  524
FP:  66
FN:  198
F1: 0.748091603053435
[[524  66]
 [198 392]]
              precision    recall  f1-score   support

           0       0.73      0.89      0.80       590
           1       0.86      0.66      0.75       590

    accuracy                           0.78      1180
   macro avg       0.79      0.78      0.77      1180
weighted avg       0.79     

### Expotential Sum 

In [33]:
exps = [np.exp(-i * 0.9) for i in range(1,7)]
exps

[]

In [13]:
prev_embedding = np.zeros((chi_list[0]).shape) 
for c,e in zip(chi_list[0:-1],exps):
    prev_embedding += e * c 

In [14]:
cur_embedding = prev_embedding + chi_list[-1]

In [15]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    try:
        train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
        test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

        LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

        print("Edge Operator: {}".format(edge_operator))
        predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
        precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
        print('Precision: ', precision)
        print('Recall: ', recall)
    except:
        print("Edge Operator: {} ERROR".format(edge_operator))

[LibLinear]Edge Operator: Average
TP:  206
TN:  580
FP:  10
FN:  384
F1: 0.5111662531017369
[[580  10]
 [384 206]]
              precision    recall  f1-score   support

           0       0.60      0.98      0.75       590
           1       0.95      0.35      0.51       590

    accuracy                           0.67      1180
   macro avg       0.78      0.67      0.63      1180
weighted avg       0.78      0.67      0.63      1180

Mean Average Precision: 0.6584118016321405
Area Under ROC Curve: 0.6661016949152544
Precision:  0.9537037037037037
Recall:  0.34915254237288135
[LibLinear]Edge Operator: Hadamard
TP:  0
TN:  590
FP:  0
FN:  590
Edge Operator: Hadamard ERROR
[LibLinear]Edge Operator: Weighted-L1
TP:  444
TN:  376
FP:  214
FN:  146
F1: 0.7115384615384616
[[376 214]
 [146 444]]
              precision    recall  f1-score   support

           0       0.72      0.64      0.68       590
           1       0.67      0.75      0.71       590

    accuracy                     

## CollegeMsg Weekly

In [16]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Week_MSG/msg_1_week_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [17]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/27
Completed: 1/27
Completed: 2/27
Completed: 3/27
Completed: 4/27
Completed: 5/27
Completed: 6/27
Completed: 7/27
Completed: 8/27
Completed: 9/27
Completed: 10/27
Completed: 11/27
Completed: 12/27
Completed: 13/27
Completed: 14/27
Completed: 15/27
Completed: 16/27
Completed: 17/27
Completed: 18/27
Completed: 19/27
Completed: 20/27
Completed: 21/27
Completed: 22/27
Completed: 23/27
Completed: 24/27
Completed: 25/27
Completed: 26/27


### Using only T-1 Embedding

In [18]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  12
TN:  84
FP:  2
FN:  74
F1: 0.24
[[84  2]
 [74 12]]
              precision    recall  f1-score   support

           0       0.53      0.98      0.69        86
           1       0.86      0.14      0.24        86

    accuracy                           0.56       172
   macro avg       0.69      0.56      0.46       172
weighted avg       0.69      0.56      0.46       172

Mean Average Precision: 0.5498338870431894
Area Under ROC Curve: 0.5581395348837209
Precision:  0.8571428571428571
Recall:  0.13953488372093023
[LibLinear]Edge Operator: Hadamard
TP:  14
TN:  84
FP:  2
FN:  72
F1: 0.2745098039215686
[[84  2]
 [72 14]]
              precision    recall  f1-score   support

           0       0.54      0.98      0.69        86
           1       0.88      0.16      0.27        86

    accuracy                           0.57       172
   macro avg       0.71      0.57      0.48       172
weighted avg       0.71      0.57      0.48       172

M

### Pure Sum

In [19]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [20]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  44
TN:  78
FP:  8
FN:  42
F1: 0.6376811594202898
[[78  8]
 [42 44]]
              precision    recall  f1-score   support

           0       0.65      0.91      0.76        86
           1       0.85      0.51      0.64        86

    accuracy                           0.71       172
   macro avg       0.75      0.71      0.70       172
weighted avg       0.75      0.71      0.70       172

Mean Average Precision: 0.6771019677996422
Area Under ROC Curve: 0.7093023255813954
Precision:  0.8461538461538461
Recall:  0.5116279069767442
[LibLinear]Edge Operator: Hadamard
TP:  44
TN:  76
FP:  10
FN:  42
F1: 0.6285714285714286
[[76 10]
 [42 44]]
              precision    recall  f1-score   support

           0       0.64      0.88      0.75        86
           1       0.81      0.51      0.63        86

    accuracy                           0.70       172
   macro avg       0.73      0.70      0.69       172
weighted avg       0.73      0.70      0.6

### Exponential Sum

In [21]:
exps = [np.exp(-i * 1) for i in range(1,len(graphs[:-1]))]
exps;

In [22]:
prev_embedding = np.zeros((chi_list[0]).shape) 
for c,e in zip(chi_list[0:-1],exps):
    prev_embedding += e * c 
cur_embedding = prev_embedding + chi_list[-1]

In [23]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    try:
        train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
        test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

        LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

        print("Edge Operator: {}".format(edge_operator))
        predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
        precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
        print('Precision: ', precision)
        print('Recall: ', recall)
    except: 
        print("Edge Operator: {} ERROR".format(edge_operator))

[LibLinear]Edge Operator: Average
TP:  0
TN:  86
FP:  0
FN:  86
Edge Operator: Average ERROR
[LibLinear]Edge Operator: Hadamard
TP:  0
TN:  86
FP:  0
FN:  86
Edge Operator: Hadamard ERROR
[LibLinear]Edge Operator: Weighted-L1
TP:  32
TN:  69
FP:  17
FN:  54
F1: 0.47407407407407404
[[69 17]
 [54 32]]
              precision    recall  f1-score   support

           0       0.56      0.80      0.66        86
           1       0.65      0.37      0.47        86

    accuracy                           0.59       172
   macro avg       0.61      0.59      0.57       172
weighted avg       0.61      0.59      0.57       172

Mean Average Precision: 0.5569530137636449
Area Under ROC Curve: 0.5872093023255814
Precision:  0.6530612244897959
Recall:  0.37209302325581395
[LibLinear]Edge Operator: Weighted-L2
TP:  14
TN:  81
FP:  5
FN:  72
F1: 0.2666666666666667
[[81  5]
 [72 14]]
              precision    recall  f1-score   support

           0       0.53      0.94      0.68        86
        

## CollegeMsg Equal (by Month)

In [24]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Equal_Edge_MSG/msg_equal_edge_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [25]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/27
Completed: 1/27
Completed: 2/27
Completed: 3/27
Completed: 4/27
Completed: 5/27
Completed: 6/27
Completed: 7/27
Completed: 8/27
Completed: 9/27
Completed: 10/27
Completed: 11/27
Completed: 12/27
Completed: 13/27
Completed: 14/27
Completed: 15/27
Completed: 16/27
Completed: 17/27
Completed: 18/27
Completed: 19/27
Completed: 20/27
Completed: 21/27
Completed: 22/27
Completed: 23/27
Completed: 24/27
Completed: 25/27
Completed: 26/27


### Using only T-1 Embedding

In [26]:
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  688
TN:  979
FP:  111
FN:  402
F1: 0.7284277395447327
[[979 111]
 [402 688]]
              precision    recall  f1-score   support

           0       0.71      0.90      0.79      1090
           1       0.86      0.63      0.73      1090

    accuracy                           0.76      2180
   macro avg       0.78      0.76      0.76      2180
weighted avg       0.78      0.76      0.76      2180

Mean Average Precision: 0.7279087391349279
Area Under ROC Curve: 0.7646788990825688
Precision:  0.8610763454317898
Recall:  0.6311926605504588
[LibLinear]Edge Operator: Hadamard
TP:  632
TN:  1038
FP:  52
FN:  458
F1: 0.7125140924464487
[[1038   52]
 [ 458  632]]
              precision    recall  f1-score   support

           0       0.69      0.95      0.80      1090
           1       0.92      0.58      0.71      1090

    accuracy                           0.77      2180
   macro avg       0.81      0.77      0.76      2180
weighted avg       0.

### Pure Sum

In [27]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [28]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Average
TP:  618
TN:  876
FP:  214
FN:  472
F1: 0.6430801248699272
[[876 214]
 [472 618]]
              precision    recall  f1-score   support

           0       0.65      0.80      0.72      1090
           1       0.74      0.57      0.64      1090

    accuracy                           0.69      2180
   macro avg       0.70      0.69      0.68      2180
weighted avg       0.70      0.69      0.68      2180

Mean Average Precision: 0.6376543754410726
Area Under ROC Curve: 0.6853211009174311
Precision:  0.7427884615384616
Recall:  0.5669724770642202
[LibLinear]Edge Operator: Hadamard
TP:  664
TN:  888
FP:  202
FN:  426
F1: 0.6789366053169734
[[888 202]
 [426 664]]
              precision    recall  f1-score   support

           0       0.68      0.81      0.74      1090
           1       0.77      0.61      0.68      1090

    accuracy                           0.71      2180
   macro avg       0.72      0.71      0.71      2180
weighted avg       0.72  

### Exponential Sum

In [29]:
exps = [np.exp(-i * 1) for i in range(1,len(graphs[:-1]))]
exps;

In [30]:
prev_embedding = np.zeros((chi_list[0]).shape) 
for c,e in zip(chi_list[0:-1],exps):
    prev_embedding += e * c 
cur_embedding = prev_embedding + chi_list[-1]

In [31]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    try:
        train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
        test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

        LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

        print("Edge Operator: {}".format(edge_operator))
        predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
        precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
        print('Precision: ', precision)
        print('Recall: ', recall)
    except: 
        print("Edge Operator: {} ERROR".format(edge_operator))

[LibLinear]Edge Operator: Average
TP:  418
TN:  1067
FP:  23
FN:  672
F1: 0.5460483344219464
[[1067   23]
 [ 672  418]]
              precision    recall  f1-score   support

           0       0.61      0.98      0.75      1090
           1       0.95      0.38      0.55      1090

    accuracy                           0.68      2180
   macro avg       0.78      0.68      0.65      2180
weighted avg       0.78      0.68      0.65      2180

Mean Average Precision: 0.671742703197487
Area Under ROC Curve: 0.6811926605504587
Precision:  0.9478458049886621
Recall:  0.3834862385321101
[LibLinear]Edge Operator: Hadamard
TP:  0
TN:  1090
FP:  0
FN:  1090
Edge Operator: Hadamard ERROR
[LibLinear]Edge Operator: Weighted-L1
TP:  894
TN:  633
FP:  457
FN:  196
F1: 0.7324866857845146
[[633 457]
 [196 894]]
              precision    recall  f1-score   support

           0       0.76      0.58      0.66      1090
           1       0.66      0.82      0.73      1090

    accuracy                