<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#RealityMining-Monthly-w/-Graphwave" data-toc-modified-id="RealityMining-Monthly-w/-Graphwave-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>RealityMining Monthly w/ Graphwave</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-embedding" data-toc-modified-id="Using-only-T-1-embedding-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Using only T-1 embedding</a></span></li><li><span><a href="#Use-0...T-Embeddings" data-toc-modified-id="Use-0...T-Embeddings-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Use 0...T Embeddings</a></span></li></ul></li><li><span><a href="#RealityMining-Weekly" data-toc-modified-id="RealityMining-Weekly-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>RealityMining Weekly</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li><li><span><a href="#RealityMining-Equal" data-toc-modified-id="RealityMining-Equal-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>RealityMining Equal</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li></ul></div>

In [5]:
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
import graphwave
from graphwave.shapes import build_graph
from graphwave.graphwave import *
import pickle

import random
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as metrics
import seaborn as sns
sns.set_style('darkgrid')

%load_ext autoreload
%autoreload 2
np.random.seed(123)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
def edge_features(node_emb_1, node_emb_2, operator):
    
    # combine two nodes' embeddings with specificed operator
    if operator == 'Average':
        edge = [((x + y) / 2.0) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Hadamard':
        edge = [(x * y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L1':
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L2':
        edge = [abs(x - y)**2 for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Concat':
        edge = np.concatenate((node_emb_1, node_emb_2), axis=None) 
    else:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
        
    return edge
def generate_edge_features(edge_list, node_embeddings, operator):
    edge_features_mtx = []
    
    # generate features for each edge in the list
    for node_index_1, node_index_2 in edge_list:
        node_emb_1 = node_embeddings[node_index_1]
        node_emb_2 = node_embeddings[node_index_2]
        
        edge_features_mtx.append(edge_features(node_emb_1, node_emb_2, operator))
        
    return edge_features_mtx

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    edge_list = list(graph_train.edges)
    num_nodes = graph_train.number_of_nodes()
    
    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample
    
    random.seed(0)
    
    # sample edges with label 1 (true edges)
    for edge_num in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        
        #train_edges.append(tuple(edge_list[rand_index]))
        train_edges.append(edge_list[rand_index])
    non_edge_num = 0
    
    # sample edges with label 0 (non-exist edges)
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)
            
    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    num_edge_sample = len(edge_list)
    non_edge_num = 0 
    # sample edges with label 0 (non-exist edges)
    
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1
        
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)
    '''
    # generate all possible edges for each node with at least one edge (assume undirected edges)
    for node_1 in nodes_with_edge:
        for node_2 in range(num_nodes):
            test_edges.append((node_1, node_2))
            
            if (node_1, node_2) in edge_list:
                test_edges_labels.append(1)
            else:
                test_edges_labels.append(0)
    '''
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels

def build_clf(feature_mtx, response_vec):
   
    logistic_regression_model = LogisticRegression(random_state = 0,max_iter=5000,solver='liblinear',verbose=1,tol=1e-6)
    binary_clf = logistic_regression_model.fit(feature_mtx, response_vec)
    
    return binary_clf

def pred_links(feature_mtx, LR_clf):
    predict_edges_labels = LR_clf.predict(feature_mtx)
    
    return predict_edges_labels

def precision_recall(predict_labels, true_labels):
    true_positive  = false_positive = 0
    true_negative =  false_negative = 0
    
    for p_label, true_label in zip(predict_labels, true_labels):
        
        #print(p_label,true_label)
        if p_label == true_label and true_label == 1:
            true_positive += 1
        elif p_label == true_label and true_label == 0:
            true_negative += 1
        elif p_label != true_label and true_label == 1:
            false_negative += 1
        elif p_label != true_label and true_label == 0:
            false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)
    
    precision = recall = 0
    try:
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        f1 = 2 * (precision * recall) / (precision + recall)
        print("F1: {}".format(f1))
    except:
        print("F1: Error")
    
    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))
    map = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(map))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))
    
    
    return precision, recall

## RealityMining Monthly w/ Graphwave

In [7]:
#load the graphs 
with open('../data/RealityMining/RealityMining_VC_Monthly.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [8]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/10
Completed: 1/10
Completed: 2/10
Completed: 3/10
Completed: 4/10
Completed: 5/10
Completed: 6/10
Completed: 7/10
Completed: 8/10
Completed: 9/10


### Using only T-1 embedding

In [10]:
num_edge_sample = 400
edge_operator = 'Average' #'Average', 'Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

In [11]:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  218
TN:  222
FP:  0
FN:  4
F1: 0.9909090909090909
[[222   0]
 [  4 218]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       222
           1       1.00      0.98      0.99       222

    accuracy                           0.99       444
   macro avg       0.99      0.99      0.99       444
weighted avg       0.99      0.99      0.99       444

Mean Average Precision: 0.990990990990991
Area Under ROC Curve: 0.990990990990991
Precision:  1.0
Recall:  0.9819819819819819


### Use 0...T Embeddings

In [12]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [13]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  222
TN:  222
FP:  0
FN:  0
F1: 1.0
[[222   0]
 [  0 222]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       222
           1       1.00      1.00      1.00       222

    accuracy                           1.00       444
   macro avg       1.00      1.00      1.00       444
weighted avg       1.00      1.00      1.00       444

Mean Average Precision: 1.0
Area Under ROC Curve: 1.0
Precision:  1.0
Recall:  1.0


# Expotential Sum 

In [15]:
embeddings = chi_list
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  220
TN:  222
FP:  0
FN:  2
F1: 0.9954751131221719
[[222   0]
 [  2 220]]
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       222
           1       1.00      0.99      1.00       222

    accuracy                           1.00       444
   macro avg       1.00      1.00      1.00       444
weighted avg       1.00      1.00      1.00       444

Mean Average Precision: 0.9954954954954955
Area Under ROC Curve: 0.9954954954954955
Precision:  1.0
Recall:  0.990990990990991
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------
[LibLinear]Edge Operator: Concat
TP:  220
TN:  222
FP:  0
FN:  2
F1: 0.9954751131221719
[[222   0]
 [  2 220]]
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       222
           1       1.00      0.99      1.00       222

    accuracy                           1.00   

## RealityMining Weekly

In [16]:
#load the graphs 
with open('../data/RealityMining/RealityMining_VC_Weekly.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [17]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/49
Completed: 1/49
Completed: 2/49
Completed: 3/49
Completed: 4/49
Completed: 5/49
Completed: 6/49
Completed: 7/49
Completed: 8/49
Completed: 9/49
Completed: 10/49
Completed: 11/49
Completed: 12/49
Completed: 13/49
Completed: 14/49
Completed: 15/49
Completed: 16/49
Completed: 17/49
Completed: 18/49
Completed: 19/49
Completed: 20/49
Completed: 21/49
Completed: 22/49
Completed: 23/49
Completed: 24/49
Completed: 25/49
Completed: 26/49
Completed: 27/49
Completed: 28/49
Completed: 35/49
Completed: 36/49
Completed: 37/49
Completed: 38/49
Completed: 39/49
Completed: 40/49
Completed: 41/49
Completed: 42/49
Completed: 43/49
Completed: 44/49
Completed: 45/49
Completed: 46/49
Completed: 47/49
Completed: 48/49


### Using only T-1 Embedding

In [22]:
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    try:
        train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
        test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

        LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

        print("Edge Operator: {}".format(edge_operator))
        predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
        precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
        print('Precision: ', precision)
        print('Recall: ', recall)
    except:
        print("Edge Operator: {} ERROR".format(edge_operator))

[LibLinear]Edge Operator: Concat
TP:  0
TN:  28
FP:  0
FN:  28
F1: Error
[[28  0]
 [28  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        28
           1       0.00      0.00      0.00        28

    accuracy                           0.50        56
   macro avg       0.25      0.50      0.33        56
weighted avg       0.25      0.50      0.33        56

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0


  'precision', 'predicted', average, warn_for)


### Pure Sum

In [6]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [23]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  28
TN:  28
FP:  0
FN:  0
F1: 1.0
[[28  0]
 [ 0 28]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        28

    accuracy                           1.00        56
   macro avg       1.00      1.00      1.00        56
weighted avg       1.00      1.00      1.00        56

Mean Average Precision: 1.0
Area Under ROC Curve: 1.0
Precision:  1.0
Recall:  1.0


### Exponential Sum

In [24]:
embeddings = chi_list
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  0
TN:  28
FP:  0
FN:  28
F1: Error
[[28  0]
 [28  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        28
           1       0.00      0.00      0.00        28

    accuracy                           0.50        56
   macro avg       0.25      0.50      0.33        56
weighted avg       0.25      0.50      0.33        56

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------


  'precision', 'predicted', average, warn_for)


[LibLinear]Edge Operator: Concat
TP:  0
TN:  28
FP:  0
FN:  28
F1: Error
[[28  0]
 [28  0]]
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        28
           1       0.00      0.00      0.00        28

    accuracy                           0.50        56
   macro avg       0.25      0.50      0.33        56
weighted avg       0.25      0.50      0.33        56

Mean Average Precision: 0.5
Area Under ROC Curve: 0.5
Precision:  0
Recall:  0
------------ END: 0.9 ---------------
------------ BEGIN: 0.5 ---------------


  'precision', 'predicted', average, warn_for)


[LibLinear]Edge Operator: Concat
TP:  28
TN:  28
FP:  0
FN:  0
F1: 1.0
[[28  0]
 [ 0 28]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        28

    accuracy                           1.00        56
   macro avg       1.00      1.00      1.00        56
weighted avg       1.00      1.00      1.00        56

Mean Average Precision: 1.0
Area Under ROC Curve: 1.0
Precision:  1.0
Recall:  1.0
------------ END: 0.5 ---------------
------------ BEGIN: 0.3 ---------------
[LibLinear]Edge Operator: Concat
TP:  28
TN:  28
FP:  0
FN:  0
F1: 1.0
[[28  0]
 [ 0 28]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        28

    accuracy                           1.00        56
   macro avg       1.00      1.00      1.00        56
weighted avg       1.00      1.00      1.00        56

Mean 

## RealityMining Equal

In [25]:
#load the graphs 
with open('../data/RealityMining/RM_equal_num_edges_snapshots.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [26]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/50
Completed: 1/50
Completed: 2/50
Completed: 3/50
Completed: 4/50
Completed: 5/50
Completed: 6/50
Completed: 7/50
Completed: 8/50
Completed: 9/50
Completed: 10/50
Completed: 11/50
Completed: 12/50
Completed: 13/50
Completed: 14/50
Completed: 15/50
Completed: 16/50
Completed: 17/50
Completed: 18/50
Completed: 19/50
Completed: 20/50
Completed: 21/50
Completed: 22/50
Completed: 23/50
Completed: 24/50
Completed: 25/50
Completed: 26/50
Completed: 27/50
Completed: 28/50
Completed: 29/50
Completed: 30/50
Completed: 31/50
Completed: 32/50
Completed: 33/50
Completed: 34/50
Completed: 35/50
Completed: 36/50
Completed: 37/50
Completed: 38/50
Completed: 39/50
Completed: 40/50
Completed: 41/50
Completed: 42/50
Completed: 43/50
Completed: 44/50
Completed: 45/50
Completed: 46/50
Completed: 47/50
Completed: 48/50
Completed: 49/50


### Using only T-1 Embedding

In [31]:
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    try:
        train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
        test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

        LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

        print("Edge Operator: {}".format(edge_operator))
        predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
        precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
        print('Precision: ', precision)
        print('Recall: ', recall)
    except:
        print("Edge Operator: {} ERROR".format(edge_operator))

[LibLinear]Edge Operator: Concat
TP:  320
TN:  304
FP:  18
FN:  2
F1: 0.9696969696969696
[[304  18]
 [  2 320]]
              precision    recall  f1-score   support

           0       0.99      0.94      0.97       322
           1       0.95      0.99      0.97       322

    accuracy                           0.97       644
   macro avg       0.97      0.97      0.97       644
weighted avg       0.97      0.97      0.97       644

Mean Average Precision: 0.9439707449740895
Area Under ROC Curve: 0.968944099378882
Precision:  0.9467455621301775
Recall:  0.9937888198757764


### Pure Sum

In [28]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [30]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  322
TN:  321
FP:  1
FN:  0
F1: 0.9984496124031007
[[321   1]
 [  0 322]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       322
           1       1.00      1.00      1.00       322

    accuracy                           1.00       644
   macro avg       1.00      1.00      1.00       644
weighted avg       1.00      1.00      1.00       644

Mean Average Precision: 0.9969040247678018
Area Under ROC Curve: 0.9984472049689441
Precision:  0.9969040247678018
Recall:  1.0


### Exponential Sum

In [32]:
embeddings = chi_list
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  256
TN:  316
FP:  6
FN:  66
F1: 0.8767123287671231
[[316   6]
 [ 66 256]]
              precision    recall  f1-score   support

           0       0.83      0.98      0.90       322
           1       0.98      0.80      0.88       322

    accuracy                           0.89       644
   macro avg       0.90      0.89      0.89       644
weighted avg       0.90      0.89      0.89       644

Mean Average Precision: 0.8793087098762504
Area Under ROC Curve: 0.888198757763975
Precision:  0.9770992366412213
Recall:  0.7950310559006211
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------
[LibLinear]Edge Operator: Concat
TP:  251
TN:  319
FP:  3
FN:  71
F1: 0.8715277777777778
[[319   3]
 [ 71 251]]
              precision    recall  f1-score   support

           0       0.82      0.99      0.90       322
           1       0.99      0.78      0.87       322

    accuracy                 