<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#CollegeMsg-Monthly-w/-Graphwave" data-toc-modified-id="CollegeMsg-Monthly-w/-Graphwave-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>CollegeMsg Monthly w/ Graphwave</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-embedding" data-toc-modified-id="Using-only-T-1-embedding-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Using only T-1 embedding</a></span></li><li><span><a href="#Use-0...T-Embeddings" data-toc-modified-id="Use-0...T-Embeddings-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Use 0...T Embeddings</a></span></li><li><span><a href="#Expotential-Sum" data-toc-modified-id="Expotential-Sum-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Expotential Sum</a></span></li></ul></li><li><span><a href="#CollegeMsg-Weekly" data-toc-modified-id="CollegeMsg-Weekly-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>CollegeMsg Weekly</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li><li><span><a href="#CollegeMsg-Equal-(by-Month)" data-toc-modified-id="CollegeMsg-Equal-(by-Month)-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>CollegeMsg Equal (by Month)</a></span><ul class="toc-item"><li><span><a href="#Using-only-T-1-Embedding" data-toc-modified-id="Using-only-T-1-Embedding-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Using only T-1 Embedding</a></span></li><li><span><a href="#Pure-Sum" data-toc-modified-id="Pure-Sum-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Pure Sum</a></span></li><li><span><a href="#Exponential-Sum" data-toc-modified-id="Exponential-Sum-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Exponential Sum</a></span></li></ul></li></ul></div>

In [1]:
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
import graphwave
from graphwave.shapes import build_graph
from graphwave.graphwave import *
import pickle

import random
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as metrics
import seaborn as sns
sns.set_style('darkgrid')

%load_ext autoreload
%autoreload 2
np.random.seed(123)


In [4]:
def edge_features(node_emb_1, node_emb_2, operator):
    
    # combine two nodes' embeddings with specificed operator
    if operator == 'Average':
        edge = [((x + y) / 2.0) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Hadamard':
        edge = [(x * y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L1':
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Weighted-L2':
        edge = [abs(x - y)**2 for x,y in zip(node_emb_1, node_emb_2)]
    elif operator == 'Concat':
        edge = np.concatenate((node_emb_1, node_emb_2), axis=None)
    else:
        print("Generate edge features: Operator not supported")
        print("Use default operator: Weighted-L1")
        edge = [abs(x - y) for x,y in zip(node_emb_1, node_emb_2)]
        
    return edge
def generate_edge_features(edge_list, node_embeddings, operator):
    edge_features_mtx = []
    
    # generate features for each edge in the list
    for node_index_1, node_index_2 in edge_list:
        node_emb_1 = node_embeddings[node_index_1]
        node_emb_2 = node_embeddings[node_index_2]
        
        edge_features_mtx.append(edge_features(node_emb_1, node_emb_2, operator))
        
    return edge_features_mtx

def generate_train_set(graph_train, num_edge_sample, node_embeddings, edge_operator,):
    edge_list = list(graph_train.edges)
    num_nodes = graph_train.number_of_nodes()
    
    train_edges = []
    train_edges_labels = [1] * num_edge_sample + [0] * num_edge_sample
    
    random.seed(0)
    
    # sample edges with label 1 (true edges)
    for edge_num in range(num_edge_sample):
        rand_index = random.randint(0, len(edge_list) - 1)
        
        #train_edges.append(tuple(edge_list[rand_index]))
        train_edges.append(edge_list[rand_index])
    non_edge_num = 0
    
    # sample edges with label 0 (non-exist edges)
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            train_edges.append(rand_nodes)
            non_edge_num += 1

    train_edges_features_mtx = generate_edge_features(train_edges, node_embeddings, edge_operator)
            
    return train_edges, train_edges_features_mtx, train_edges_labels

def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    num_edge_sample = len(edge_list)
    non_edge_num = 0 
    # sample edges with label 0 (non-exist edges)
    
    while(non_edge_num < num_edge_sample):
        rand_nodes = tuple(np.random.randint(low=0,high=num_nodes, size=2))
        
        if rand_nodes not in edge_list:
            test_edges.append(rand_nodes)
            test_edges_labels.append(0)
            non_edge_num += 1
        
    for edge in edge_list:
        test_edges.append(edge)
        test_edges_labels.append(1)
    
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels

'''
def generate_test_set(graph_test, node_embeddings, edge_operator):
    edge_list = graph_test.edges
    nodes_with_edge = set()
    
    for edge in edge_list:
        nodes_with_edge.add(edge[0])
        nodes_with_edge.add(edge[1])
    
    num_nodes = graph_test.number_of_nodes()
    
    test_edges = []
    test_edges_labels = []
    
    # generate all possible edges for each node with at least one edge (assume undirected edges)
    for node_1 in nodes_with_edge:
        for node_2 in range(num_nodes):
            test_edges.append((node_1, node_2))
            
            if (node_1, node_2) in edge_list:
                test_edges_labels.append(1)
            else:
                test_edges_labels.append(0)
            
    test_edges_features_mtx = generate_edge_features(test_edges, node_embeddings, edge_operator)
    
    return test_edges, test_edges_features_mtx, test_edges_labels
'''
def build_clf(feature_mtx, response_vec):
   
    logistic_regression_model = LogisticRegression(random_state = 0,max_iter=5000,solver='liblinear',verbose=1,tol=1e-6)
    binary_clf = logistic_regression_model.fit(feature_mtx, response_vec)
    
    return binary_clf

def pred_links(feature_mtx, LR_clf):
    predict_edges_labels = LR_clf.predict(feature_mtx)
    
    return predict_edges_labels

def precision_recall(predict_labels, true_labels):
    true_positive  = false_positive = 0
    true_negative =  false_negative = 0
    
    for p_label, true_label in zip(predict_labels, true_labels):
        
        #print(p_label,true_label)
        if p_label == true_label and true_label == 1:
            true_positive += 1
        elif p_label == true_label and true_label == 0:
            true_negative += 1
        elif p_label != true_label and true_label == 1:
            false_negative += 1
        elif p_label != true_label and true_label == 0:
            false_positive += 1

    print("TP: ", true_positive)
    print("TN: ", true_negative)
    print("FP: ", false_positive)
    print("FN: ", false_negative)
    
    precision = 0 
    recall = 0
    try: 
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        f1 = 2 * (precision * recall) / (precision + recall)
        print("F1: {}".format(f1))
    except: 
        print("F1 Error")
    
    cm = metrics.confusion_matrix(true_labels, predict_labels)
    print(cm)
    print(metrics.classification_report(true_labels, predict_labels))
    map = metrics.average_precision_score(true_labels, predict_labels)
    print('Mean Average Precision: {}'.format(map))
    fpr, tpr, thresholds = metrics.roc_curve(true_labels, predict_labels)
    roc_auc = metrics.auc(fpr, tpr)
    print('Area Under ROC Curve: {}'.format(roc_auc))
    
    return precision, recall

## CollegeMsg Monthly w/ Graphwave

In [5]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Month_MSG/msg_1_month_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [6]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,32), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))
np.save('temp_college_msg_monthly.npy', np.asarray(chi_list))

Completed: 0/6
Completed: 1/6
Completed: 2/6
Completed: 3/6
Completed: 4/6
Completed: 5/6


### Using only T-1 embedding

In [7]:
num_edge_sample = 400
edge_operator = 'Average' #'Average', 'Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

In [9]:
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2','Concat']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  310
TN:  548
FP:  42
FN:  280
F1: 0.6581740976645435
[[548  42]
 [280 310]]
              precision    recall  f1-score   support

           0       0.66      0.93      0.77       590
           1       0.88      0.53      0.66       590

    accuracy                           0.73      1180
   macro avg       0.77      0.73      0.72      1180
weighted avg       0.77      0.73      0.72      1180

Mean Average Precision: 0.7000192604006163
Area Under ROC Curve: 0.7271186440677966
Precision:  0.8806818181818182
Recall:  0.5254237288135594


### Use 0...T Embeddings

In [10]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [11]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  378
TN:  518
FP:  72
FN:  212
F1: 0.7269230769230769
[[518  72]
 [212 378]]
              precision    recall  f1-score   support

           0       0.71      0.88      0.78       590
           1       0.84      0.64      0.73       590

    accuracy                           0.76      1180
   macro avg       0.77      0.76      0.76      1180
weighted avg       0.77      0.76      0.76      1180

Mean Average Precision: 0.7178305084745762
Area Under ROC Curve: 0.7593220338983051
Precision:  0.84
Recall:  0.6406779661016949


### Expotential Sum 

In [12]:
embeddings = chi_list
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  370
TN:  532
FP:  58
FN:  220
F1: 0.7269155206286838
[[532  58]
 [220 370]]
              precision    recall  f1-score   support

           0       0.71      0.90      0.79       590
           1       0.86      0.63      0.73       590

    accuracy                           0.76      1180
   macro avg       0.79      0.76      0.76      1180
weighted avg       0.79      0.76      0.76      1180

Mean Average Precision: 0.7285759543798511
Area Under ROC Curve: 0.764406779661017
Precision:  0.8644859813084113
Recall:  0.6271186440677966
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------
[LibLinear]Edge Operator: Concat
TP:  369
TN:  551
FP:  39
FN:  221
F1: 0.7394789579158317
[[551  39]
 [221 369]]
              precision    recall  f1-score   support

           0       0.71      0.93      0.81       590
           1       0.90      0.63      0.74       590

    accuracy             

## CollegeMsg Weekly

In [13]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Week_MSG/msg_1_week_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [14]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/27
Completed: 1/27
Completed: 2/27
Completed: 3/27
Completed: 4/27
Completed: 5/27
Completed: 6/27
Completed: 7/27
Completed: 8/27
Completed: 9/27
Completed: 10/27
Completed: 11/27
Completed: 12/27
Completed: 13/27
Completed: 14/27
Completed: 15/27
Completed: 16/27
Completed: 17/27
Completed: 18/27
Completed: 19/27
Completed: 20/27
Completed: 21/27
Completed: 22/27
Completed: 23/27
Completed: 24/27
Completed: 25/27
Completed: 26/27


### Using only T-1 Embedding

In [16]:
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  11
TN:  85
FP:  1
FN:  75
F1: 0.22448979591836732
[[85  1]
 [75 11]]
              precision    recall  f1-score   support

           0       0.53      0.99      0.69        86
           1       0.92      0.13      0.22        86

    accuracy                           0.56       172
   macro avg       0.72      0.56      0.46       172
weighted avg       0.72      0.56      0.46       172

Mean Average Precision: 0.5532945736434108
Area Under ROC Curve: 0.5581395348837209
Precision:  0.9166666666666666
Recall:  0.12790697674418605


### Pure Sum

In [26]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [28]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  669
TN:  884
FP:  206
FN:  421
F1: 0.6809160305343511
[[884 206]
 [421 669]]
              precision    recall  f1-score   support

           0       0.68      0.81      0.74      1090
           1       0.76      0.61      0.68      1090

    accuracy                           0.71      2180
   macro avg       0.72      0.71      0.71      2180
weighted avg       0.72      0.71      0.71      2180

Mean Average Precision: 0.6623837483617301
Area Under ROC Curve: 0.7123853211009175
Precision:  0.7645714285714286
Recall:  0.6137614678899083


### Exponential Sum

In [29]:
embeddings = chi_list
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  750
TN:  989
FP:  101
FN:  340
F1: 0.7727975270479135
[[989 101]
 [340 750]]
              precision    recall  f1-score   support

           0       0.74      0.91      0.82      1090
           1       0.88      0.69      0.77      1090

    accuracy                           0.80      2180
   macro avg       0.81      0.80      0.80      2180
weighted avg       0.81      0.80      0.80      2180

Mean Average Precision: 0.7623734624133507
Area Under ROC Curve: 0.7977064220183487
Precision:  0.881316098707403
Recall:  0.6880733944954128
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------
[LibLinear]Edge Operator: Concat
TP:  734
TN:  997
FP:  93
FN:  356
F1: 0.7657798643714137
[[997  93]
 [356 734]]
              precision    recall  f1-score   support

           0       0.74      0.91      0.82      1090
           1       0.89      0.67      0.77      1090

    accuracy            

## CollegeMsg Equal (by Month)

In [30]:
#load the graphs 
with open('../data/CollegeMsg/Graphs_Equal_Edge_MSG/msg_equal_edge_dir.pkl', 'rb') as file:
    graphs = pickle.load(file)
graph_train = graphs[-2]
graph_test = graphs[-1]

In [31]:
chi_list = []
heat_print_list = []
taus_list = []
for e, g in enumerate(graphs[:-1]): #last embedding used for link prediction
    chi, heat_print, taus = graphwave_alg(g, np.linspace(0,200,50), taus='auto', verbose=True)
    chi_list.append(chi)
    heat_print_list.append(heat_print)
    taus_list.append(taus)
    print("Completed: {}/{}".format(e,len(graphs[:-1])))

Completed: 0/27
Completed: 1/27
Completed: 2/27
Completed: 3/27
Completed: 4/27
Completed: 5/27
Completed: 6/27
Completed: 7/27
Completed: 8/27
Completed: 9/27
Completed: 10/27
Completed: 11/27
Completed: 12/27
Completed: 13/27
Completed: 14/27
Completed: 15/27
Completed: 16/27
Completed: 17/27
Completed: 18/27
Completed: 19/27
Completed: 20/27
Completed: 21/27
Completed: 22/27
Completed: 23/27
Completed: 24/27
Completed: 25/27
Completed: 26/27


### Using only T-1 Embedding

In [21]:
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, chi_list[-2], edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, chi_list[-1], edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  691
TN:  1006
FP:  84
FN:  399
F1: 0.7410187667560322
[[1006   84]
 [ 399  691]]
              precision    recall  f1-score   support

           0       0.72      0.92      0.81      1090
           1       0.89      0.63      0.74      1090

    accuracy                           0.78      2180
   macro avg       0.80      0.78      0.77      2180
weighted avg       0.80      0.78      0.77      2180

Mean Average Precision: 0.7482610239715892
Area Under ROC Curve: 0.7784403669724771
Precision:  0.8916129032258064
Recall:  0.6339449541284403


### Pure Sum

In [22]:
prev_embedding = np.sum(np.asarray(chi_list[0:-1]),axis=0)
cur_embedding = np.sum(np.asarray(chi_list),axis=0)

In [24]:
num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)
#for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
for edge_operator in ['Concat']:
    train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
    test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

    LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

    print("Edge Operator: {}".format(edge_operator))
    predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
    precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
    print('Precision: ', precision)
    print('Recall: ', recall)

[LibLinear]Edge Operator: Concat
TP:  658
TN:  885
FP:  205
FN:  432
F1: 0.6738351254480286
[[885 205]
 [432 658]]
              precision    recall  f1-score   support

           0       0.67      0.81      0.74      1090
           1       0.76      0.60      0.67      1090

    accuracy                           0.71      2180
   macro avg       0.72      0.71      0.70      2180
weighted avg       0.72      0.71      0.70      2180

Mean Average Precision: 0.6584370714490735
Area Under ROC Curve: 0.7077981651376147
Precision:  0.7624565469293163
Recall:  0.6036697247706422


### Exponential Sum

In [25]:
embeddings = chi_list

num_edge_sample = 400
#Average' or 'Hadamard' or 'Weighted-L1' or 'Weighted-L2'
for decay in [1,0.9,0.5,0.3]:
    print("------------ BEGIN: {} ---------------".format(decay))
    exps = [math.pow(math.e , (-i * decay)) for i in range(1,len(embeddings[:-2]))]
    exps.reverse()
    temp_embedding = np.zeros((embeddings[0]).shape) 
    for c,e in zip(embeddings[0:-2],exps):
         temp_embedding += e * c 
    prev_embedding = temp_embedding + embeddings[-2]
    
    # this is done so the last embedding has weight one. 
    cur_embedding = temp_embedding + exps[-1] * embeddings[-2] + embeddings[-1]
    
    #for edge_operator in ['Average','Hadamard','Weighted-L1','Weighted-L2']:
    for edge_operator in ['Concat']:
        
        try:
            train_edges, train_edges_features_mtx, train_edges_labels = generate_train_set(graph_train, num_edge_sample, prev_embedding, edge_operator)
            test_edges, test_edges_features_mtx, test_edges_labels = generate_test_set(graph_test, cur_embedding, edge_operator)

            LR_clf = build_clf(train_edges_features_mtx, train_edges_labels)

            print("Edge Operator: {}".format(edge_operator))
            predict_edges_labels = pred_links(test_edges_features_mtx, LR_clf)
            precision, recall = precision_recall(list(predict_edges_labels), list(test_edges_labels))
            print('Precision: ', precision)
            print('Recall: ', recall)
        except:
            print("Edge Operator: {} ERROR".format(edge_operator))
    print("------------ END: {} ---------------".format(decay))


------------ BEGIN: 1 ---------------
[LibLinear]Edge Operator: Concat
TP:  761
TN:  998
FP:  92
FN:  329
F1: 0.7833247555326814
[[998  92]
 [329 761]]
              precision    recall  f1-score   support

           0       0.75      0.92      0.83      1090
           1       0.89      0.70      0.78      1090

    accuracy                           0.81      2180
   macro avg       0.82      0.81      0.80      2180
weighted avg       0.82      0.81      0.80      2180

Mean Average Precision: 0.7737822257117352
Area Under ROC Curve: 0.8068807339449541
Precision:  0.8921453692848769
Recall:  0.6981651376146789
------------ END: 1 ---------------
------------ BEGIN: 0.9 ---------------
[LibLinear]Edge Operator: Concat
TP:  741
TN:  989
FP:  101
FN:  349
F1: 0.7670807453416149
[[989 101]
 [349 741]]
              precision    recall  f1-score   support

           0       0.74      0.91      0.81      1090
           1       0.88      0.68      0.77      1090

    accuracy           