In [1]:
%load_ext autoreload
%autoreload 2
import os, sys, re, datetime, random, gzip, json, copy
from pathlib import Path
import networkx as nx
import numpy as np
import pandas as pd
import itertools
import collections
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import linear_model
from sklearn.metrics import roc_auc_score
from scipy import spatial
from sklearn.neighbors import NearestNeighbors

import matplotlib.pyplot as plt
PROJ_PATH = Path(os.path.join(re.sub("/CTGCN.*$", '', os.getcwd()), 'CTGCN'))
sys.path.insert(1, str(str(PROJ_PATH.parents[0] / 'DySubG/src/')))
from ranking import Evaluation
# exec(open(str(PROJ_PATH.parents[0] / 'DySubG/src/ranking.py')).read())

## Co-authorship

In [4]:
def read_node_embedding(exp, method, num_time_steps):
    node_embedding = {}
    nodes = pd.read_csv(f'./data/{exp}/nodes_set/nodes.csv', names=['nodes'])['nodes'].values
    for time_id in range(num_time_steps-2, num_time_steps):
        embs = pd.read_csv(
            './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values
        dict_embs = {k:v for k,v in zip(nodes, embs)}
        node_embedding[time_id] = dict_embs
    return node_embedding

def make_prediction(author_idx, authors, node_embedding, num_time_steps=8):
    time_id = num_time_steps - 2
    k = 50
    pred = []
    for i, aid in enumerate(author_idx):
#         if i % 10 == 0: print(i)
        distances = []
        author_embed = node_embedding[time_id][aid]
        for co in authors:
            if co != aid:
                co_author_embed = node_embedding[time_id][co]
                dist = spatial.distance.cosine(author_embed, co_author_embed)
                distances.append((aid, co, dist))
        distances.sort(key=lambda tup: tup[2])
        pred += distances[:k]
    pd_pred = pd.DataFrame(pred, columns=['author', 'co_author', 'sims'])
    return pd_pred

def eval_ranking(pred_dict, true_dict, k):
    predicted_indices = [] # collections.OrderedDict()
    true_indices = [] # collections.OrderedDict()
    author_indices = sorted(list(true_dict.keys()))
    for aid in author_indices:
        predicted_indices.append(pred_dict[aid])
        true_indices.append(true_dict[aid])
    eval_agent = Evaluation(predicted_indices, true_indices, k)
    return eval_agent.result

def print_report(exp='dblp', k=20, num_time_steps=8, recompute=True, methods=[], selected_methods=[]):
    if len(methods) == 0:
        methods = [
            'GCN', 'GAT', 'SAGE', 'GIN', 
            'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
            'TIMERS', 'DynAE', 'DynRNN', 'DynAERNN', 'DynGEM', 
            'VGRNN', 'EvolveGCN', 'CTGCN-C',
        ]
    if len(selected_methods) == 0:
        selected_methods = [
            'GCN', 'TgGAT', 'TgSAGE', 'TgGIN', 'DynAE', 
            'DynRNN', 'DynAERNN', 'DynGEM', 'EvolveGCN', 
            'VGRNN', 'CTGCN-C']
        
    try:
        pd_edges = pd.read_csv(f'./data/{exp}/0.input/temporal_edge_list.txt', sep=' ', names=['source_id', 'target_id', 'time_id'])
    except:
        pd_edges = pd.read_csv(f'./data/{exp}/0.input/edges.csv')
    
#     data = pd.read_pickle(f'./data/{exp}/0.input/data.pkl')
    node_types = pd.read_csv(f'../DySubG/dataset/dblp/node_types.csv')
    authors = sorted(node_types[node_types['node_type_name']=='author']['node_id'].values)
    
#     num_time_steps = pd_edges['time_id'].max() + 1
    ground_truth = pd.read_pickle(f'./data/{exp}/0.input/ground_truth.pkl')
    author_idx = sorted(ground_truth.keys())
    
    res = []
    for method in methods:
        print(method)
        if os.path.exists(f'./data/{exp}/0.input/{method}_pred.csv') and not recompute:
            pd_pred = pd.read_csv(f'./data/{exp}/0.input/{method}_pred.csv')
        else:
            node_embedding = read_node_embedding(exp, method, num_time_steps)
            pd_pred = make_prediction(author_idx, authors, node_embedding, num_time_steps)
            pd_pred.to_csv(f'./data/{exp}/0.input/{method}_pred.csv', index=False)
        
        pred = pd_pred.groupby('author').agg({'co_author': list}).to_dict()['co_author']
        results = eval_ranking(pred, ground_truth, k)
        tmp = results[results['k']==k]
        tmp['method'] = method
        res.append(tmp[['method', 'k', 'recall', 'mrr', 'map', 'ndcg']])
    
    df = pd.concat(res)

    print('Full report')
    display(df)

    print('Selected methods')
    display(df[df['method'].isin(selected_methods)])
    return df

In [17]:
methods = [
    'GCN', 'GAT',
    'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
    'TIMERS', 'DynAE', #'DynRNN', 
    'DynAERNN', 'DynGEM', 
    'VGRNN',
    'EvolveGCN', 'CTGCN-C',]

report = print_report('dblp', k=10, methods=methods, recompute=False)

GCN


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


GAT
TgGCN
TgGAT
TgSAGE
TgGIN
TIMERS
DynAE
DynAERNN
DynGEM
VGRNN
EvolveGCN
CTGCN-C
Full report


Unnamed: 0,method,k,recall,mrr,map,ndcg
10,GCN,10,0.354715,0.501513,0.301855,0.363781
10,GAT,10,0.047845,0.053498,0.024352,0.036737
10,TgGCN,10,0.17336,0.2417,0.128495,0.165419
10,TgGAT,10,0.222234,0.296272,0.16242,0.208323
10,TgSAGE,10,0.077817,0.08982,0.04713,0.065
10,TgGIN,10,0.177133,0.277165,0.145333,0.181739
10,TIMERS,10,0.279478,0.385739,0.222857,0.276352
10,DynAE,10,0.161054,0.239675,0.135989,0.164583
10,DynAERNN,10,0.081166,0.117677,0.073443,0.084777
10,DynGEM,10,0.139842,0.208327,0.116578,0.142428


Selected methods


Unnamed: 0,method,k,recall,mrr,map,ndcg
10,GCN,10,0.354715,0.501513,0.301855,0.363781
10,TgGAT,10,0.222234,0.296272,0.16242,0.208323
10,TgSAGE,10,0.077817,0.08982,0.04713,0.065
10,TgGIN,10,0.177133,0.277165,0.145333,0.181739
10,DynAE,10,0.161054,0.239675,0.135989,0.164583
10,DynAERNN,10,0.081166,0.117677,0.073443,0.084777
10,DynGEM,10,0.139842,0.208327,0.116578,0.142428
10,VGRNN,10,0.106373,0.158931,0.074483,0.101015
10,EvolveGCN,10,0.121292,0.175126,0.098993,0.12077
10,CTGCN-C,10,0.094484,0.120076,0.061203,0.082084


In [21]:
exp = 'dblp'
method = 'GCN'
import torch
for time_id in range(8):
    embs = pd.read_csv(
            './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t')
    torch.save(torch.tensor(embs.values), './{}_gin_gcn_embeddings.pth'.format(time_id))

In [20]:
torch.tensor(embs.values)

tensor([[ 0.0022,  0.0028, -0.0096,  ...,  0.0029,  0.0004, -0.0132],
        [ 0.0094,  0.0003, -0.0032,  ..., -0.0003, -0.0043, -0.0025],
        [ 0.0017,  0.0042, -0.0103,  ...,  0.0025, -0.0011, -0.0083],
        ...,
        [ 0.0035,  0.0018, -0.0142,  ...,  0.0008,  0.0008, -0.0098],
        [-0.0042, -0.0002,  0.0048,  ..., -0.0023, -0.0028, -0.0022],
        [-0.0012, -0.0016, -0.0010,  ...,  0.0020,  0.0021,  0.0003]],
       dtype=torch.float64)

## Expertise

In [2]:
def get_training_data(data, cname2cid, node_embedding):
    all_labels = list(cname2cid.keys())
    train_pos = []
    train_neg = []
    val_pos = []
    val_neg = []
    test_pos = []
    test_neg = []
    
    for i,d in data.items():
        node_id = d['node_id']
        time_id = d['time_id']
        pos_labels = d['label']
        if 'neg_label' in d:
            neg_labels = d['neg_label']
        else:
            neg_labels = [i for i in all_labels if i not in pos_labels]
        if len(pos_labels) > 0:
            for l in pos_labels:
                label_nid = cname2cid[l]
                pos_pair = (
                    np.array(node_embedding[time_id][node_id]), 
                    np.array(node_embedding[time_id][label_nid]))
                if d['dataset'] == 'train':
                    train_pos.append(pos_pair)
                elif d['dataset'] == 'val':
                    val_pos.append(pos_pair)
                elif d['dataset'] == 'test':
                    test_pos.append(pos_pair)
        if len(neg_labels) > 0:   
            for l in neg_labels:
                label_nid = cname2cid[l]
                neg_pair = (
                    np.array(node_embedding[time_id][node_id]), 
                    np.array(node_embedding[time_id][label_nid]))
                if d['dataset'] == 'train':
                    train_neg.append(neg_pair)
                elif d['dataset'] == 'val':
                    val_neg.append(neg_pair)
                elif d['dataset'] == 'test':
                    test_neg.append(neg_pair)
    return train_pos, train_neg, val_pos, val_neg, test_pos, test_neg


def get_link_score(fu, fv, operator='HAD'):
    """Given a pair of embeddings, compute link feature based on operator (such as Hadammad product, etc.)"""
    fu = np.array(fu)
    fv = np.array(fv)
    if operator == 'HAD':
        return np.multiply(fu, fv)
    elif operator == 'AVG':
        return (fu + fv) / 2
    elif operator == 'L1':
        return np.abs(fu - fv)
    elif operator == 'L2':
        return (fu - fv) ** 2
    else:
        raise NotImplementedError

def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
def predict_link_without_classifier(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg):
    pred_train = [sigmoid(np.dot(e[0], e[1].T)) for e in train_pos + train_neg]
    label_train = [1] * len(train_pos) + [0] * len(train_neg)
    pred_val = [sigmoid(np.dot(e[0], e[1].T)) for e in val_pos + val_neg]
    label_val = [1] * len(val_pos) + [0] * len(val_neg)
    pred_test = [sigmoid(np.dot(e[0], e[1].T)) for e in test_pos + test_neg]
    label_test = [1] * len(test_pos) + [0] * len(test_neg)
    return pred_train, label_train, pred_val, label_val, pred_test, label_test
    
def predict_link_with_classifier(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg, operator):
    train_feats = np.array([get_link_score(e[0], e[1], operator) for e in train_pos + train_neg])
    val_feats = np.array([get_link_score(e[0], e[1], operator) for e in val_pos + val_neg])
    test_feats = np.array([get_link_score(e[0], e[1], operator) for e in test_pos + test_neg])
    label_train = np.array([1] * len(train_pos) + [0] * len(train_neg))
    label_val = np.array([1] * len(val_pos) + [0] * len(val_neg))
    label_test = np.array([1] * len(test_pos) + [0] * len(test_neg))
    
    clf = linear_model.LogisticRegression(max_iter=5000)
#     clf.fit(train_feats, label_train)
    clf.fit(np.concatenate((train_feats, val_feats)), 
            np.concatenate((label_train, label_val)))
    pred_train = clf.predict_proba(train_feats)[:, 1]
    pred_val = clf.predict_proba(val_feats)[:, 1]
    pred_test = clf.predict_proba(test_feats)[:, 1]

    return pred_train, label_train, pred_val, label_val, pred_test, label_test, clf

def evaluate_classifier(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg, operators=['HAD'], threshold=0.5):
    results = {}
    models = {}
    # Predict without classifier
    pred_train, label_train, pred_val, label_val, pred_test, label_test = predict_link_without_classifier(
        train_pos, train_neg, val_pos, val_neg, test_pos, test_neg)
    results['sigmoid'] = {
        'train': roc_auc_score(label_train, pred_train), 
        'val': roc_auc_score(label_val, pred_val),
        'test': roc_auc_score(label_test, pred_test), 
    }
    results['sigmoid_f1']  = {
        'train': f1_score(label_train, [1 if i >= threshold else 0 for i in pred_train]), 
        'val': f1_score(label_val, [1 if i >= threshold else 0 for i in pred_val]),
        'test': f1_score(label_test,[1 if i >= threshold else 0 for i in pred_test]),
    }
    
    # Predict with classifier
    for operator in operators:
        pred_train, label_train, pred_val, label_val, pred_test, label_test, clf = predict_link_with_classifier(
            train_pos, train_neg, val_pos, val_neg, test_pos, test_neg, operator)
        results[operator] = {
            'train': roc_auc_score(label_train, pred_train), 
            'val': roc_auc_score(label_val, pred_val),
            'test': roc_auc_score(label_test, pred_test), 
        }
        results[f'{operator}_f1']  = {
            'train': f1_score(label_train, [1 if i >= threshold else 0 for i in pred_train]), 
            'val': f1_score(label_val, [1 if i >= threshold else 0 for i in pred_val]),
            'test': f1_score(label_test,[1 if i >= threshold else 0 for i in pred_test]),
        }
        models[operator] = clf
    return results, models

def eval_lp(data, cname2cid, method='CTGCN-C', num_time_steps=8, exp='dblp', operators=['HAD'], verbose=True, threshold=0.5):
    node_embedding = {}
    nodes = pd.read_csv(f'./data/{exp}/nodes_set/nodes.csv', names=['nodes'])['nodes'].values
    start_idx = min([d['time_id'] for i,d in data.items()])
    for time_id in range(start_idx, num_time_steps):
        embs = pd.read_csv(
            './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values
        dict_embs = {k:v for k,v in zip(nodes, embs)}
        node_embedding[time_id] = dict_embs
    train_pos, train_neg, val_pos, val_neg, test_pos, test_neg = get_training_data(
        data, cname2cid, node_embedding)
    if verbose:
        print(len(train_pos), len(train_neg), len(val_pos), len(val_neg), len(test_pos), len(test_neg))
    results = evaluate_classifier(train_pos, train_neg, val_pos, val_neg, test_pos, test_neg, operators)
    return results


def print_report(exp='imdb', methods=[], selected_methods=[], threshold=0.5):
    if len(methods) == 0:
        methods = [
            'GCN', 'GAT', 'SAGE', 'GIN', 
            'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
            'GCRN', 'TIMERS', 'DynAE', 'DynRNN', 'DynAERNN', 'DynGEM', 'DySAT',
            'VGRNN', 'EvolveGCN', 'CTGCN-C',
        ]
    if len(selected_methods) == 0:
        selected_methods = [
            'GCN', 'TgGAT', 'TgSAGE', 'TgGIN',
            'GCRN', 'TIMERS', 'DynAE', 'DynRNN', 'DynAERNN', 'DynGEM', 'DySAT',
            'VGRNN', 'EvolveGCN', 'CTGCN-C']
        
    try:
        pd_edges = pd.read_csv(f'./data/{exp}/0.input/temporal_edge_list.txt', sep=' ', names=['source_id', 'target_id', 'time_id'])
    except:
        pd_edges = pd.read_csv(f'./data/{exp}/0.input/edges.csv')
        
    data = pd.read_pickle(f'./data/{exp}/0.input/data.pkl')
    all_labels = list(set(itertools.chain(*[d['label'] for i, d in data.items()])))
    cid2cname = pd.read_pickle('/home/hoang/github/DySubG/dataset/dblp/cid2cname.pkl')
    cname2cid = {j:i for i,j in cid2cname.items() if j in all_labels}
    num_time_steps = pd_edges['time_id'].max() + 1

    operators = ['HAD', 'AVG', 'L1', 'L2']
    res = []
    for method in methods:
        print(method)
        if method in ['DynAE', 'DynRNN', 'DynAERNN']:
            filtered_data = {i:j for i,j in data.items() if j['time_id']>=1}
            results, models = eval_lp(filtered_data, cname2cid, method, num_time_steps, exp, operators, threshold=threshold)
        else:
            results, models = eval_lp(data, cname2cid, method, num_time_steps, exp, operators, threshold=threshold)
        tmp = pd.DataFrame(results)
        tmp['method'] = method
        res.append(tmp)
    df = pd.concat(res)
    df = df.reset_index().rename(columns={'index':'dataset'})
    df['best_AUC'] = df[['sigmoid', 'HAD', 'AVG', 'L1', 'L2']].max(axis=1)
    df['best_F1'] = df[['sigmoid_f1', 'HAD_f1', 'AVG_f1', 'L1_f1', 'L2_f1']].max(axis=1)

    print('Full report')
    display(df)

    print('Test report')
    display(df[df['dataset']=='test'])

    print('Selected methods')
    display(df[(df['dataset']=='test')&(df['method'].isin(selected_methods))][
        ['method', 'sigmoid', 'HAD', 'AVG', 'L1', 'L2', 'best_AUC',
         'sigmoid_f1', 'HAD_f1', 'AVG_f1', 'L1_f1', 'L2_f1', 'best_F1']])
    return df, models

def make_prediction(pred_idx, node_embedding, models):
    ranking = {}
    # 
    distances = []
    for s, ts in pred_idx.items():
        for t in ts:
            source = node_embedding[s]
            target = node_embedding[t]
            probs = sigmoid(np.dot(source, target.T))
            distances.append((s, t, probs))
        distances.sort(key=lambda tup: tup[2])
    ranking['sigmoid'] = distances                   
    #
    for operator, model in models.items():
        distances = []
        for s, ts in pred_idx.items():
            for t in ts:
                source = node_embedding[s]
                target = node_embedding[t]
                feats = np.array([get_link_score(source, target, operator)])
                probs = model.predict_proba(feats)[:, 1][0]
                distances.append((s, t, probs))
            distances.sort(key=lambda tup: tup[2])
        ranking[operator] = distances
    return ranking

def eval_ranking(pred_dict, true_dict, k):
    predicted_indices = [] # collections.OrderedDict()
    true_indices = [] # collections.OrderedDict()
    author_indices = sorted(list(true_dict.keys()))
    for aid in author_indices:
        predicted_indices.append(pred_dict[aid])
        true_indices.append(true_dict[aid])
    eval_agent = Evaluation(predicted_indices, true_indices, k)
    return eval_agent.result

In [9]:
# data = pd.read_pickle(f'./data/dblp/0.input/data.pkl')
# all_labels = list(set(itertools.chain(*[d['label'] for i, d in data.items()])))

# new_data = {}
# for i,d in data.items():
#     pos_label = d['label']
#     neg_label = [i for i in all_labels if i not in pos_label]
#     random.shuffle(neg_label)
#     neg_label = neg_label[:len(pos_label)]
#     d['neg_label'] = neg_label
#     new_data[i] = d
    
    
# pd.to_pickle(new_data, f'./data/dblp/0.input/data.pkl')

In [5]:
methods = [
    'GCN', 'GAT', #'SAGE', 'GIN', 
    'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
    'GCRN', 'TIMERS', 'DynAE', #'DynRNN', 
    'DynAERNN', 'DynGEM', 'DySAT',
    'VGRNN', 'EvolveGCN', 'CTGCN-C',
]
df, models = print_report(exp='dblp', methods=methods)

GCN
5949 12311 1409 2786 759 1886
GAT
5949 12311 1409 2786 759 1886
TgGCN
5949 12311 1409 2786 759 1886
TgGAT
5949 12311 1409 2786 759 1886
TgSAGE
5949 12311 1409 2786 759 1886
TgGIN
5949 12311 1409 2786 759 1886
GCRN
5949 12311 1409 2786 759 1886
TIMERS
5949 12311 1409 2786 759 1886
DynAE
5193 10667 1409 2786 759 1886
DynAERNN
5193 10667 1409 2786 759 1886
DynGEM
5949 12311 1409 2786 759 1886
DySAT
5949 12311 1409 2786 759 1886
VGRNN
5949 12311 1409 2786 759 1886
EvolveGCN
5949 12311 1409 2786 759 1886
CTGCN-C
5949 12311 1409 2786 759 1886
Full report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
0,train,0.580619,0.489292,0.5551,0.0,0.555284,0.0,0.576915,0.0,0.574239,0.0,GCN,0.580619,0.489292
1,val,0.604251,0.501357,0.527273,0.0,0.574047,0.0,0.594976,0.0,0.593025,0.0,GCN,0.604251,0.501357
2,test,0.612739,0.444248,0.503518,0.0,0.471489,0.0,0.602248,0.0,0.603026,0.0,GCN,0.612739,0.444248
3,train,0.5,0.49147,0.555153,0.0,0.555706,0.0,0.565627,0.0,0.565508,0.0,GAT,0.565627,0.49147
4,val,0.5,0.502855,0.55824,0.0,0.558657,0.0,0.574856,0.0,0.574362,0.0,GAT,0.574856,0.502855
5,test,0.5,0.445946,0.523163,0.0,0.523001,0.0,0.56842,0.0,0.570361,0.0,GAT,0.570361,0.445946
6,train,0.550593,0.202253,0.549053,0.0,0.538984,0.0,0.518486,0.0,0.501304,0.0,TgGCN,0.550593,0.202253
7,val,0.556282,0.214088,0.556717,0.0,0.570001,0.0,0.506594,0.0,0.490669,0.0,TgGCN,0.570001,0.214088
8,test,0.561694,0.265976,0.562762,0.0,0.481103,0.0,0.480071,0.0,0.48053,0.0,TgGCN,0.562762,0.265976
9,train,0.535343,0.475571,0.554155,0.0,0.542727,0.0,0.567358,0.0,0.566798,0.0,TgGAT,0.567358,0.475571


Test report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
2,test,0.612739,0.444248,0.503518,0.0,0.471489,0.0,0.602248,0.0,0.603026,0.0,GCN,0.612739,0.444248
5,test,0.5,0.445946,0.523163,0.0,0.523001,0.0,0.56842,0.0,0.570361,0.0,GAT,0.570361,0.445946
8,test,0.561694,0.265976,0.562762,0.0,0.481103,0.0,0.480071,0.0,0.48053,0.0,TgGCN,0.562762,0.265976
11,test,0.540466,0.453242,0.504695,0.0,0.501387,0.0,0.591223,0.0,0.591225,0.0,TgGAT,0.591225,0.453242
14,test,0.532003,0.398821,0.477569,0.0,0.486947,0.0,0.566145,0.0,0.566339,0.0,TgSAGE,0.566339,0.398821
17,test,0.549542,0.213725,0.40965,0.0,0.461855,0.0,0.470594,0.0,0.426502,0.0,TgGIN,0.549542,0.213725
20,test,0.536459,0.437811,0.592245,0.04908,0.56781,0.0,0.572748,0.240588,0.575493,0.012837,GCRN,0.592245,0.437811
23,test,0.684041,0.509038,0.756864,0.410697,0.513067,0.0,0.581598,0.33204,0.580275,0.365759,TIMERS,0.756864,0.509038
26,test,0.504181,0.445946,0.458885,0.0,0.597534,0.0,0.552572,0.0,0.435636,0.0,DynAE,0.597534,0.445946
29,test,0.414933,0.445946,0.41049,0.0,0.593267,0.0,0.55215,0.0,0.447785,0.0,DynAERNN,0.593267,0.445946


Selected methods


Unnamed: 0,method,sigmoid,HAD,AVG,L1,L2,best_AUC,sigmoid_f1,HAD_f1,AVG_f1,L1_f1,L2_f1,best_F1
2,GCN,0.612739,0.503518,0.471489,0.602248,0.603026,0.612739,0.444248,0.0,0.0,0.0,0.0,0.444248
11,TgGAT,0.540466,0.504695,0.501387,0.591223,0.591225,0.591225,0.453242,0.0,0.0,0.0,0.0,0.453242
14,TgSAGE,0.532003,0.477569,0.486947,0.566145,0.566339,0.566339,0.398821,0.0,0.0,0.0,0.0,0.398821
17,TgGIN,0.549542,0.40965,0.461855,0.470594,0.426502,0.549542,0.213725,0.0,0.0,0.0,0.0,0.213725
20,GCRN,0.536459,0.592245,0.56781,0.572748,0.575493,0.592245,0.437811,0.04908,0.0,0.240588,0.012837,0.437811
23,TIMERS,0.684041,0.756864,0.513067,0.581598,0.580275,0.756864,0.509038,0.410697,0.0,0.33204,0.365759,0.509038
26,DynAE,0.504181,0.458885,0.597534,0.552572,0.435636,0.597534,0.445946,0.0,0.0,0.0,0.0,0.445946
29,DynAERNN,0.414933,0.41049,0.593267,0.55215,0.447785,0.593267,0.445946,0.0,0.0,0.0,0.0,0.445946
32,DynGEM,0.567981,0.545054,0.583522,0.445676,0.507668,0.583522,0.445946,0.16684,0.0,0.168474,0.372968,0.445946
35,DySAT,0.505889,0.514286,0.514579,0.538453,0.536407,0.538453,0.4361,0.0,0.0,0.007853,0.010403,0.4361


In [14]:
def get_ground_truth(data, all_labels, label_mapping):
    ground_truth = {}
    pred_idx = {}
    for i,d in data.items():
        node_id = d['node_id']
        time_id = d['time_id']
        pos_labels = d['label']
        neg_labels = [i for i in all_labels if i not in pos_labels]
        if len(pos_labels) > 0:
            ground_truth[node_id] = [label_mapping[l] for l in pos_labels]
        pred_idx[node_id] = [label_mapping[l] for l in all_labels]
    return ground_truth, pred_idx

In [21]:
exp = 'dblp'
data = pd.read_pickle(f'./data/{exp}/0.input/data.pkl')
all_labels = list(set(itertools.chain(*[d['label'] for i, d in data.items()])))
cid2cname = pd.read_pickle('/home/hoang/github/DySubG/dataset/dblp/cid2cname.pkl')
label_mapping = {j:i for i,j in cid2cname.items() if j in all_labels}
ground_truth = {}
pred_idx = {}
test_data = {i: d for i,d in data.items() if d['dataset']=='test'}
# ground_truth, pred_idx = get_ground_truth(test_data, all_labels, label_mapping)
for i,d in test_data.items():
    node_id = d['node_id']
    time_id = d['time_id']
    pos_labels = d['label']
    neg_labels = [i for i in all_labels if i not in pos_labels]
    if time_id == 6:
        if len(pos_labels) > 0:
            ground_truth[node_id] = [cname2cid[l] for l in pos_labels]
        pred_idx[node_id] = [cname2cid[l] for l in all_labels]

In [22]:
exp = 'dblp'
for method in methods:
    time_id = 6
    node_embedding = pd.read_csv(
                './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values

    ranking = make_prediction(pred_idx, node_embedding, models)
    eval_metrics = []
    for operator, rk in ranking.items():
        pd_pred = pd.DataFrame(rk, columns=['source', 'target', 'sims']).sort_values(['sims'], ascending=False)
        pred = pd_pred.groupby('source').agg({'target': list}).to_dict()['target']
        res = eval_ranking(pred, ground_truth, k=5)
        res['operator'] = operator
        eval_metrics.append(res)
    pd_res = pd.concat(eval_metrics)
    print(method)
    display(pd_res[pd_res['k']==1])

GCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.319597,0.444234,0.319597,0.444234,sigmoid
1,1,0.243825,0.351607,0.243825,0.351607,HAD
1,1,0.168683,0.224953,0.168683,0.224953,AVG
1,1,0.228387,0.332703,0.228387,0.332703,L1
1,1,0.231695,0.330813,0.231695,0.330813,L2


GAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.232955,0.334594,0.232955,0.334594,sigmoid
1,1,0.232955,0.334594,0.232955,0.334594,HAD
1,1,0.319282,0.402647,0.319282,0.402647,AVG
1,1,0.174827,0.26276,0.174827,0.26276,L1
1,1,0.199401,0.281664,0.199401,0.281664,L2


TgGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.184751,0.275992,0.184751,0.275992,sigmoid
1,1,0.223661,0.308129,0.223661,0.308129,HAD
1,1,0.22051,0.328922,0.22051,0.328923,AVG
1,1,0.146156,0.240076,0.146156,0.240076,L1
1,1,0.146156,0.240076,0.146156,0.240076,L2


TgGAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.253907,0.357278,0.253907,0.357278,sigmoid
1,1,0.243982,0.347826,0.243982,0.347826,HAD
1,1,0.22051,0.328922,0.22051,0.328923,AVG
1,1,0.244928,0.338374,0.244928,0.338374,L1
1,1,0.238311,0.332703,0.238311,0.332703,L2


TgSAGE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.221613,0.3138,0.221613,0.3138,sigmoid
1,1,0.209011,0.302457,0.209011,0.302457,HAD
1,1,0.319282,0.402647,0.319282,0.402647,AVG
1,1,0.225866,0.325142,0.225866,0.325142,L1
1,1,0.212319,0.310019,0.212319,0.310019,L2


TgGIN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.173409,0.270321,0.173409,0.270321,sigmoid
1,1,0.201292,0.285444,0.201292,0.285444,HAD
1,1,0.168683,0.224953,0.168683,0.224953,AVG
1,1,0.146156,0.240076,0.146156,0.240076,L1
1,1,0.146156,0.240076,0.146156,0.240076,L2


GCRN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.239099,0.334594,0.239099,0.334594,sigmoid
1,1,0.279742,0.36862,0.279742,0.36862,HAD
1,1,0.146156,0.240076,0.146156,0.240076,AVG
1,1,0.186326,0.268431,0.186326,0.268431,L1
1,1,0.214367,0.296786,0.214367,0.296786,L2


TIMERS


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.406868,0.542533,0.406868,0.542533,sigmoid
1,1,0.23138,0.334594,0.23138,0.334594,HAD
1,1,0.145369,0.238185,0.145369,0.238185,AVG
1,1,0.315974,0.400756,0.315974,0.400756,L1
1,1,0.168683,0.224953,0.168683,0.224953,L2


DynAE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.145369,0.238185,0.145369,0.238185,sigmoid
1,1,0.319282,0.402647,0.319282,0.402647,HAD
1,1,0.22051,0.328922,0.22051,0.328923,AVG
1,1,0.146156,0.240076,0.146156,0.240076,L1
1,1,0.146156,0.240076,0.146156,0.240076,L2


DynAERNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.146156,0.240076,0.146156,0.240076,sigmoid
1,1,0.145369,0.238185,0.145369,0.238185,HAD
1,1,0.319282,0.402647,0.319282,0.402647,AVG
1,1,0.146156,0.240076,0.146156,0.240076,L1
1,1,0.146156,0.240076,0.146156,0.240076,L2


DynGEM


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.299433,0.383743,0.299433,0.383743,sigmoid
1,1,0.232798,0.319471,0.232798,0.319471,HAD
1,1,0.146156,0.240076,0.146156,0.240076,AVG
1,1,0.197511,0.304348,0.197511,0.304348,L1
1,1,0.22051,0.328922,0.22051,0.328923,L2


DySAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.228702,0.319471,0.228702,0.319471,sigmoid
1,1,0.206333,0.287335,0.206333,0.287335,HAD
1,1,0.211689,0.306238,0.211689,0.306238,AVG
1,1,0.205545,0.293006,0.205545,0.293006,L1
1,1,0.226812,0.310019,0.226812,0.310019,L2


VGRNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.30794,0.391304,0.30794,0.391304,sigmoid
1,1,0.155608,0.249527,0.155608,0.249527,HAD
1,1,0.146156,0.240076,0.146156,0.240076,AVG
1,1,0.146156,0.240076,0.146156,0.240076,L1
1,1,0.319282,0.402647,0.319282,0.402647,L2


EvolveGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.207908,0.304348,0.207908,0.304348,sigmoid
1,1,0.325898,0.431002,0.325898,0.431002,HAD
1,1,0.232955,0.334594,0.232955,0.334594,AVG
1,1,0.232955,0.334594,0.232955,0.334594,L1
1,1,0.232955,0.334594,0.232955,0.334594,L2


CTGCN-C


Unnamed: 0,k,recall,mrr,map,ndcg,operator
1,1,0.221141,0.311909,0.221141,0.311909,sigmoid
1,1,0.226024,0.323251,0.226024,0.323251,HAD
1,1,0.168683,0.224953,0.168683,0.224953,AVG
1,1,0.214052,0.285444,0.214052,0.285444,L1
1,1,0.207908,0.283554,0.207908,0.283554,L2


In [23]:
exp = 'dblp'
for method in methods:
    time_id = 6
    node_embedding = pd.read_csv(
                './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values

    ranking = make_prediction(pred_idx, node_embedding, models)
    eval_metrics = []
    for operator, rk in ranking.items():
        pd_pred = pd.DataFrame(rk, columns=['source', 'target', 'sims']).sort_values(['sims'], ascending=False)
        pred = pd_pred.groupby('source').agg({'target': list}).to_dict()['target']
        res = eval_ranking(pred, ground_truth, k=5)
        res['operator'] = operator
        eval_metrics.append(res)
    pd_res = pd.concat(eval_metrics)
    print(method)
    display(pd_res[pd_res['k']==2])

GCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.542943,0.55104,0.451024,0.524945,sigmoid
2,2,0.463548,0.456522,0.37218,0.440823,HAD
2,2,0.487965,0.389414,0.345164,0.403977,AVG
2,2,0.431411,0.442344,0.34414,0.414189,L1
2,2,0.46087,0.449905,0.361862,0.432844,L2


GAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.393604,0.408318,0.335082,0.388377,sigmoid
2,2,0.393604,0.408318,0.335082,0.388377,HAD
2,2,0.487965,0.478261,0.420463,0.463351,AVG
2,2,0.363831,0.372401,0.280813,0.346405,L1
2,2,0.404159,0.395085,0.313579,0.376425,L2


TgGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.343667,0.36673,0.277111,0.33809,sigmoid
2,2,0.443069,0.431002,0.345243,0.410316,HAD
2,2,0.365879,0.388469,0.315627,0.371894,AVG
2,2,0.290107,0.314745,0.232609,0.292554,L1
2,2,0.291525,0.31569,0.233554,0.294016,L2


TgGAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.438815,0.452741,0.361783,0.426027,sigmoid
2,2,0.449212,0.455577,0.361232,0.429964,HAD
2,2,0.366667,0.395085,0.312161,0.374009,AVG
2,2,0.444329,0.449905,0.356506,0.422785,L1
2,2,0.444644,0.436673,0.359026,0.422155,L2


TgSAGE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.397858,0.416824,0.319014,0.383358,sigmoid
2,2,0.37029,0.388469,0.301371,0.362126,HAD
2,2,0.487965,0.478261,0.420463,0.463351,AVG
2,2,0.415658,0.422495,0.3339,0.397582,L1
2,2,0.426843,0.426276,0.331065,0.400967,L2


TgGIN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.336106,0.358223,0.269234,0.332341,sigmoid
2,2,0.420542,0.420605,0.31736,0.387362,HAD
2,2,0.487965,0.389414,0.345164,0.403977,AVG
2,2,0.291525,0.31569,0.233554,0.294016,L1
2,2,0.291525,0.31569,0.233554,0.294016,L2


GCRN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.429206,0.425331,0.351386,0.4115,sigmoid
2,2,0.522936,0.491493,0.421172,0.483466,HAD
2,2,0.291525,0.31569,0.233554,0.294016,AVG
2,2,0.409515,0.393195,0.31098,0.375468,L1
2,2,0.429994,0.410208,0.339493,0.398321,L2


TIMERS


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.579962,0.613422,0.519392,0.582536,sigmoid
2,2,0.491115,0.469754,0.378245,0.451668,HAD
2,2,0.314052,0.34121,0.233003,0.295234,AVG
2,2,0.487965,0.477316,0.418809,0.462384,L1
2,2,0.314839,0.331758,0.246078,0.300022,L2


DynAE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.314052,0.34121,0.233003,0.295234,sigmoid
2,2,0.486074,0.482042,0.417943,0.463621,HAD
2,2,0.365879,0.388469,0.315627,0.371894,AVG
2,2,0.314839,0.339319,0.234814,0.298587,L1
2,2,0.314839,0.339319,0.234814,0.298587,L2


DynAERNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.314839,0.339319,0.234814,0.298587,sigmoid
2,2,0.46465,0.42155,0.31098,0.396734,HAD
2,2,0.46465,0.503781,0.397936,0.458781,AVG
2,2,0.314839,0.339319,0.234814,0.298587,L1
2,2,0.314839,0.339319,0.234814,0.298587,L2


DynGEM


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.517108,0.489603,0.425819,0.480002,sigmoid
2,2,0.481191,0.470699,0.363359,0.44055,HAD
2,2,0.314839,0.339319,0.234814,0.298587,AVG
2,2,0.352804,0.375236,0.292155,0.354588,L1
2,2,0.35942,0.389414,0.309326,0.369239,L2


DySAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.417864,0.410208,0.339887,0.395106,sigmoid
2,2,0.421172,0.393195,0.327442,0.385709,HAD
2,2,0.415186,0.400756,0.330592,0.38849,AVG
2,2,0.404001,0.39225,0.320589,0.377608,L1
2,2,0.400536,0.397921,0.328859,0.381771,L2


VGRNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.522779,0.524575,0.424874,0.495112,sigmoid
2,2,0.306648,0.327977,0.245841,0.307046,HAD
2,2,0.291525,0.31569,0.233554,0.294016,AVG
2,2,0.291525,0.31569,0.233554,0.294016,L1
2,2,0.539792,0.538752,0.439052,0.510032,L2


EvolveGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.376118,0.382798,0.311216,0.367943,sigmoid
2,2,0.487492,0.50189,0.424401,0.47843,HAD
2,2,0.393604,0.408318,0.335082,0.388377,AVG
2,2,0.393604,0.408318,0.335082,0.388377,L1
2,2,0.393604,0.408318,0.335082,0.388377,L2


CTGCN-C


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.428733,0.414934,0.340044,0.398009,sigmoid
2,2,0.39644,0.417769,0.321692,0.383765,HAD
2,2,0.389193,0.378072,0.282624,0.349158,AVG
2,2,0.46402,0.42344,0.353986,0.414838,L1
2,2,0.420227,0.402647,0.325866,0.382547,L2


In [7]:
exp = 'dblp'
for method in methods:
    time_id = 6
    node_embedding = pd.read_csv(
                './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values

    ranking = make_prediction(pred_idx, node_embedding, models)
    eval_metrics = []
    for operator, rk in ranking.items():
        pd_pred = pd.DataFrame(rk, columns=['source', 'target', 'sims']).sort_values(['sims'], ascending=False)
        pred = pd_pred.groupby('source').agg({'target': list}).to_dict()['target']
        res = eval_ranking(pred, ground_truth, k=5)
        res['operator'] = operator
        eval_metrics.append(res)
    pd_res = pd.concat(eval_metrics)
    print(method)
    display(pd_res[pd_res['k']==2])

GCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.547222,0.555243,0.456164,0.529393,sigmoid
2,2,0.464825,0.455993,0.372441,0.440931,HAD
2,2,0.487141,0.388577,0.344741,0.403249,AVG
2,2,0.436735,0.447566,0.350281,0.419674,L1
2,2,0.465918,0.453184,0.365964,0.436772,L2


GAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.383989,0.400749,0.328901,0.384407,sigmoid
2,2,0.383989,0.400749,0.328901,0.384407,HAD
2,2,0.487141,0.476592,0.419335,0.462067,AVG
2,2,0.369788,0.378277,0.287547,0.352524,L1
2,2,0.409738,0.398876,0.318134,0.380882,L2


TgGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.344195,0.366105,0.277325,0.337979,sigmoid
2,2,0.442665,0.430712,0.345755,0.41022,HAD
2,2,0.366199,0.38764,0.315481,0.371466,AVG
2,2,0.296754,0.321161,0.239794,0.299178,L1
2,2,0.298159,0.322097,0.24073,0.300627,L2


TgGAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.44407,0.457865,0.367759,0.431401,sigmoid
2,2,0.450624,0.455993,0.362531,0.430865,HAD
2,2,0.366979,0.394195,0.312047,0.373562,AVG
2,2,0.449532,0.455056,0.362531,0.42819,L1
2,2,0.447971,0.438202,0.361283,0.42431,L2


TgSAGE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.403496,0.422285,0.32539,0.389132,sigmoid
2,2,0.372441,0.388577,0.302294,0.362971,HAD
2,2,0.487141,0.476592,0.419335,0.462067,AVG
2,2,0.419257,0.424157,0.336392,0.399968,L1
2,2,0.426592,0.42603,0.33171,0.400958,L2


TgGIN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.336704,0.357678,0.269522,0.332283,sigmoid
2,2,0.42035,0.419476,0.317197,0.386789,HAD
2,2,0.487141,0.388577,0.344741,0.403249,AVG
2,2,0.298159,0.322097,0.24073,0.300627,L1
2,2,0.298159,0.322097,0.24073,0.300627,L2


GCRN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.441573,0.432584,0.360971,0.421267,sigmoid
2,2,0.523658,0.490637,0.420974,0.483175,HAD
2,2,0.292541,0.315543,0.234176,0.294317,AVG
2,2,0.415044,0.397004,0.315559,0.379933,L1
2,2,0.433458,0.411985,0.341932,0.4007,L2


TIMERS


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.586704,0.618914,0.525297,0.58835,sigmoid
2,2,0.484644,0.462547,0.371895,0.444875,HAD
2,2,0.314856,0.340824,0.23363,0.295523,AVG
2,2,0.492759,0.48221,0.424251,0.467418,L1
2,2,0.315637,0.330524,0.245646,0.299576,L2


DynAE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.314856,0.34176,0.234566,0.296215,sigmoid
2,2,0.485268,0.481273,0.417775,0.463025,HAD
2,2,0.366199,0.38764,0.315481,0.371466,AVG
2,2,0.321255,0.345506,0.241979,0.305154,L1
2,2,0.321255,0.345506,0.241979,0.305154,L2


DynAERNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.315637,0.338951,0.235424,0.298845,sigmoid
2,2,0.464045,0.420412,0.310877,0.396073,HAD
2,2,0.464045,0.501873,0.397019,0.45754,AVG
2,2,0.321255,0.345506,0.241979,0.305154,L1
2,2,0.321255,0.343633,0.240106,0.303772,L2


DynGEM


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.521629,0.494382,0.429323,0.483645,sigmoid
2,2,0.484176,0.47191,0.365574,0.442533,HAD
2,2,0.315637,0.338951,0.235424,0.298845,AVG
2,2,0.358864,0.381086,0.298783,0.360631,L1
2,2,0.361673,0.388577,0.309238,0.369326,L2


DySAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.411454,0.396067,0.327965,0.383201,sigmoid
2,2,0.42113,0.410112,0.340762,0.396935,HAD
2,2,0.432678,0.421348,0.350671,0.407056,AVG
2,2,0.417853,0.399813,0.332179,0.387627,L1
2,2,0.452497,0.422285,0.361985,0.417332,L2


VGRNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.519757,0.520599,0.421832,0.491657,sigmoid
2,2,0.307522,0.327715,0.246348,0.307225,HAD
2,2,0.292541,0.315543,0.234176,0.294317,AVG
2,2,0.298159,0.322097,0.24073,0.300627,L1
2,2,0.534738,0.533708,0.434941,0.505256,L2


EvolveGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.394444,0.412921,0.338187,0.393881,sigmoid
2,2,0.479963,0.480337,0.411845,0.466034,HAD
2,2,0.383989,0.400749,0.328901,0.384407,AVG
2,2,0.393976,0.404494,0.331398,0.3895,L1
2,2,0.384769,0.398876,0.326717,0.383939,L2


CTGCN-C


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.450936,0.43633,0.358162,0.41701,sigmoid
2,2,0.398346,0.418539,0.323361,0.385098,HAD
2,2,0.389295,0.377341,0.282787,0.348943,AVG
2,2,0.469039,0.428839,0.360034,0.420317,L1
2,2,0.423783,0.403558,0.327497,0.384382,L2


In [15]:
methods = [
    'GCN', 'GAT', #'SAGE', 'GIN', 
    'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
    'GCRN', 'TIMERS', 'DynAE', #'DynRNN', 
    'DynAERNN', 'DynGEM', 'DySAT',
    'VGRNN', 'EvolveGCN', 'CTGCN-C',
]
df, models = print_report(exp='dblp', methods=methods)

GCN
5949 12311 1409 2786 759 1886
GAT
5949 12311 1409 2786 759 1886
TgGCN
5949 12311 1409 2786 759 1886
TgGAT
5949 12311 1409 2786 759 1886
TgSAGE
5949 12311 1409 2786 759 1886
TgGIN
5949 12311 1409 2786 759 1886
GCRN
5949 12311 1409 2786 759 1886
TIMERS
5949 12311 1409 2786 759 1886
DynAE
5193 10667 1409 2786 759 1886
DynAERNN
5193 10667 1409 2786 759 1886
DynGEM
5949 12311 1409 2786 759 1886
DySAT
5949 12311 1409 2786 759 1886
VGRNN
5949 12311 1409 2786 759 1886
EvolveGCN
5949 12311 1409 2786 759 1886
CTGCN-C
5949 12311 1409 2786 759 1886
Full report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
0,train,0.580619,0.489292,0.5551,0.0,0.555284,0.0,0.576915,0.0,0.574239,0.0,GCN,0.580619,0.489292
1,val,0.604251,0.501357,0.527273,0.0,0.574047,0.0,0.594976,0.0,0.593025,0.0,GCN,0.604251,0.501357
2,test,0.612739,0.444248,0.503518,0.0,0.471489,0.0,0.602248,0.0,0.603026,0.0,GCN,0.612739,0.444248
3,train,0.5,0.49147,0.555153,0.0,0.555706,0.0,0.565627,0.0,0.565508,0.0,GAT,0.565627,0.49147
4,val,0.5,0.502855,0.55824,0.0,0.558657,0.0,0.574856,0.0,0.574362,0.0,GAT,0.574856,0.502855
5,test,0.5,0.445946,0.523163,0.0,0.523001,0.0,0.56842,0.0,0.570361,0.0,GAT,0.570361,0.445946
6,train,0.550593,0.202253,0.549053,0.0,0.538984,0.0,0.518486,0.0,0.501304,0.0,TgGCN,0.550593,0.202253
7,val,0.556282,0.214088,0.556717,0.0,0.570001,0.0,0.506594,0.0,0.490669,0.0,TgGCN,0.570001,0.214088
8,test,0.561694,0.265976,0.562762,0.0,0.481103,0.0,0.480071,0.0,0.48053,0.0,TgGCN,0.562762,0.265976
9,train,0.535343,0.475571,0.554155,0.0,0.542727,0.0,0.567358,0.0,0.566798,0.0,TgGAT,0.567358,0.475571


Test report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
2,test,0.612739,0.444248,0.503518,0.0,0.471489,0.0,0.602248,0.0,0.603026,0.0,GCN,0.612739,0.444248
5,test,0.5,0.445946,0.523163,0.0,0.523001,0.0,0.56842,0.0,0.570361,0.0,GAT,0.570361,0.445946
8,test,0.561694,0.265976,0.562762,0.0,0.481103,0.0,0.480071,0.0,0.48053,0.0,TgGCN,0.562762,0.265976
11,test,0.540466,0.453242,0.504695,0.0,0.501387,0.0,0.591223,0.0,0.591225,0.0,TgGAT,0.591225,0.453242
14,test,0.532003,0.398821,0.477569,0.0,0.486947,0.0,0.566145,0.0,0.566339,0.0,TgSAGE,0.566339,0.398821
17,test,0.549542,0.213725,0.40965,0.0,0.461855,0.0,0.470594,0.0,0.426502,0.0,TgGIN,0.549542,0.213725
20,test,0.536459,0.437811,0.592245,0.04908,0.56781,0.0,0.572748,0.240588,0.575493,0.012837,GCRN,0.592245,0.437811
23,test,0.684041,0.509038,0.756864,0.410697,0.513067,0.0,0.581598,0.33204,0.580275,0.365759,TIMERS,0.756864,0.509038
26,test,0.504181,0.445946,0.458885,0.0,0.597534,0.0,0.552572,0.0,0.435636,0.0,DynAE,0.597534,0.445946
29,test,0.414933,0.445946,0.41049,0.0,0.593267,0.0,0.55215,0.0,0.447785,0.0,DynAERNN,0.593267,0.445946


Selected methods


Unnamed: 0,method,sigmoid,HAD,AVG,L1,L2,best_AUC,sigmoid_f1,HAD_f1,AVG_f1,L1_f1,L2_f1,best_F1
2,GCN,0.612739,0.503518,0.471489,0.602248,0.603026,0.612739,0.444248,0.0,0.0,0.0,0.0,0.444248
11,TgGAT,0.540466,0.504695,0.501387,0.591223,0.591225,0.591225,0.453242,0.0,0.0,0.0,0.0,0.453242
14,TgSAGE,0.532003,0.477569,0.486947,0.566145,0.566339,0.566339,0.398821,0.0,0.0,0.0,0.0,0.398821
17,TgGIN,0.549542,0.40965,0.461855,0.470594,0.426502,0.549542,0.213725,0.0,0.0,0.0,0.0,0.213725
20,GCRN,0.536459,0.592245,0.56781,0.572748,0.575493,0.592245,0.437811,0.04908,0.0,0.240588,0.012837,0.437811
23,TIMERS,0.684041,0.756864,0.513067,0.581598,0.580275,0.756864,0.509038,0.410697,0.0,0.33204,0.365759,0.509038
26,DynAE,0.504181,0.458885,0.597534,0.552572,0.435636,0.597534,0.445946,0.0,0.0,0.0,0.0,0.445946
29,DynAERNN,0.414933,0.41049,0.593267,0.55215,0.447785,0.593267,0.445946,0.0,0.0,0.0,0.0,0.445946
32,DynGEM,0.567981,0.545054,0.583522,0.445676,0.507668,0.583522,0.445946,0.16684,0.0,0.168474,0.372968,0.445946
35,DySAT,0.505889,0.514286,0.514579,0.538453,0.536407,0.538453,0.4361,0.0,0.0,0.007853,0.010403,0.4361


In [16]:
exp = 'dblp'
data = pd.read_pickle(f'./data/{exp}/0.input/data.pkl')
all_labels = list(set(itertools.chain(*[d['label'] for i, d in data.items()])))
cid2cname = pd.read_pickle('/home/hoang/github/DySubG/dataset/dblp/cid2cname.pkl')
cname2cid = {j:i for i,j in cid2cname.items() if j in all_labels}
ground_truth = {}
pred_idx = {}
for i,d in data.items():
    node_id = d['node_id']
    time_id = d['time_id']
    pos_labels = d['label']
    neg_labels = [i for i in all_labels if i not in pos_labels]
    if time_id == 6:
        if len(pos_labels) > 0:
            ground_truth[node_id] = [cname2cid[l] for l in pos_labels]
        pred_idx[node_id] = [cname2cid[l] for l in all_labels]

In [12]:
exp = 'dblp'
for method in methods:
    time_id = 6
    node_embedding = pd.read_csv(
                './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values

    ranking = make_prediction(pred_idx, node_embedding, models)
    eval_metrics = []
    for operator, rk in ranking.items():
        pd_pred = pd.DataFrame(rk, columns=['source', 'target', 'sims']).sort_values(['sims'], ascending=False)
        pred = pd_pred.groupby('source').agg({'target': list}).to_dict()['target']
        res = eval_ranking(pred, ground_truth, k=5)
        res['operator'] = operator
        eval_metrics.append(res)
    pd_res = pd.concat(eval_metrics)
    print(method)
    display(pd_res[pd_res['k']==2])

GCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.464512,0.516367,0.395636,0.470063,sigmoid
2,2,0.385406,0.391162,0.297981,0.367076,HAD
2,2,0.370622,0.339607,0.26237,0.326253,AVG
2,2,0.34599,0.38216,0.270226,0.339108,L1
2,2,0.375177,0.388707,0.283252,0.353009,L2


GAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.267512,0.323241,0.221345,0.275474,sigmoid
2,2,0.42946,0.436989,0.316189,0.399175,HAD
2,2,0.370622,0.339607,0.26237,0.326253,AVG
2,2,0.320158,0.351064,0.249482,0.309491,L1
2,2,0.328478,0.345336,0.253983,0.312775,L2


TgGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.350355,0.403437,0.288502,0.351103,sigmoid
2,2,0.365357,0.399345,0.295117,0.359304,HAD
2,2,0.316339,0.374795,0.255101,0.318886,AVG
2,2,0.332215,0.396072,0.275341,0.337425,L1
2,2,0.328942,0.350245,0.23422,0.307552,L2


TgGAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.38036,0.40671,0.302482,0.371044,sigmoid
2,2,0.392908,0.402619,0.300914,0.37144,HAD
2,2,0.309029,0.306056,0.230947,0.296203,AVG
2,2,0.373131,0.404255,0.295322,0.367497,L1
2,2,0.359356,0.378887,0.284274,0.35072,L2


TgSAGE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.361211,0.386252,0.284424,0.348558,sigmoid
2,2,0.366667,0.388707,0.280742,0.349748,HAD
2,2,0.42946,0.468903,0.343331,0.419227,AVG
2,2,0.367894,0.39689,0.285925,0.355271,L1
2,2,0.37736,0.404255,0.291162,0.361711,L2


TgGIN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.355265,0.368249,0.258088,0.333057,sigmoid
2,2,0.406219,0.439444,0.318249,0.390293,HAD
2,2,0.370622,0.339607,0.26237,0.326253,AVG
2,2,0.328942,0.350245,0.23422,0.307552,L1
2,2,0.328942,0.350245,0.23422,0.307552,L2


GCRN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.321713,0.354337,0.261429,0.320842,sigmoid
2,2,0.374223,0.380524,0.29955,0.357005,HAD
2,2,0.240835,0.274959,0.192553,0.248693,AVG
2,2,0.293208,0.315876,0.235243,0.291616,L1
2,2,0.338762,0.358429,0.274045,0.33419,L2


TIMERS


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.520158,0.581833,0.461443,0.535034,sigmoid
2,2,0.3509,0.389525,0.271113,0.342763,HAD
2,2,0.320349,0.385434,0.265998,0.325579,AVG
2,2,0.4491,0.427169,0.350696,0.41298,L1
2,2,0.324032,0.364975,0.252087,0.316438,L2


DynAE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.248145,0.298691,0.187316,0.246629,sigmoid
2,2,0.329705,0.364975,0.277196,0.325541,HAD
2,2,0.320349,0.385434,0.265998,0.325579,AVG
2,2,0.256738,0.303601,0.19659,0.257345,L1
2,2,0.255101,0.301964,0.193863,0.254774,L2


DynAERNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.322395,0.342881,0.226855,0.300401,sigmoid
2,2,0.322395,0.342881,0.226855,0.300401,HAD
2,2,0.361266,0.438625,0.306301,0.373257,AVG
2,2,0.327305,0.346972,0.230947,0.304707,L1
2,2,0.327305,0.346972,0.230947,0.304707,L2


DynGEM


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.438325,0.437807,0.34524,0.411189,sigmoid
2,2,0.399182,0.441899,0.312916,0.388266,HAD
2,2,0.322395,0.386252,0.266612,0.328209,AVG
2,2,0.327305,0.388707,0.269272,0.331444,L1
2,2,0.300846,0.337152,0.260952,0.314356,L2


DySAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.319122,0.350245,0.259929,0.317803,sigmoid
2,2,0.330033,0.373159,0.277591,0.336246,HAD
2,2,0.320758,0.369885,0.264839,0.327732,AVG
2,2,0.325532,0.370704,0.272272,0.33007,L1
2,2,0.345581,0.374795,0.288025,0.346327,L2


VGRNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.416367,0.456628,0.331056,0.406738,sigmoid
2,2,0.333852,0.388707,0.266885,0.333488,HAD
2,2,0.322395,0.386252,0.266612,0.328209,AVG
2,2,0.328942,0.393617,0.273977,0.33536,L1
2,2,0.426187,0.466448,0.340876,0.416558,L2


EvolveGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.248691,0.279869,0.188789,0.243811,sigmoid
2,2,0.296154,0.337152,0.241435,0.296312,HAD
2,2,0.267512,0.323241,0.221345,0.275474,AVG
2,2,0.283333,0.327332,0.230892,0.285897,L1
2,2,0.269149,0.320786,0.21773,0.273584,L2


CTGCN-C


Unnamed: 0,k,recall,mrr,map,ndcg,operator
2,2,0.31293,0.337971,0.233797,0.297977,sigmoid
2,2,0.38467,0.39198,0.293221,0.361484,HAD
2,2,0.329705,0.364975,0.277196,0.325541,AVG
2,2,0.364348,0.383797,0.282351,0.345264,L1
2,2,0.303655,0.335516,0.243412,0.29713,L2


In [None]:
0.464512	0.516367	0.395636	0.470063	
0.392908	0.402619	0.300914	0.371440
0.429460	0.468903	0.343331	0.419227	

In [18]:
exp = 'dblp'
for method in methods:
    time_id = 6
    node_embedding = pd.read_csv(
                './data/{}/2.embedding/{}/{:02d}.csv'.format(exp, method, time_id), index_col=0, sep='\t').values

    ranking = make_prediction(pred_idx, node_embedding, models)
    eval_metrics = []
    for operator, rk in ranking.items():
        pd_pred = pd.DataFrame(rk, columns=['source', 'target', 'sims']).sort_values(['sims'], ascending=False)
        pred = pd_pred.groupby('source').agg({'target': list}).to_dict()['target']
        res = eval_ranking(pred, ground_truth, k=5)
        res['operator'] = operator
        eval_metrics.append(res)
    pd_res = pd.concat(eval_metrics)
    print(method)
    display(pd_res[pd_res['k']==3])

GCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.718872,0.600819,0.527447,0.599876,sigmoid
3,3,0.676181,0.526465,0.454537,0.535357,HAD
3,3,0.633333,0.451796,0.399795,0.479036,AVG
3,3,0.631285,0.499685,0.428019,0.503903,L1
3,3,0.700284,0.52489,0.458134,0.542412,L2


GAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.546692,0.451166,0.404521,0.458154,sigmoid
3,3,0.546692,0.451166,0.404521,0.458154,HAD
3,3,0.633333,0.540643,0.475095,0.536904,AVG
3,3,0.581191,0.438563,0.367948,0.446721,L1
3,3,0.579616,0.453686,0.385486,0.459106,L2


TgGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.520069,0.424701,0.344397,0.414399,sigmoid
3,3,0.600252,0.482672,0.409431,0.482848,HAD
3,3,0.534562,0.452741,0.376507,0.441509,AVG
3,3,0.460208,0.379017,0.294539,0.363635,L1
3,3,0.460208,0.379332,0.294854,0.363903,L2


TgGAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.646723,0.511342,0.451229,0.522357,sigmoid
3,3,0.644518,0.511657,0.444481,0.518564,HAD
3,3,0.685948,0.503466,0.43117,0.520477,AVG
3,3,0.645306,0.511657,0.437392,0.515145,L1
3,3,0.61443,0.488973,0.431511,0.498748,L2


TgSAGE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.615532,0.481727,0.408144,0.485417,sigmoid
3,3,0.610964,0.468494,0.393599,0.473176,HAD
3,3,0.708475,0.564587,0.502767,0.57616,AVG
3,3,0.633018,0.493069,0.419093,0.497439,L1
3,3,0.653182,0.496219,0.419723,0.504122,L2


TgGIN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.532042,0.416194,0.351329,0.419762,sigmoid
3,3,0.594108,0.470384,0.388847,0.467315,HAD
3,3,0.708475,0.47574,0.427468,0.518291,AVG
3,3,0.512035,0.372401,0.334394,0.395593,L1
3,3,0.513926,0.373661,0.335024,0.396752,L2


GCRN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.620573,0.484562,0.435686,0.501467,sigmoid
3,3,0.654915,0.534342,0.477326,0.543234,HAD
3,3,0.512035,0.372401,0.334394,0.395593,AVG
3,3,0.607026,0.451166,0.393599,0.469043,L1
3,3,0.619786,0.470699,0.417229,0.48886,L2


TIMERS


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.720605,0.648708,0.58463,0.640738,sigmoid
3,3,0.698078,0.528355,0.467796,0.548085,HAD
3,3,0.460208,0.387209,0.294408,0.365432,AVG
3,3,0.633333,0.539698,0.47344,0.535836,L1
3,3,0.460208,0.379647,0.306537,0.371134,L2


DynAE


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.534562,0.406742,0.323918,0.404234,sigmoid
3,3,0.708475,0.565848,0.501927,0.576002,HAD
3,3,0.512035,0.426276,0.386536,0.434396,AVG
3,3,0.460208,0.387209,0.295274,0.365884,L1
3,3,0.460208,0.387209,0.295274,0.365884,L2


DynAERNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.53535,0.409893,0.323157,0.40514,sigmoid
3,3,0.610807,0.460618,0.375746,0.464358,HAD
3,3,0.685161,0.563012,0.492738,0.566613,AVG
3,3,0.53535,0.409893,0.323157,0.40514,L1
3,3,0.53535,0.409893,0.323157,0.40514,L2


DynGEM


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.713201,0.563327,0.500457,0.576342,sigmoid
3,3,0.656175,0.516698,0.438731,0.520351,HAD
3,3,0.460208,0.387209,0.295274,0.365884,AVG
3,3,0.536452,0.442029,0.361227,0.432092,L1
3,3,0.514556,0.427851,0.384436,0.434563,L2


DySAT


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.559294,0.455577,0.401554,0.461163,sigmoid
3,3,0.57552,0.444865,0.393153,0.458474,HAD
3,3,0.557089,0.448015,0.391525,0.453883,AVG
3,3,0.560555,0.440769,0.388768,0.451554,L1
3,3,0.550788,0.44644,0.392838,0.452276,L2


VGRNN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.691462,0.570573,0.494786,0.569538,sigmoid
3,3,0.47344,0.390989,0.306511,0.375988,HAD
3,3,0.460208,0.379332,0.294854,0.363903,AVG
3,3,0.460208,0.379332,0.294854,0.363903,L1
3,3,0.708475,0.584751,0.508963,0.584458,L2


EvolveGCN


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.536452,0.430057,0.382493,0.439561,sigmoid
3,3,0.65649,0.556081,0.495888,0.557053,HAD
3,3,0.546692,0.451166,0.404521,0.458154,AVG
3,3,0.546692,0.451166,0.404521,0.458154,L1
3,3,0.546692,0.451166,0.404521,0.458154,L2


CTGCN-C


Unnamed: 0,k,recall,mrr,map,ndcg,operator
3,3,0.614272,0.476686,0.415312,0.487062,sigmoid
3,3,0.625299,0.487713,0.414498,0.492136,HAD
3,3,0.708475,0.47196,0.406622,0.506061,AVG
3,3,0.62719,0.477001,0.420537,0.493974,L1
3,3,0.597574,0.463138,0.393783,0.468598,L2


In [8]:
# dysat_embedding = pd.read_pickle('../DySAT_pytorch/dblp.pkl').cpu().detach().numpy()
# for i in range(8):
#     embedding_path = './data/dblp/2.embedding/DySAT/{:02d}.csv'.format(i)
#     embs = dysat_embedding[:, i, :]
#     df_export = pd.DataFrame(data=embs)
#     df_export.to_csv(embedding_path, sep='\t', header=True, index=True)

In [9]:
methods = [
    'GCN', 'GAT', #'SAGE', 'GIN', 
    'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
    'GCRN', 'TIMERS', 'DynAE', #'DynRNN', 
    'DynAERNN', 'DynGEM', 
    'VGRNN', 'EvolveGCN', 'CTGCN-C',
]
df = print_report(exp='dblp', methods=methods, threshold=0.1)

GCN
6766 16778 1634 3880 939 2691
GAT
6766 16778 1634 3880 939 2691
TgGCN
6766 16778 1634 3880 939 2691
TgGAT
6766 16778 1634 3880 939 2691
TgSAGE
6766 16778 1634 3880 939 2691
TgGIN
6766 16778 1634 3880 939 2691
GCRN
6766 16778 1634 3880 939 2691
TIMERS
6766 16778 1634 3880 939 2691
DynAE
5926 14534 1634 3880 939 2691
DynAERNN
5926 14534 1634 3880 939 2691
DynGEM
6766 16778 1634 3880 939 2691
VGRNN
6766 16778 1634 3880 939 2691
EvolveGCN
6766 16778 1634 3880 939 2691
CTGCN-C
6766 16778 1634 3880 939 2691
Full report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
0,train,0.574367,0.446195,0.566038,0.0,0.55871,0.0,0.57238,0.0,0.570376,0.0,GCN,0.574367,0.446195
1,val,0.599924,0.456618,0.545354,0.0,0.576097,0.0,0.59048,0.0,0.588157,0.0,GCN,0.599924,0.456618
2,test,0.601287,0.40975,0.528716,0.0,0.493888,0.0,0.606964,0.0,0.607368,0.0,GCN,0.607368,0.40975
3,train,0.5,0.446453,0.551076,0.0,0.551232,0.0,0.568374,0.0,0.568344,0.0,GAT,0.568374,0.446453
4,val,0.5,0.457191,0.559883,0.0,0.55982,0.0,0.57218,0.0,0.571434,0.0,GAT,0.57218,0.457191
5,test,0.5,0.411031,0.536241,0.0,0.536097,0.0,0.574326,0.0,0.576399,0.0,GAT,0.576399,0.411031
6,train,0.539605,0.203712,0.538869,0.0,0.561354,0.0,0.528697,0.0,0.485712,0.0,TgGCN,0.561354,0.203712
7,val,0.552159,0.205827,0.551792,0.0,0.57381,0.0,0.512906,0.0,0.484653,0.0,TgGCN,0.57381,0.205827
8,test,0.57705,0.258741,0.577725,0.0,0.484433,0.0,0.487109,0.0,0.513177,0.0,TgGCN,0.577725,0.258741
9,train,0.528802,0.434968,0.526655,0.0,0.550534,0.0,0.562202,0.0,0.561647,0.0,TgGAT,0.562202,0.434968


Test report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
2,test,0.601287,0.40975,0.528716,0.0,0.493888,0.0,0.606964,0.0,0.607368,0.0,GCN,0.607368,0.40975
5,test,0.5,0.411031,0.536241,0.0,0.536097,0.0,0.574326,0.0,0.576399,0.0,GAT,0.576399,0.411031
8,test,0.57705,0.258741,0.577725,0.0,0.484433,0.0,0.487109,0.0,0.513177,0.0,TgGCN,0.577725,0.258741
11,test,0.538524,0.416469,0.451944,0.0,0.490131,0.0,0.589372,0.0,0.590014,0.0,TgGAT,0.590014,0.416469
14,test,0.532665,0.373384,0.464341,0.0,0.483685,0.0,0.573829,0.0,0.574548,0.0,TgSAGE,0.574548,0.373384
17,test,0.565183,0.209192,0.429162,0.0,0.50492,0.0,0.466866,0.0,0.427028,0.0,TgGIN,0.565183,0.209192
20,test,0.533966,0.402253,0.592604,0.002123,0.559301,0.0,0.566614,0.149466,0.568817,0.0,GCRN,0.592604,0.402253
23,test,0.680806,0.477099,0.739314,0.312236,0.513659,0.0,0.557099,0.135158,0.54507,0.002128,TIMERS,0.739314,0.477099
26,test,0.504422,0.411031,0.487809,0.0,0.549287,0.0,0.541968,0.0,0.542958,0.0,DynAE,0.549287,0.411031
29,test,0.443525,0.411031,0.441421,0.0,0.563988,0.0,0.521447,0.0,0.478693,0.0,DynAERNN,0.563988,0.411031


Selected methods


Unnamed: 0,method,sigmoid,HAD,AVG,L1,L2,best_AUC,sigmoid_f1,HAD_f1,AVG_f1,L1_f1,L2_f1,best_F1
2,GCN,0.601287,0.528716,0.493888,0.606964,0.607368,0.607368,0.40975,0.0,0.0,0.0,0.0,0.40975
11,TgGAT,0.538524,0.451944,0.490131,0.589372,0.590014,0.590014,0.416469,0.0,0.0,0.0,0.0,0.416469
14,TgSAGE,0.532665,0.464341,0.483685,0.573829,0.574548,0.574548,0.373384,0.0,0.0,0.0,0.0,0.373384
17,TgGIN,0.565183,0.429162,0.50492,0.466866,0.427028,0.565183,0.209192,0.0,0.0,0.0,0.0,0.209192
20,GCRN,0.533966,0.592604,0.559301,0.566614,0.568817,0.592604,0.402253,0.002123,0.0,0.149466,0.0,0.402253
23,TIMERS,0.680806,0.739314,0.513659,0.557099,0.54507,0.739314,0.477099,0.312236,0.0,0.135158,0.002128,0.477099
26,DynAE,0.504422,0.487809,0.549287,0.541968,0.542958,0.549287,0.411031,0.0,0.0,0.0,0.0,0.411031
29,DynAERNN,0.443525,0.441421,0.563988,0.521447,0.478693,0.563988,0.411031,0.0,0.0,0.0,0.0,0.411031
32,DynGEM,0.574835,0.533583,0.545538,0.500516,0.48604,0.574835,0.411031,0.115419,0.002128,0.022587,0.277726,0.411031
35,VGRNN,0.567795,0.579317,0.573978,0.555672,0.553506,0.579317,0.413342,0.0,0.0,0.0,0.0,0.413342


In [18]:
methods = [
    'GCN', 'GAT', #'SAGE', 'GIN', 
    'TgGCN', 'TgGAT', 'TgSAGE', 'TgGIN', 
    'GCRN', 'TIMERS', 'DynAE', #'DynRNN', 
    'DynAERNN', 'DynGEM', 
    'VGRNN', 'EvolveGCN', 'CTGCN-C',
]
df = print_report(exp='dblp', methods=methods)

GCN
6766 16778 1634 3880 939 2691
GAT
6766 16778 1634 3880 939 2691
TgGCN
6766 16778 1634 3880 939 2691
TgGAT
6766 16778 1634 3880 939 2691
TgSAGE
6766 16778 1634 3880 939 2691
TgGIN
6766 16778 1634 3880 939 2691
GCRN
6766 16778 1634 3880 939 2691
TIMERS
6766 16778 1634 3880 939 2691
DynAE
5926 14534 1634 3880 939 2691
DynAERNN
5926 14534 1634 3880 939 2691
DynGEM
6766 16778 1634 3880 939 2691
VGRNN
6766 16778 1634 3880 939 2691
EvolveGCN
6766 16778 1634 3880 939 2691
CTGCN-C
6766 16778 1634 3880 939 2691
Full report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
0,train,0.574367,0.446195,0.566038,0.0,0.55871,0.0,0.57238,0.0,0.570376,0.0,GCN,0.574367,0.446195
1,val,0.599924,0.456618,0.545354,0.0,0.576097,0.0,0.59048,0.0,0.588157,0.0,GCN,0.599924,0.456618
2,test,0.601287,0.40975,0.528716,0.0,0.493888,0.0,0.606964,0.0,0.607368,0.0,GCN,0.607368,0.40975
3,train,0.5,0.446453,0.551076,0.0,0.551232,0.0,0.568374,0.0,0.568344,0.0,GAT,0.568374,0.446453
4,val,0.5,0.457191,0.559883,0.0,0.55982,0.0,0.57218,0.0,0.571434,0.0,GAT,0.57218,0.457191
5,test,0.5,0.411031,0.536241,0.0,0.536097,0.0,0.574326,0.0,0.576399,0.0,GAT,0.576399,0.411031
6,train,0.539605,0.203712,0.538869,0.0,0.561354,0.0,0.528697,0.0,0.485712,0.0,TgGCN,0.561354,0.203712
7,val,0.552159,0.205827,0.551792,0.0,0.57381,0.0,0.512906,0.0,0.484653,0.0,TgGCN,0.57381,0.205827
8,test,0.57705,0.258741,0.577725,0.0,0.484433,0.0,0.487109,0.0,0.513177,0.0,TgGCN,0.577725,0.258741
9,train,0.528802,0.434968,0.526655,0.0,0.550534,0.0,0.562202,0.0,0.561647,0.0,TgGAT,0.562202,0.434968


Test report


Unnamed: 0,dataset,sigmoid,sigmoid_f1,HAD,HAD_f1,AVG,AVG_f1,L1,L1_f1,L2,L2_f1,method,best_AUC,best_F1
2,test,0.601287,0.40975,0.528716,0.0,0.493888,0.0,0.606964,0.0,0.607368,0.0,GCN,0.607368,0.40975
5,test,0.5,0.411031,0.536241,0.0,0.536097,0.0,0.574326,0.0,0.576399,0.0,GAT,0.576399,0.411031
8,test,0.57705,0.258741,0.577725,0.0,0.484433,0.0,0.487109,0.0,0.513177,0.0,TgGCN,0.577725,0.258741
11,test,0.538524,0.416469,0.451944,0.0,0.490131,0.0,0.589372,0.0,0.590014,0.0,TgGAT,0.590014,0.416469
14,test,0.532665,0.373384,0.464341,0.0,0.483685,0.0,0.573829,0.0,0.574548,0.0,TgSAGE,0.574548,0.373384
17,test,0.565183,0.209192,0.429162,0.0,0.50492,0.0,0.466866,0.0,0.427028,0.0,TgGIN,0.565183,0.209192
20,test,0.533966,0.402253,0.592604,0.002123,0.559301,0.0,0.566614,0.149466,0.568817,0.0,GCRN,0.592604,0.402253
23,test,0.680806,0.477099,0.739314,0.312236,0.513659,0.0,0.557099,0.135158,0.54507,0.002128,TIMERS,0.739314,0.477099
26,test,0.504422,0.411031,0.487809,0.0,0.549287,0.0,0.541968,0.0,0.542958,0.0,DynAE,0.549287,0.411031
29,test,0.443525,0.411031,0.441421,0.0,0.563988,0.0,0.521447,0.0,0.478693,0.0,DynAERNN,0.563988,0.411031


Selected methods


Unnamed: 0,method,sigmoid,HAD,AVG,L1,L2,best_AUC,sigmoid_f1,HAD_f1,AVG_f1,L1_f1,L2_f1,best_F1
2,GCN,0.601287,0.528716,0.493888,0.606964,0.607368,0.607368,0.40975,0.0,0.0,0.0,0.0,0.40975
11,TgGAT,0.538524,0.451944,0.490131,0.589372,0.590014,0.590014,0.416469,0.0,0.0,0.0,0.0,0.416469
14,TgSAGE,0.532665,0.464341,0.483685,0.573829,0.574548,0.574548,0.373384,0.0,0.0,0.0,0.0,0.373384
17,TgGIN,0.565183,0.429162,0.50492,0.466866,0.427028,0.565183,0.209192,0.0,0.0,0.0,0.0,0.209192
20,GCRN,0.533966,0.592604,0.559301,0.566614,0.568817,0.592604,0.402253,0.002123,0.0,0.149466,0.0,0.402253
23,TIMERS,0.680806,0.739314,0.513659,0.557099,0.54507,0.739314,0.477099,0.312236,0.0,0.135158,0.002128,0.477099
26,DynAE,0.504422,0.487809,0.549287,0.541968,0.542958,0.549287,0.411031,0.0,0.0,0.0,0.0,0.411031
29,DynAERNN,0.443525,0.441421,0.563988,0.521447,0.478693,0.563988,0.411031,0.0,0.0,0.0,0.0,0.411031
32,DynGEM,0.574835,0.533583,0.545538,0.500516,0.48604,0.574835,0.411031,0.115419,0.002128,0.022587,0.277726,0.411031
35,VGRNN,0.567795,0.579317,0.573978,0.555672,0.553506,0.579317,0.413342,0.0,0.0,0.0,0.0,0.413342


In [5]:
a.shape

torch.Size([11512, 5, 128])