# This notebook shows the ELAND model training results

In [9]:
import os
import pickle
import sys
import json
import math
import logging
import pickle as pk
from collections import Counter
import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.sparse import csr_matrix, coo_matrix
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.nn import MSELoss, CosineEmbeddingLoss
from sklearn.metrics import precision_recall_curve, roc_curve, auc, average_precision_score, roc_auc_score, f1_score

## loading data 

### user label

In [15]:
user_label = pd.read_csv("../../data/02_intermediate/user_behavior/user_label.csv")

In [16]:
user_label.head(10)

Unnamed: 0,author,label
0,ultimatt42,0
1,jonknee,0
2,dons,0
3,Jedravent,0
4,burtonmkz,0
5,pavel_lishin,0
6,sblinn,0
7,WebZen,0
8,doodahdei,0
9,Tack122,0


## User and subreddit (topic) index mappings

In [17]:
# Load the author-name -> integer-index mapping (displayed in the next cell).
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open("../../data/02_intermediate/user_behavior/u2index.pkl","rb") as f:
    u2index = pickle.load(f)

In [18]:
u2index

{'0_o': 0,
 '138': 1,
 '13ren': 2,
 '1812overture': 3,
 '1esproc': 4,
 '315was_an_inside_job': 5,
 '43P04T34': 6,
 '7oby': 7,
 'AAjax': 8,
 'ABabyAteMyDingo': 9,
 'ANSICL': 10,
 'AbouBenAdhem': 11,
 'Aerik': 12,
 'Ajenthavoc': 13,
 'AliasHandler': 14,
 'AmericanGoyBlog': 15,
 'AngelaMotorman': 16,
 'AngledLuffa': 17,
 'Anonymous7777': 18,
 'AnteChronos': 19,
 'ApostrophePosse': 20,
 'ArcticCelt': 21,
 'Bagel': 22,
 'Battleloser': 23,
 'BedtimeForSheeple': 24,
 'BeetleB': 25,
 'Benny_Lava': 26,
 'Bensch': 27,
 'Bixie': 28,
 'Bloodlustt': 29,
 'Bloody_Eye': 30,
 'BlueBeard': 31,
 'BobGaffney': 32,
 'BraveSirRobin': 33,
 'BrianBoyko': 34,
 'Browzer': 35,
 'Burlapin': 36,
 'Busybyeski': 37,
 'CampusTour': 38,
 'CannedMango': 39,
 'Captain-Obliviouss': 40,
 'Chirp08': 41,
 'ChunkyLaFunga': 42,
 'Ciserus': 43,
 'Clothos': 44,
 'CodeMonkey1': 45,
 'Codebender': 46,
 'ColdSnickersBar': 47,
 'Cookie': 48,
 'CrackIsGoodForYou': 49,
 'CrimsonSun99': 50,
 'D-Style': 51,
 'DCGaymer': 52,
 'DOGA': 5

In [19]:
# Load the subreddit-name -> integer-index mapping (displayed in the next cell).
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open("../../data/02_intermediate/user_behavior/p2index.pkl","rb") as f:
    p2index = pickle.load(f)

In [20]:
p2index

{'AskReddit': 0,
 'Drugs': 1,
 'Economics': 2,
 'Music': 3,
 'WTF': 4,
 'apple': 5,
 'area51': 6,
 'atheism': 7,
 'bestof': 8,
 'business': 9,
 'canada': 10,
 'cogsci': 11,
 'comics': 12,
 'entertainment': 13,
 'environment': 14,
 'funny': 15,
 'gadgets': 16,
 'gaming': 17,
 'geek': 18,
 'happy': 19,
 'lgbt': 20,
 'linux': 21,
 'lolcats': 22,
 'math': 23,
 'netsec': 24,
 'nsfw': 25,
 'obama': 26,
 'offbeat': 27,
 'philosophy': 28,
 'photography': 29,
 'pics': 30,
 'politics': 31,
 'programming': 32,
 'psychology': 33,
 'reddit.com': 34,
 'science': 35,
 'scifi': 36,
 'self': 37,
 'sex': 38,
 'software': 39,
 'sports': 40,
 'technology': 41,
 'videos': 42,
 'web_design': 43,
 'worldnews': 44,
 'xkcd': 45,
 'yourweek': 46}

## edge list data 

In [22]:
edgelist_df = pd.read_csv("../../data/02_intermediate/user_behavior/edge_list.csv")

In [23]:
edgelist_df.head(10)

Unnamed: 0,author,subreddit,retrieved_on
0,ultimatt42,science,1425846806
1,jonknee,programming,1425846807
2,burtonmkz,science,1425846810
3,pavel_lishin,reddit.com,1425846810
4,pavel_lishin,reddit.com,1425846810
5,sblinn,politics,1425846810
6,dons,programming,1425846811
7,Jedravent,politics,1425846811
8,WebZen,politics,1425846811
9,doodahdei,politics,1425846812


In [59]:
from scipy.sparse import csr_matrix, coo_matrix
def process_edgelist(edge_list, u2index, p2index):
    """Build a weighted user-by-subreddit interaction matrix from an edge list.

    Parameters
    ----------
    edge_list : pd.DataFrame
        One row per interaction; must contain the columns ``author``,
        ``subreddit`` and ``retrieved_on``.
    u2index : dict
        Maps an author name to its row index in the matrix.
    p2index : dict
        Maps a subreddit name to its column index in the matrix.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix of shape ``(len(u2index), len(p2index))`` whose entry
        ``(i, j)`` is the number of rows in ``edge_list`` whose author maps
        to ``i`` and whose subreddit maps to ``j``.

    Raises
    ------
    KeyError
        If an author or subreddit in ``edge_list`` is missing from the
        corresponding index mapping.
    """
    authors = edge_list['author'].tolist()
    subreddits = edge_list['subreddit'].tolist()
    timestamps = edge_list['retrieved_on'].tolist()

    # Echo the first interaction as a quick sanity check of the input columns.
    if authors:
        print(authors[0], subreddits[0], timestamps[0])

    # Iterating over plain column lists avoids DataFrame.iterrows(), which
    # materialises a Series per row and is very slow on large edge lists.
    edges = Counter(
        (u2index[u], p2index[p]) for u, p in zip(authors, subreddits)
    )

    # Unpack the (user, subreddit) -> count mapping into COO-style triplets.
    row_idx = [i for i, _ in edges]
    col_idx = [j for _, j in edges]
    weights = list(edges.values())

    graph = csr_matrix(
        (weights, (row_idx, col_idx)),
        shape=(len(u2index), len(p2index))
    )
    return graph

In [60]:
graph = process_edgelist(edgelist_df, u2index, p2index)

ultimatt42 science 1425846806


In [61]:
type(graph)

scipy.sparse.csr.csr_matrix

## train/validation/test id split

In [32]:
# Load the pre-computed train/validation/test split; it is unpacked into three
# index arrays in the next cell.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
with open("../../data/02_intermediate/user_behavior/data_tvt.pkl","rb") as f:
    tvt_idx = pickle.load(f)

In [33]:
idx_train, idx_val, idx_test = tvt_idx

In [34]:
idx_train.shape, idx_val.shape, idx_test.shape

((314,), (79,), (393,))

### convert label format (to numpy array)

In [40]:
def process_label(labels: pd.DataFrame, user_index: dict = None) -> np.ndarray:
    """Convert the user-label table into a 0/1 array aligned with the user index.

    Parameters
    ----------
    labels : pd.DataFrame
        Table with an ``author`` column and a ``label`` column. Rows with
        ``label == 1`` are positive users and rows with ``label == 0`` are
        labeled negatives; any other label value is ignored.
    user_index : dict, optional
        Maps an author name to its position in the returned array. When
        omitted, the notebook-level ``u2index`` mapping is used, so existing
        ``process_label(df)`` calls keep working.

    Returns
    -------
    np.ndarray
        Integer array of length ``len(user_index)``; position
        ``user_index[author]`` is 1 for positive authors and 0 otherwise.
    """
    if user_index is None:
        # Backward-compatible fallback to the mapping loaded earlier in the notebook.
        user_index = u2index

    authors = labels['author']
    is_pos = labels['label'] == 1
    is_neg = labels['label'] == 0
    pos_uids = set(authors[is_pos])
    labeled_uids = set(authors[is_pos | is_neg])
    print(f'loaded labels, total of {len(pos_uids)} positive users and {len(labeled_uids)} labeled users')

    # Users that are in the index but not marked positive stay at 0.
    label_array = np.zeros(len(user_index), dtype=int)
    for user, position in user_index.items():
        if user in pos_uids:
            label_array[position] = 1
    return label_array

In [41]:
labels = process_label(user_label)

loaded labels, total of 327 positive users and 787 labeled users


In [43]:
# Report the size and class balance of each split; one template per split so
# the column alignment of the printed lines is preserved.
split_reports = (
    ('Train: total of {:5} users with {:5} pos users and {:5} neg users', idx_train),
    ('Val:   total of {:5} users with {:5} pos users and {:5} neg users', idx_val),
    ('Test:  total of {:5} users with {:5} pos users and {:5} neg users', idx_test),
)
for template, split_idx in split_reports:
    n_users = len(split_idx)
    n_pos = np.sum(labels[split_idx])
    print(template.format(n_users, n_pos, n_users - n_pos))

Train: total of   314 users with   131 pos users and   183 neg users
Val:   total of    79 users with    38 pos users and    41 neg users
Test:  total of   393 users with   157 pos users and   236 neg users


In [25]:
user_features = np.load("../../data/02_intermediate/user_behavior/user2vec_npy.npz")

In [30]:
user_features['data'].shape #787 users

(787, 300)

In [28]:
item_features = np.load("../../data/02_intermediate/user_behavior/prod2vec_npy.npz")

In [31]:
item_features['data'].shape #47 topics

(47, 300)

### setting up the model trainer 

In [None]:
# Make the project's `src` package importable. The path is resolved relative to
# the notebook's working directory (the same `../../` root used for the data and
# conf paths) instead of a machine-specific absolute path.
SRC_DIR = os.path.abspath(os.path.join('..', '..', 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
from model.data_loader import DynamicGraphWNFDataSet, DynamicGraphWNodeFeatDatasetLoader
from model.dynamic_graph import Eland_e2e
from model.model_config import ElandConfig


In [None]:
# Graph-side loader built from the objects loaded earlier in this notebook.
# NOTE(review): this cell previously passed `edge_list` and `tvt_nids`, which are
# never defined in the notebook; the loaded objects are `edgelist_df` and
# `tvt_idx`. Confirm these are the types the loader expects.
data_loader = DynamicGraphWNodeFeatDatasetLoader(
    labels,
    u2index,
    p2index,
    edgelist_df,
    tvt_idx,
    user_features,
    item_features
)

# Sequential data loader
dataset = DynamicGraphWNFDataSet(p2index, item_features, edgelist_df)
lstm_dataloader = DataLoader(dataset, batch_size=300)
    

In [None]:
# Gather the loader's attributes and the sequential DataLoader into one mapping
# that the model-construction cell reads from.
data_dict = dict(
    graph=data_loader.graph,
    lstm_dataloader=lstm_dataloader,
    user_features=data_loader.user_features,
    item_features=data_loader.item_features,
    labels=data_loader.labels,
    tvt_nids=data_loader.tvt_idx,
    u2index=data_loader.u2index,
    p2index=data_loader.p2index,
)


In [62]:
import yaml

In [63]:
model_config_file = '../../conf/base/parameters/eland.yml'

In [64]:
# Parse the ELAND parameter file. A malformed file now raises yaml.YAMLError
# here instead of being printed and swallowed, which used to leave `mode_config`
# undefined and fail later with an unrelated NameError.
with open(model_config_file, "r") as stream:
    mode_config = yaml.safe_load(stream)
print(mode_config)

{'eland_data_load_options': {'graph_num': 0.1, 'dataset': 'reddit', 'method': 'gcn', 'rnn': 'gru', 'baseline': 'store_true'}, 'eland_model_options': {'dim_feats': 300, 'cuda': 0, 'hidden_size': 128, 'n_layers': 2, 'epochs': 50, 'batch_size': 10, 'seed': -1, 'lr': 0.0001, 'log': True, 'weight_decay': 1e-06, 'dropout': 0.4, 'tensorboard': False, 'name': 'debug', 'gnnlayer_type': 'gcn', 'rnn_type': 'lstm', 'pretrain_bm': 25, 'pretrain_nc': 200, 'alpha': 0.05, 'bmloss_type': 'mse', 'device': 'cpu', 'base_pred': 400, 'save_directory': 'data/07_model_output/user_behavior'}}


In [None]:
# Build the model configuration from the 'eland_model_options' section of the
# YAML file loaded above.
model_config = ElandConfig(mode_config['eland_model_options'])
# NOTE(review): data_dict['item_features'] is passed twice (4th and 9th
# positional argument); confirm against Eland_e2e's signature that this is intended.
model_obj = Eland_e2e(
    data_dict['graph'],
    data_dict['lstm_dataloader'],
    data_dict['user_features'],
    data_dict['item_features'],
    data_dict['labels'],
    data_dict['tvt_nids'],
    data_dict['u2index'],
    data_dict['p2index'],
    data_dict['item_features'],
    model_config
)
# Use distinct names for the returned metrics so the `auc` function imported
# from sklearn.metrics at the top of the notebook is not overwritten.
eland_auc, eland_ap = model_obj.train()

In [44]:
# Repeat training several times per rate and append mean +- std of AUC / AP to a
# per-configuration text file.
# NOTE(review): `rates`, `run`, `method`, `rnn`, `baseline` and `device` are not
# defined anywhere in the visible notebook, and `dataset` is bound earlier to a
# DynamicGraphWNFDataSet instance although it is used here as a name inside
# f-strings. Define these in a configuration cell before running this cell.
N_REPEATS = 20  # number of independent runs aggregated per rate
for rate in rates:
    auc_res, ap_res = [], []
    for _ in range(N_REPEATS):
        # run_auc / run_ap avoid overwriting the `auc` function imported from sklearn.metrics.
        run_auc, run_ap = run(dataset, rate, name=f'{dataset}_{method}_{rate}_{rnn}', baseline=baseline, gnnlayer_type=method, rnnlayer_type=rnn, device=device)
        auc_res.append(run_auc)
        ap_res.append(run_ap)
    with open(f'ELANDe2e_{dataset}_{method}_{rate}_{rnn}_try.txt', 'a') as f:
        f.write(f'auc: {np.mean(auc_res)} +- {np.std(auc_res)}, ap: {np.mean(ap_res)} +- {np.std(ap_res)}\n')

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 333536.62it/s]
2022-06-30 00:54:22,325 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:54:22,511 - NCNet pretrain, Epoch [1 / 300]: loss 1.3692, training auc: 0.5713, val_auc 0.4652, test auc 0.4359
2022-06-30 00:54:22,636 - NCNet pretrain, Epoch [2 / 300]: loss 0.8614, training auc: 0.4508, val_auc 0.4638
2022-06-30 00:54:22,779 - NCNet pretrain, Epoch [3 / 300]: loss 0.5982, training auc: 0.4116, val_auc 0.4611
2022-06-30 00:54:22,917 - NCNet pretrain, Epoch [4 / 300]: loss 0.5114, training auc: 0.4376, val_auc 0.4602
2022-06-30 00:54:23,062 - NCNet pretrain, Epoch [5 / 300]: loss 0.5774, training auc: 0.4213,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 252271.71it/s]
2022-06-30 00:54:48,795 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:54:48,975 - NCNet pretrain, Epoch [1 / 300]: loss 0.5869, training auc: 0.5537, val_auc 0.4616, test auc 0.4315
2022-06-30 00:54:49,094 - NCNet pretrain, Epoch [2 / 300]: loss 0.4500, training auc: 0.4336, val_auc 0.4614
2022-06-30 00:54:49,215 - NCNet pretrain, Epoch [3 / 300]: loss 0.4639, training auc: 0.4493, val_auc 0.4608
2022-06-30 00:54:49,344 - NCNet pretrain, Epoch [4 / 300]: loss 0.4802, training auc: 0.4061, val_auc 0.4623, test auc 0.4348
2022-06-30 00:54:49,474 - NCNet pretrain, Epoch [5 / 300]: loss 0.4509, trai

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 251565.01it/s]
2022-06-30 00:55:07,515 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:55:07,721 - NCNet pretrain, Epoch [1 / 300]: loss 0.9614, training auc: 0.3978, val_auc 0.4735, test auc 0.4495
2022-06-30 00:55:07,839 - NCNet pretrain, Epoch [2 / 300]: loss 0.6709, training auc: 0.5026, val_auc 0.4694
2022-06-30 00:55:07,977 - NCNet pretrain, Epoch [3 / 300]: loss 0.5252, training auc: 0.4168, val_auc 0.4655
2022-06-30 00:55:08,113 - NCNet pretrain, Epoch [4 / 300]: loss 0.4992, training auc: 0.4208, val_auc 0.4642
2022-06-30 00:55:08,252 - NCNet pretrain, Epoch [5 / 300]: loss 0.5412, training auc: 0.4392,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 329947.06it/s]
2022-06-30 00:55:33,507 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:55:33,725 - NCNet pretrain, Epoch [1 / 300]: loss 0.4419, training auc: 0.5241, val_auc 0.4688, test auc 0.4419
2022-06-30 00:55:33,856 - NCNet pretrain, Epoch [2 / 300]: loss 0.4365, training auc: 0.4332, val_auc 0.4695, test auc 0.4448
2022-06-30 00:55:33,987 - NCNet pretrain, Epoch [3 / 300]: loss 0.4364, training auc: 0.4086, val_auc 0.4782, test auc 0.4571
2022-06-30 00:55:34,119 - NCNet pretrain, Epoch [4 / 300]: loss 0.4224, training auc: 0.4933, val_auc 0.4884, test auc 0.4703
2022-06-30 00:55:34,254 - NCNet pretrain, 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 253757.76it/s]
2022-06-30 00:55:51,977 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:55:52,192 - NCNet pretrain, Epoch [1 / 300]: loss 0.6584, training auc: 0.5446, val_auc 0.4674, test auc 0.4415
2022-06-30 00:55:52,310 - NCNet pretrain, Epoch [2 / 300]: loss 0.4783, training auc: 0.4343, val_auc 0.4641
2022-06-30 00:55:52,428 - NCNet pretrain, Epoch [3 / 300]: loss 0.4807, training auc: 0.4292, val_auc 0.4635
2022-06-30 00:55:52,547 - NCNet pretrain, Epoch [4 / 300]: loss 0.4946, training auc: 0.4374, val_auc 0.4645
2022-06-30 00:55:52,666 - NCNet pretrain, Epoch [5 / 300]: loss 0.4886, training auc: 0.4146,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 249887.24it/s]
2022-06-30 00:56:16,446 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:56:16,626 - NCNet pretrain, Epoch [1 / 300]: loss 0.8572, training auc: 0.5209, val_auc 0.4722, test auc 0.4471
2022-06-30 00:56:16,744 - NCNet pretrain, Epoch [2 / 300]: loss 0.5538, training auc: 0.4130, val_auc 0.4679
2022-06-30 00:56:16,868 - NCNet pretrain, Epoch [3 / 300]: loss 0.4790, training auc: 0.4453, val_auc 0.4655
2022-06-30 00:56:16,986 - NCNet pretrain, Epoch [4 / 300]: loss 0.5317, training auc: 0.4112, val_auc 0.4654
2022-06-30 00:56:17,106 - NCNet pretrain, Epoch [5 / 300]: loss 0.5313, training auc: 0.4515,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 336183.83it/s]
2022-06-30 00:56:41,552 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:56:41,731 - NCNet pretrain, Epoch [1 / 300]: loss 0.7905, training auc: 0.4781, val_auc 0.4559, test auc 0.4245
2022-06-30 00:56:41,861 - NCNet pretrain, Epoch [2 / 300]: loss 0.4985, training auc: 0.4220, val_auc 0.4566, test auc 0.4258
2022-06-30 00:56:41,989 - NCNet pretrain, Epoch [3 / 300]: loss 0.4941, training auc: 0.3935, val_auc 0.4568, test auc 0.4262
2022-06-30 00:56:42,118 - NCNet pretrain, Epoch [4 / 300]: loss 0.5312, training auc: 0.3918, val_auc 0.4572, test auc 0.4269
2022-06-30 00:56:42,266 - NCNet pretrain, 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 246345.69it/s]
2022-06-30 00:57:05,554 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:57:05,736 - NCNet pretrain, Epoch [1 / 300]: loss 0.5609, training auc: 0.4623, val_auc 0.4630, test auc 0.4332
2022-06-30 00:57:05,855 - NCNet pretrain, Epoch [2 / 300]: loss 0.4440, training auc: 0.4526, val_auc 0.4624
2022-06-30 00:57:05,974 - NCNet pretrain, Epoch [3 / 300]: loss 0.4883, training auc: 0.3998, val_auc 0.4614
2022-06-30 00:57:06,103 - NCNet pretrain, Epoch [4 / 300]: loss 0.4630, training auc: 0.4380, val_auc 0.4643, test auc 0.4381
2022-06-30 00:57:06,250 - NCNet pretrain, Epoch [5 / 300]: loss 0.4442, trai

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 236638.95it/s]
2022-06-30 00:57:28,483 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:57:28,699 - NCNet pretrain, Epoch [1 / 300]: loss 1.0035, training auc: 0.5127, val_auc 0.4554, test auc 0.4253
2022-06-30 00:57:28,828 - NCNet pretrain, Epoch [2 / 300]: loss 0.6707, training auc: 0.4599, val_auc 0.4595, test auc 0.4297
2022-06-30 00:57:28,977 - NCNet pretrain, Epoch [3 / 300]: loss 0.5129, training auc: 0.4224, val_auc 0.4607, test auc 0.4310
2022-06-30 00:57:29,124 - NCNet pretrain, Epoch [4 / 300]: loss 0.4953, training auc: 0.4495, val_auc 0.4610, test auc 0.4312
2022-06-30 00:57:29,271 - NCNet pretrain, 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 319447.77it/s]
2022-06-30 00:57:51,804 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:57:52,019 - NCNet pretrain, Epoch [1 / 300]: loss 0.7632, training auc: 0.5274, val_auc 0.4700, test auc 0.4449
2022-06-30 00:57:52,146 - NCNet pretrain, Epoch [2 / 300]: loss 0.5063, training auc: 0.4643, val_auc 0.4651
2022-06-30 00:57:52,263 - NCNet pretrain, Epoch [3 / 300]: loss 0.4980, training auc: 0.4446, val_auc 0.4642
2022-06-30 00:57:52,379 - NCNet pretrain, Epoch [4 / 300]: loss 0.5501, training auc: 0.4212, val_auc 0.4650
2022-06-30 00:57:52,498 - NCNet pretrain, Epoch [5 / 300]: loss 0.5361, training auc: 0.4166,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 240770.58it/s]
2022-06-30 00:58:17,243 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:58:17,448 - NCNet pretrain, Epoch [1 / 300]: loss 0.8110, training auc: 0.5922, val_auc 0.4603, test auc 0.4327
2022-06-30 00:58:17,588 - NCNet pretrain, Epoch [2 / 300]: loss 0.4925, training auc: 0.4512, val_auc 0.4589
2022-06-30 00:58:17,723 - NCNet pretrain, Epoch [3 / 300]: loss 0.5111, training auc: 0.3994, val_auc 0.4585
2022-06-30 00:58:17,843 - NCNet pretrain, Epoch [4 / 300]: loss 0.5132, training auc: 0.4288, val_auc 0.4580
2022-06-30 00:58:17,960 - NCNet pretrain, Epoch [5 / 300]: loss 0.5011, training auc: 0.4247,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 333650.56it/s]
2022-06-30 00:58:39,556 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:58:39,735 - NCNet pretrain, Epoch [1 / 300]: loss 0.4274, training auc: 0.5366, val_auc 0.4643, test auc 0.4350
2022-06-30 00:58:39,863 - NCNet pretrain, Epoch [2 / 300]: loss 0.4499, training auc: 0.4310, val_auc 0.4689, test auc 0.4407
2022-06-30 00:58:39,992 - NCNet pretrain, Epoch [3 / 300]: loss 0.4253, training auc: 0.4573, val_auc 0.4788, test auc 0.4510
2022-06-30 00:58:40,141 - NCNet pretrain, Epoch [4 / 300]: loss 0.4192, training auc: 0.4958, val_auc 0.5365, test auc 0.5039
2022-06-30 00:58:40,289 - NCNet pretrain, 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 248992.98it/s]
2022-06-30 00:58:56,160 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:58:56,374 - NCNet pretrain, Epoch [1 / 300]: loss 0.8100, training auc: 0.6565, val_auc 0.4668, test auc 0.4380
2022-06-30 00:58:56,493 - NCNet pretrain, Epoch [2 / 300]: loss 0.5047, training auc: 0.4697, val_auc 0.4611
2022-06-30 00:58:56,610 - NCNet pretrain, Epoch [3 / 300]: loss 0.4846, training auc: 0.4402, val_auc 0.4600
2022-06-30 00:58:56,727 - NCNet pretrain, Epoch [4 / 300]: loss 0.5483, training auc: 0.3837, val_auc 0.4610
2022-06-30 00:58:56,846 - NCNet pretrain, Epoch [5 / 300]: loss 0.5241, training auc: 0.3990,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 237730.53it/s]
2022-06-30 00:59:19,736 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:59:19,914 - NCNet pretrain, Epoch [1 / 300]: loss 0.7422, training auc: 0.5827, val_auc 0.4585, test auc 0.4285
2022-06-30 00:59:20,041 - NCNet pretrain, Epoch [2 / 300]: loss 0.5129, training auc: 0.4407, val_auc 0.4608, test auc 0.4316
2022-06-30 00:59:20,177 - NCNet pretrain, Epoch [3 / 300]: loss 0.4704, training auc: 0.4135, val_auc 0.4601
2022-06-30 00:59:20,294 - NCNet pretrain, Epoch [4 / 300]: loss 0.4844, training auc: 0.4720, val_auc 0.4604
2022-06-30 00:59:20,411 - NCNet pretrain, Epoch [5 / 300]: loss 0.5019, trai

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 333916.42it/s]
2022-06-30 00:59:42,414 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 00:59:42,592 - NCNet pretrain, Epoch [1 / 300]: loss 1.0669, training auc: 0.4832, val_auc 0.4591, test auc 0.4306
2022-06-30 00:59:42,719 - NCNet pretrain, Epoch [2 / 300]: loss 0.7096, training auc: 0.4274, val_auc 0.4606, test auc 0.4315
2022-06-30 00:59:42,841 - NCNet pretrain, Epoch [3 / 300]: loss 0.5268, training auc: 0.4285, val_auc 0.4599
2022-06-30 00:59:42,959 - NCNet pretrain, Epoch [4 / 300]: loss 0.4927, training auc: 0.4583, val_auc 0.4602
2022-06-30 00:59:43,082 - NCNet pretrain, Epoch [5 / 300]: loss 0.5578, trai

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 243787.61it/s]
2022-06-30 01:00:06,231 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:00:06,411 - NCNet pretrain, Epoch [1 / 300]: loss 0.5515, training auc: 0.6080, val_auc 0.4643, test auc 0.4347
2022-06-30 01:00:06,529 - NCNet pretrain, Epoch [2 / 300]: loss 0.4459, training auc: 0.4208, val_auc 0.4640
2022-06-30 01:00:06,661 - NCNet pretrain, Epoch [3 / 300]: loss 0.4713, training auc: 0.4204, val_auc 0.4661, test auc 0.4389
2022-06-30 01:00:06,791 - NCNet pretrain, Epoch [4 / 300]: loss 0.4615, training auc: 0.4288, val_auc 0.4691, test auc 0.4445
2022-06-30 01:00:06,920 - NCNet pretrain, Epoch [5 / 300]: 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 246930.14it/s]
2022-06-30 01:00:26,701 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:00:26,902 - NCNet pretrain, Epoch [1 / 300]: loss 0.5280, training auc: 0.5321, val_auc 0.4727, test auc 0.4479
2022-06-30 01:00:27,038 - NCNet pretrain, Epoch [2 / 300]: loss 0.4405, training auc: 0.4445, val_auc 0.4699
2022-06-30 01:00:27,174 - NCNet pretrain, Epoch [3 / 300]: loss 0.4598, training auc: 0.4399, val_auc 0.4718
2022-06-30 01:00:27,303 - NCNet pretrain, Epoch [4 / 300]: loss 0.4596, training auc: 0.3961, val_auc 0.4755, test auc 0.4556
2022-06-30 01:00:27,432 - NCNet pretrain, Epoch [5 / 300]: loss 0.4323, trai

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 314733.65it/s]
2022-06-30 01:00:45,293 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:00:45,482 - NCNet pretrain, Epoch [1 / 300]: loss 1.1334, training auc: 0.6124, val_auc 0.4683, test auc 0.4415
2022-06-30 01:00:45,600 - NCNet pretrain, Epoch [2 / 300]: loss 0.6912, training auc: 0.4127, val_auc 0.4639
2022-06-30 01:00:45,718 - NCNet pretrain, Epoch [3 / 300]: loss 0.5248, training auc: 0.4291, val_auc 0.4617
2022-06-30 01:00:45,836 - NCNet pretrain, Epoch [4 / 300]: loss 0.5263, training auc: 0.4105, val_auc 0.4617
2022-06-30 01:00:45,953 - NCNet pretrain, Epoch [5 / 300]: loss 0.5730, training auc: 0.4201,

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 239958.77it/s]
2022-06-30 01:01:11,227 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:01:11,409 - NCNet pretrain, Epoch [1 / 300]: loss 0.4650, training auc: 0.6895, val_auc 0.4635, test auc 0.4353
2022-06-30 01:01:11,556 - NCNet pretrain, Epoch [2 / 300]: loss 0.4343, training auc: 0.4181, val_auc 0.4645, test auc 0.4375
2022-06-30 01:01:11,674 - NCNet pretrain, Epoch [3 / 300]: loss 0.4472, training auc: 0.3862, val_auc 0.4635
2022-06-30 01:01:11,821 - NCNet pretrain, Epoch [4 / 300]: loss 0.4358, training auc: 0.4017, val_auc 0.4672, test auc 0.4438
2022-06-30 01:01:11,950 - NCNet pretrain, Epoch [5 / 300]: 

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 247655.31it/s]
2022-06-30 01:01:30,767 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:01:30,955 - NCNet pretrain, Epoch [1 / 300]: loss 0.8010, training auc: 0.4865, val_auc 0.4513, test auc 0.4185
2022-06-30 01:01:31,090 - NCNet pretrain, Epoch [2 / 300]: loss 0.4892, training auc: 0.4130, val_auc 0.4558, test auc 0.4250
2022-06-30 01:01:31,220 - NCNet pretrain, Epoch [3 / 300]: loss 0.4852, training auc: 0.4182, val_auc 0.4561, test auc 0.4260
2022-06-30 01:01:31,339 - NCNet pretrain, Epoch [4 / 300]: loss 0.5163, training auc: 0.4008, val_auc 0.4556
2022-06-30 01:01:31,470 - NCNet pretrain, Epoch [5 / 300]: 

In [45]:
# Show the AUC values currently held in auc_res (presumably one entry per
# training run collected by an earlier cell — confirm).
auc_res

[0.7815773090163335,
 0.800525263939898,
 0.7993128145567171,
 0.815706385828337,
 0.7999445438469828,
 0.8092262558725973,
 0.7970272101369662,
 0.8121704021094265,
 0.7854572778353265,
 0.7997640497640498,
 0.8031574693160058,
 0.8126929194002366,
 0.8037211863431375,
 0.7922094568436032,
 0.7854075765661132,
 0.8040534000899855,
 0.8062206370133198,
 0.7910624038672819,
 0.8102804459511777,
 0.7969376170595682]

In [46]:
# Show the average-precision values currently held in ap_res (presumably one
# entry per training run collected by an earlier cell — confirm).
ap_res

[0.3835175775414199,
 0.4188607136460357,
 0.4096221560568864,
 0.44033756451677947,
 0.42489229519446203,
 0.4293152467335025,
 0.40993789257391433,
 0.436776757984043,
 0.4022491584864704,
 0.4143178366148888,
 0.41319479676545195,
 0.44085022617260056,
 0.4166372442776068,
 0.4087923448209431,
 0.389768553307066,
 0.433510374732659,
 0.4276592625626203,
 0.39842701710859607,
 0.4354548495427667,
 0.40314497268356725]

In [50]:
# For every value in `rates`, call run() and append one summary line
# (mean +- std of the returned AUC and AP) to a per-configuration text file.
# NOTE(review): `baseline` is the string 'True', not the boolean True; it is
# forwarded to run() unchanged — confirm which form run() expects.
baseline = 'True'
n_runs = 1  # repetitions per rate; with a single run the reported std is 0
for rate in rates:
    auc_res, ap_res = [], []
    for _ in range(n_runs):
        # Named *_score so that the `auc` function imported from
        # sklearn.metrics at the top of the notebook is not overwritten in
        # the kernel namespace by a float.
        auc_score, ap_score = run(dataset, rate, name=f'{dataset}_{method}_{rate}_{rnn}', baseline=baseline, gnnlayer_type=method, rnnlayer_type=rnn, device=device)
        auc_res.append(auc_score)
        ap_res.append(ap_score)
    # Append mode: re-running this cell adds new lines instead of replacing
    # earlier results.
    with open(f'ELANDe2e_{dataset}_{method}_{rate}_{rnn}_try.txt', 'a') as f:
        f.write(f'auc: {np.mean(auc_res)} +- {np.std(auc_res)}, ap: {np.mean(ap_res)} +- {np.std(ap_res)}\n')

loaded labels, total of 832 positive users and 6000 labeled users
Train: total of  1200 users with   174 pos users and  1026 neg users
Val:   total of  1200 users with   166 pos users and  1034 neg users
Test:  total of  3600 users with   492 pos users and  3108 neg users


178841it [00:00, 252819.70it/s]
2022-06-30 01:18:32,911 - Parameters: {'base_pred': 30, 'device': device(type='cuda', index=0), 'bmloss_type': 'mse', 'alpha': 0.05, 'pretrain_nc': 300, 'pretrain_bm': 25, 'rnn_type': 'gru', 'gnnlayer_type': 'gcn', 'name': 'reddit_gcn_0.3_gru', 'log': True, 'tensorboard': False, 'dropout': 0.4, 'weight_decay': 1e-05, 'lr': 0.01, 'seed': -1, 'epochs': 400, 'n_layers': 2, 'hidden_size': 128, 'cuda': 0, 'dim_feats': 300}
2022-06-30 01:18:33,101 - NCNet pretrain, Epoch [1 / 300]: loss 0.4542, training auc: 0.5758, val_auc 0.4818, test auc 0.4613
2022-06-30 01:18:33,225 - NCNet pretrain, Epoch [2 / 300]: loss 0.4429, training auc: 0.4414, val_auc 0.4816
2022-06-30 01:18:33,372 - NCNet pretrain, Epoch [3 / 300]: loss 0.4369, training auc: 0.4597, val_auc 0.4912, test auc 0.4755
2022-06-30 01:18:33,505 - NCNet pretrain, Epoch [4 / 300]: loss 0.4294, training auc: 0.4867, val_auc 0.4988, test auc 0.4864
2022-06-30 01:18:33,642 - NCNet pretrain, Epoch [5 / 300]: 