In [1]:
from model.utils import *
from model.gnn import *
from model.node2vec import *
from model.bert_embeddings import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fix the seed up front so the runs below are repeatable
# (presumably set_seeds covers every RNG the models use — confirm in model.utils).
SEED = 42
set_seeds(SEED)

# Source CSV handed to the project's preprocessing helper; the resulting `df`
# is the shared input for every model run below.
PEERREAD_CSV = 'data/full_context_PeerRead.csv'
df = data_preprocess(PEERREAD_CSV)

## Node2Vec Baseline Model

In [3]:
# Train the Node2Vec baseline on `df`; the run logs train loss / validation AUC
# and reports saving the model to the checkpoint path given here.
node2vec_checkpoint = 'model/node2vec_model.pth'
run_node2vec(df, 'train', node2vec_checkpoint)

Epoch: 100, Train Loss: 0.001, Val AUC: 0.642
Epoch: 200, Train Loss: 0.001, Val AUC: 0.644
Epoch: 300, Train Loss: 0.001, Val AUC: 0.645
Epoch: 400, Train Loss: 0.001, Val AUC: 0.646
Epoch: 500, Train Loss: 0.001, Val AUC: 0.647
Epoch: 600, Train Loss: 0.001, Val AUC: 0.648
Epoch: 700, Train Loss: 0.001, Val AUC: 0.648
Epoch: 800, Train Loss: 0.001, Val AUC: 0.649
Epoch: 900, Train Loss: 0.001, Val AUC: 0.649
Epoch: 1000, Train Loss: 0.001, Val AUC: 0.650
Model saved to model/node2vec_model.pth


In [4]:
# Evaluate the Node2Vec baseline using the checkpoint path from the training step.
# The result is a dict of ranking metrics (MRR, MAP@k, Recall@k) reused in the
# model comparison further down.
node2vec_checkpoint = 'model/node2vec_model.pth'
node2vec_result = run_node2vec(df, 'evaluate', node2vec_checkpoint)
node2vec_result

{'MRR': 0.1879389359588977,
 'MAP@5': 0.062332882273342084,
 'MAP@10': 0.0579965099105345,
 'MAP@30': 0.05951454702935102,
 'MAP@50': 0.060283805284909224,
 'MAP@80': 0.06086205160538773,
 'Recall@5': 0.07121131709678721,
 'Recall@10': 0.08962863466492975,
 'Recall@30': 0.12709981918330793,
 'Recall@50': 0.1501050697410791,
 'Recall@80': 0.17726019423011827}

## BERT+GNN

In [5]:
# NOTE(review): embedding generation is disabled in this run — presumably the
# BERT embeddings were already produced and cached by an earlier run; confirm,
# and uncomment when starting from a fresh checkout.
# generate_embeddings(df)

In [6]:
# Train the BERT+GNN model on `df`; the run logs train loss / validation AUC
# and reports saving the model to the checkpoint path given here.
gnn_checkpoint = 'model/gnn_model.pth'
run_gnn(df, 'train', gnn_checkpoint)

Epoch: 100, Train Loss: 0.592, Val AUC: 0.823
Epoch: 200, Train Loss: 0.568, Val AUC: 0.832
Epoch: 300, Train Loss: 0.550, Val AUC: 0.840
Epoch: 400, Train Loss: 0.532, Val AUC: 0.844
Epoch: 500, Train Loss: 0.522, Val AUC: 0.852
Epoch: 600, Train Loss: 0.514, Val AUC: 0.858
Epoch: 700, Train Loss: 0.503, Val AUC: 0.864
Epoch: 800, Train Loss: 0.494, Val AUC: 0.871
Epoch: 900, Train Loss: 0.486, Val AUC: 0.876
Epoch: 1000, Train Loss: 0.480, Val AUC: 0.878
Model saved to model/gnn_model.pth


In [7]:
# Evaluate the BERT+GNN model using the checkpoint path from the training step.
# The result is a dict of ranking metrics (MRR, MAP@k, Recall@k) reused in the
# model comparison further down.
gnn_checkpoint = 'model/gnn_model.pth'
gnn_result = run_gnn(df, 'evaluate', gnn_checkpoint)
gnn_result

{'MRR': 0.22009715111411915,
 'MAP@5': 0.07595233799428637,
 'MAP@10': 0.07432527843493425,
 'MAP@30': 0.08180236039674985,
 'MAP@50': 0.08461545805564777,
 'MAP@80': 0.08648521326416156,
 'Recall@5': 0.10330543974044595,
 'Recall@10': 0.15171966445085933,
 'Recall@30': 0.2461263191854636,
 'Recall@50': 0.29700818001033663,
 'Recall@80': 0.3493000952417341}

## Model Comparison

In [8]:
# Map each display label to the metrics dict produced by that model's
# evaluation cell above.
models = {
    'node2vec': node2vec_result,
    'BERT+GNN': gnn_result,
}

# Build the side-by-side metric table and end the cell on it (instead of
# print()) so Jupyter renders the rich table repr; the plain-text print wraps
# this wide table into two separate chunks, which is harder to compare.
evaluation_table = metric_evaluation_table(models)
evaluation_table


               MRR     MAP@5    MAP@10    MAP@30    MAP@50    MAP@80  \
Model                                                                  
node2vec  0.187939  0.062333  0.057997  0.059515  0.060284  0.060862   
BERT+GNN  0.220097  0.075952  0.074325  0.081802  0.084615  0.086485   

          Recall@5  Recall@10  Recall@30  Recall@50  Recall@80  
Model                                                           
node2vec  0.071211   0.089629   0.127100   0.150105    0.17726  
BERT+GNN  0.103305   0.151720   0.246126   0.297008    0.34930  
