## Prelims

### configs

In [2]:
GPU_INDEX=0
DATA_DIR="../../data/splits/stratified_general/"
FULL_CHV_PATH="../../data/chv.csv"
FEATURE_PATH="../../data/precomputed_features/"
SNOMED_PATH = "../../data/SnomedCT_201907/"

### imports

In [3]:
%load_ext autoreload
%autoreload 2

import torch
torch.manual_seed(2020) 
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../'))
from src.data import *
from src.loss import *
from src.models import *
from src.evaluation import *
from src.train import *

### load some dicts (to be used later)

In [4]:
# load SNOMED graph
from data.Snomed import Snomed
snomed = Snomed(SNOMED_PATH, taxonomy=False)
snomed.load_snomed()

In [5]:
# create surface_to_snomed_id dict
SF2ID = build_surface_to_snomed_id(snomed)

In [6]:
# create testset_row_index_to_ed_dict dict
ED_DICT = pkl.load(open(os.path.join(FEATURE_PATH,"term_ed_dic_stratified_general.pkl"),"rb"))

## Experiments

### BERT align

#### static term -> surface

In [6]:
# input: static term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_plain_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path, 
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path,
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [27]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.061 Acc@10: 0.193 Acc@100: 0.384 AccSum: 0.637
[epoch 1][test] Acc@1: 0.066 Acc@10: 0.188 Acc@100: 0.379 AccSum: 0.633
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.140 Acc@10: 0.347 Acc@100: 0.548 AccSum: 1.035
[epoch 2][test] Acc@1: 0.149 Acc@10: 0.353 Acc@100: 0.554 AccSum: 1.056
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.173 Acc@10: 0.393 Acc@100: 0.589 AccSum: 1.155
[epoch 3][test] Acc@1: 0.178 Acc@10: 0.398 Acc@100: 0.594 AccSum: 1.170
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.216 Acc@10: 0.437 Acc@100: 0.627 AccSum: 1.281
[epoch 4][test] Acc@1: 0.220 Acc@10: 0.446 Acc@100: 0.634 AccSum: 1.299
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.216 Acc@10: 0.442 Acc@100: 0.630 AccSum: 1.287
[epoch 5][test] Acc@1: 0.225 Acc@10: 0.452 Acc@100: 0.636 AccSum: 1.313
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.203 Acc@10: 0.432 Acc@100: 0.625 AccSum: 1.260
[epoch 7][dev ] Acc@1: 0.217 Acc@10: 0.450 Acc@100: 0.63

In [28]:
save_path = "bert_static_term_to_surface_state_dict.pkl"
torch.save(model.state_dict(), save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [30]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.411


#### contextual_term -> surface

##### mutlilevel attention

In [6]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [7]:
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.202 Acc@10: 0.433 Acc@100: 0.626 AccSum: 1.261
[epoch 1][test] Acc@1: 0.210 Acc@10: 0.446 Acc@100: 0.639 AccSum: 1.294
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.198 Acc@10: 0.420 Acc@100: 0.608 AccSum: 1.226
[epoch 3][dev ] Acc@1: 0.273 Acc@10: 0.526 Acc@100: 0.715 AccSum: 1.514
[epoch 3][test] Acc@1: 0.286 Acc@10: 0.542 Acc@100: 0.723 AccSum: 1.551
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.271 Acc@10: 0.520 Acc@100: 0.694 AccSum: 1.485
[epoch 5][dev ] Acc@1: 0.290 Acc@10: 0.562 Acc@100: 0.744 AccSum: 1.596
[epoch 5][test] Acc@1: 0.307 Acc@10: 0.575 Acc@100: 0.745 AccSum: 1.627
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.293 Acc@10: 0.543 Acc@100: 0.719 AccSum: 1.555
[epoch 7][dev ] Acc@1: 0.296 Acc@10: 0.545 Acc@100: 0.716 AccSum: 1.557
[epoch 8][dev ] Acc@1: 0.312 Acc@10: 0.569 Acc@100: 0.754 AccSum: 1.634
[epoch 8][test] Acc@1: 0.329 Acc@10: 0.588 Acc@100: 0.759 AccSum: 1.676
[best epoch: 8]


In [8]:
save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
torch.save(best_sd, save_path)
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

multilevel_attention(
  (self_attn): SelfAttention()
  (fc): Linear(in_features=768, out_features=768, bias=True)
)

In [9]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)
# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.480


##### linear + relu

In [7]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [9]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.002 AccSum: 0.003
[epoch 1][dev ] Acc@1: 0.122 Acc@10: 0.330 Acc@100: 0.544 AccSum: 0.996
[epoch 1][test] Acc@1: 0.136 Acc@10: 0.341 Acc@100: 0.543 AccSum: 1.021
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.181 Acc@10: 0.418 Acc@100: 0.623 AccSum: 1.222
[epoch 2][test] Acc@1: 0.189 Acc@10: 0.434 Acc@100: 0.630 AccSum: 1.253
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.205 Acc@10: 0.451 Acc@100: 0.653 AccSum: 1.308
[epoch 3][test] Acc@1: 0.217 Acc@10: 0.464 Acc@100: 0.654 AccSum: 1.335
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.227 Acc@10: 0.481 Acc@100: 0.673 AccSum: 1.381
[epoch 4][test] Acc@1: 0.245 Acc@10: 0.494 Acc@100: 0.678 AccSum: 1.417
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.235 Acc@10: 0.487 Acc@100: 0.675 AccSum: 1.397
[epoch 5][test] Acc@1: 0.248 Acc@10: 0.499 Acc@100: 0.677 AccSum: 1.424
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.247 Acc@10: 0.503 Acc@100: 0.689 AccSum: 1.439
[epoch 6][test] Acc@1: 0.254 Acc@10: 0.511 Acc@100: 0.68

[epoch 59][dev ] Acc@1: 0.292 Acc@10: 0.560 Acc@100: 0.740 AccSum: 1.593
[epoch 59][test] Acc@1: 0.312 Acc@10: 0.574 Acc@100: 0.741 AccSum: 1.627
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.292 Acc@10: 0.561 Acc@100: 0.741 AccSum: 1.595
[epoch 60][test] Acc@1: 0.314 Acc@10: 0.574 Acc@100: 0.741 AccSum: 1.629
[best epoch: 60]
[epoch 61][dev ] Acc@1: 0.293 Acc@10: 0.562 Acc@100: 0.743 AccSum: 1.597
[epoch 61][test] Acc@1: 0.315 Acc@10: 0.575 Acc@100: 0.741 AccSum: 1.631
[best epoch: 61]
[epoch 62][dev ] Acc@1: 0.291 Acc@10: 0.563 Acc@100: 0.742 AccSum: 1.596
[epoch 63][dev ] Acc@1: 0.292 Acc@10: 0.564 Acc@100: 0.743 AccSum: 1.599
[epoch 63][test] Acc@1: 0.316 Acc@10: 0.577 Acc@100: 0.741 AccSum: 1.634
[best epoch: 63]
[epoch 64][dev ] Acc@1: 0.294 Acc@10: 0.564 Acc@100: 0.744 AccSum: 1.602
[epoch 64][test] Acc@1: 0.317 Acc@10: 0.577 Acc@100: 0.742 AccSum: 1.636
[best epoch: 64]
[epoch 65][dev ] Acc@1: 0.294 Acc@10: 0.563 Acc@100: 0.744 AccSum: 1.601
[epoch 66][dev ] Acc@1: 0.294 Acc@10: 0

In [10]:
save_path = "bert_context_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

val_or_test(model, test_loader, testset.search_space_embeddings, \
                epoch=-1, typ="test", bsz=128, GPU_INDEX=GPU_INDEX)

[epoch 0][test] Acc@1: 0.323 Acc@10: 0.585 Acc@100: 0.747 AccSum: 1.656


[0.3232183908045977, 0.585287356321839, 0.7473563218390804]

In [11]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.413


### fasttext align

#### term -> surface

In [12]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [13]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True,device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.055 Acc@10: 0.230 Acc@100: 0.494 AccSum: 0.779
[epoch 1][test] Acc@1: 0.051 Acc@10: 0.214 Acc@100: 0.480 AccSum: 0.745
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.148 Acc@10: 0.425 Acc@100: 0.695 AccSum: 1.268
[epoch 2][test] Acc@1: 0.144 Acc@10: 0.412 Acc@100: 0.694 AccSum: 1.251
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.191 Acc@10: 0.491 Acc@100: 0.746 AccSum: 1.428
[epoch 3][test] Acc@1: 0.190 Acc@10: 0.490 Acc@100: 0.744 AccSum: 1.424
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.202 Acc@10: 0.515 Acc@100: 0.769 AccSum: 1.486
[epoch 4][test] Acc@1: 0.200 Acc@10: 0.517 Acc@100: 0.767 AccSum: 1.484
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.238 Acc@10: 0.565 Acc@100: 0.802 AccSum: 1.605
[epoch 5][test] Acc@1: 0.241 Acc@10: 0.570 Acc@100: 0.802 AccSum: 1.613
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.261 Acc@10: 0.597 Acc@100: 0.822 AccSum: 1.679
[epoch 6][test] Acc@1: 0.269 Acc@10: 0.597 Acc@100: 0.82

[epoch 53][test] Acc@1: 0.400 Acc@10: 0.722 Acc@100: 0.890 AccSum: 2.013
[best epoch: 53]
[epoch 54][dev ] Acc@1: 0.396 Acc@10: 0.717 Acc@100: 0.889 AccSum: 2.002
[epoch 55][dev ] Acc@1: 0.394 Acc@10: 0.716 Acc@100: 0.889 AccSum: 2.000
[epoch 56][dev ] Acc@1: 0.393 Acc@10: 0.715 Acc@100: 0.889 AccSum: 1.998
[epoch 57][dev ] Acc@1: 0.393 Acc@10: 0.716 Acc@100: 0.888 AccSum: 1.997
[epoch 58][dev ] Acc@1: 0.393 Acc@10: 0.716 Acc@100: 0.888 AccSum: 1.997
[epoch 59][dev ] Acc@1: 0.391 Acc@10: 0.718 Acc@100: 0.889 AccSum: 1.998
[epoch 60][dev ] Acc@1: 0.392 Acc@10: 0.718 Acc@100: 0.889 AccSum: 1.999
[epoch 61][dev ] Acc@1: 0.395 Acc@10: 0.718 Acc@100: 0.889 AccSum: 2.002
[epoch 62][dev ] Acc@1: 0.396 Acc@10: 0.720 Acc@100: 0.889 AccSum: 2.005
[epoch 62][test] Acc@1: 0.405 Acc@10: 0.723 Acc@100: 0.892 AccSum: 2.020
[best epoch: 62]
[epoch 63][dev ] Acc@1: 0.398 Acc@10: 0.719 Acc@100: 0.889 AccSum: 2.006
[epoch 63][test] Acc@1: 0.404 Acc@10: 0.724 Acc@100: 0.891 AccSum: 2.019
[best epoch: 63]


In [14]:
save_path = "ft_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [15]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.526


#### term -> graph

In [13]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [15]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.0 Acc@10: 0.0 Acc@100: 0.1 AccSum: 0.2
[epoch 1][dev ] Acc@1: 0.4 Acc@10: 2.6 Acc@100: 11.1 AccSum: 14.2
[epoch 1][test] Acc@1: 0.5 Acc@10: 2.8 Acc@100: 11.7 AccSum: 14.9
[best epoch: 1]
[epoch 2][dev ] Acc@1: 1.5 Acc@10: 6.4 Acc@100: 19.9 AccSum: 27.8
[epoch 2][test] Acc@1: 1.5 Acc@10: 6.9 Acc@100: 20.7 AccSum: 29.1
[best epoch: 2]
[epoch 3][dev ] Acc@1: 2.0 Acc@10: 7.6 Acc@100: 23.8 AccSum: 33.4
[epoch 3][test] Acc@1: 2.2 Acc@10: 8.4 Acc@100: 24.5 AccSum: 35.1
[best epoch: 3]
[epoch 4][dev ] Acc@1: 2.6 Acc@10: 9.2 Acc@100: 27.2 AccSum: 39.0
[epoch 4][test] Acc@1: 2.7 Acc@10: 10.4 Acc@100: 28.1 AccSum: 41.2
[best epoch: 4]
[epoch 5][dev ] Acc@1: 3.4 Acc@10: 11.6 Acc@100: 31.0 AccSum: 46.0
[epoch 5][test] Acc@1: 3.8 Acc@10: 12.6 Acc@100: 31.7 AccSum: 48.0
[best epoch: 5]
[epoch 6][dev ] Acc@1: 3.6 Acc@10: 12.3 Acc@100: 32.4 AccSum: 48.3
[epoch 6][test] Acc@1: 4.0 Acc@10: 13.5 Acc@100: 33.0 AccSum: 50.5
[best epoch: 6]
[epoch 7][dev ] Acc@1: 3.8 Acc@10: 13.5 Acc

In [5]:
save_path = "term_to_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [6]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=256, \
              sample=False, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False,\
            GPU_INDEX=GPU_INDEX)
print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))


MRR: 0.236


HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))


MGD: 0.403 MGD@10: 0.301


#### term -> surface+graph

In [14]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [9]:
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.0 Acc@10: 0.0 Acc@100: 0.1 AccSum: 0.1
[epoch 1][dev ] Acc@1: 5.8 Acc@10: 20.4 Acc@100: 47.0 AccSum: 73.2
[epoch 1][test] Acc@1: 5.8 Acc@10: 20.3 Acc@100: 46.6 AccSum: 72.7
[best epoch: 1]
[epoch 2][dev ] Acc@1: 12.0 Acc@10: 36.1 Acc@100: 65.5 AccSum: 113.6
[epoch 2][test] Acc@1: 12.7 Acc@10: 37.0 Acc@100: 65.5 AccSum: 115.1
[best epoch: 2]
[epoch 3][dev ] Acc@1: 15.7 Acc@10: 42.7 Acc@100: 70.0 AccSum: 128.5
[epoch 3][test] Acc@1: 16.2 Acc@10: 42.9 Acc@100: 71.1 AccSum: 130.2
[best epoch: 3]
[epoch 4][dev ] Acc@1: 19.0 Acc@10: 46.8 Acc@100: 74.3 AccSum: 140.1
[epoch 4][test] Acc@1: 19.9 Acc@10: 48.0 Acc@100: 74.9 AccSum: 142.9
[best epoch: 4]
[epoch 5][dev ] Acc@1: 22.2 Acc@10: 51.6 Acc@100: 78.2 AccSum: 152.0
[epoch 5][test] Acc@1: 23.2 Acc@10: 52.8 Acc@100: 78.6 AccSum: 154.6
[best epoch: 5]
[epoch 6][dev ] Acc@1: 23.1 Acc@10: 53.5 Acc@100: 79.0 AccSum: 155.7
[epoch 6][test] Acc@1: 23.7 Acc@10: 54.5 Acc@100: 79.4 AccSum: 157.6
[best epoch: 6]
[epoch 7][dev ] 

In [5]:
save_path = "ft_term_to_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

aligner(
  (fc1): Linear(in_features=300, out_features=600, bias=True)
)

In [6]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              sample=False, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, sample=False,\
            GPU_INDEX=GPU_INDEX)
print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.571


HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))


MGD: 0.634 MGD@10: 0.358


### combine BERT with fasttext

#### ft_term+bert_term -> ft_surface+bert_surface+graph

In [29]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_dict = build_train_dict(chv_train_path, gran="general")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [25]:
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.193 Acc@10: 0.444 Acc@100: 0.669 AccSum: 1.306
[epoch 1][test] Acc@1: 0.196 Acc@10: 0.458 Acc@100: 0.673 AccSum: 1.327
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.244 Acc@10: 0.523 Acc@100: 0.746 AccSum: 1.514
[epoch 2][test] Acc@1: 0.254 Acc@10: 0.531 Acc@100: 0.755 AccSum: 1.540
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.285 Acc@10: 0.568 Acc@100: 0.781 AccSum: 1.634
[epoch 3][test] Acc@1: 0.297 Acc@10: 0.581 Acc@100: 0.779 AccSum: 1.657
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.293 Acc@10: 0.582 Acc@100: 0.790 AccSum: 1.665
[epoch 4][test] Acc@1: 0.312 Acc@10: 0.598 Acc@100: 0.801 AccSum: 1.711
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.325 Acc@10: 0.604 Acc@100: 0.817 AccSum: 1.746
[epoch 5][test] Acc@1: 0.343 Acc@10: 0.623 Acc@100: 0.823 AccSum: 1.789
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.336 Acc@10: 0.619 Acc@100: 0.822 AccSum: 1.778
[epoch 6][test] Acc@1: 0.349 Acc@10: 0.638 Acc@100: 0.83

[epoch 54][test] Acc@1: 0.494 Acc@10: 0.768 Acc@100: 0.902 AccSum: 2.164
[best epoch: 54]
[epoch 55][dev ] Acc@1: 0.478 Acc@10: 0.773 Acc@100: 0.900 AccSum: 2.150
[epoch 55][test] Acc@1: 0.497 Acc@10: 0.768 Acc@100: 0.902 AccSum: 2.167
[best epoch: 55]
[epoch 56][dev ] Acc@1: 0.479 Acc@10: 0.774 Acc@100: 0.899 AccSum: 2.152
[epoch 56][test] Acc@1: 0.497 Acc@10: 0.769 Acc@100: 0.903 AccSum: 2.170
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.478 Acc@10: 0.776 Acc@100: 0.900 AccSum: 2.154
[epoch 57][test] Acc@1: 0.498 Acc@10: 0.769 Acc@100: 0.903 AccSum: 2.170
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.480 Acc@10: 0.778 Acc@100: 0.900 AccSum: 2.158
[epoch 58][test] Acc@1: 0.498 Acc@10: 0.770 Acc@100: 0.903 AccSum: 2.171
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.480 Acc@10: 0.777 Acc@100: 0.900 AccSum: 2.157
[epoch 60][dev ] Acc@1: 0.481 Acc@10: 0.776 Acc@100: 0.899 AccSum: 2.157
[epoch 61][dev ] Acc@1: 0.482 Acc@10: 0.776 Acc@100: 0.899 AccSum: 2.157
[epoch 62][dev ] Acc@1: 0.482 Acc@10: 0

In [26]:
save_path = "ft_bert_context_term_to_ft_bert_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1368, bias=True)
)

In [27]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=68.0), HTML(value='')))


MRR: 0.620


In [30]:
# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=None, train_dict=train_dict, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[dict+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 51.6%
0.5080459770114942
[dict+model] Acc@1: 0.726 Acc@10: 0.854 Acc@100: 0.921 Acc sum: 2.500
ratio attempted by heuristics: 45.6%
0.4009195402298851
[smatch+model] Acc@1: 0.627 Acc@10: 0.783 Acc@100: 0.868 Acc sum: 2.278
ratio attempted by heuristics: 69.0%
0.6554022988505747
[dict+smatch+model] Acc@1: 0.771 Acc@10: 0.850 Acc@100: 0.903 Acc sum: 2.524


#### ft_term+bert_term -> ft_surface+bert_surface

In [19]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [20]:
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.191 Acc@10: 0.455 Acc@100: 0.673 AccSum: 1.319
[epoch 1][test] Acc@1: 0.204 Acc@10: 0.463 Acc@100: 0.677 AccSum: 1.345
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.269 Acc@10: 0.537 Acc@100: 0.739 AccSum: 1.545
[epoch 2][test] Acc@1: 0.286 Acc@10: 0.552 Acc@100: 0.744 AccSum: 1.582
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.283 Acc@10: 0.565 Acc@100: 0.766 AccSum: 1.614
[epoch 3][test] Acc@1: 0.303 Acc@10: 0.579 Acc@100: 0.762 AccSum: 1.644
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.291 Acc@10: 0.580 Acc@100: 0.773 AccSum: 1.644
[epoch 4][test] Acc@1: 0.314 Acc@10: 0.589 Acc@100: 0.772 AccSum: 1.675
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.311 Acc@10: 0.596 Acc@100: 0.798 AccSum: 1.704
[epoch 5][test] Acc@1: 0.331 Acc@10: 0.611 Acc@100: 0.796 AccSum: 1.738
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.319 Acc@10: 0.612 Acc@100: 0.806 AccSum: 1.737
[epoch 6][test] Acc@1: 0.343 Acc@10: 0.625 Acc@100: 0.80

[epoch 52][test] Acc@1: 0.438 Acc@10: 0.718 Acc@100: 0.870 AccSum: 2.026
[best epoch: 52]
[epoch 53][dev ] Acc@1: 0.425 Acc@10: 0.712 Acc@100: 0.878 AccSum: 2.015
[epoch 53][test] Acc@1: 0.438 Acc@10: 0.718 Acc@100: 0.870 AccSum: 2.026
[best epoch: 53]
[epoch 54][dev ] Acc@1: 0.426 Acc@10: 0.713 Acc@100: 0.878 AccSum: 2.017
[epoch 54][test] Acc@1: 0.438 Acc@10: 0.719 Acc@100: 0.871 AccSum: 2.028
[best epoch: 54]
[epoch 55][dev ] Acc@1: 0.427 Acc@10: 0.715 Acc@100: 0.878 AccSum: 2.020
[epoch 55][test] Acc@1: 0.439 Acc@10: 0.722 Acc@100: 0.872 AccSum: 2.032
[best epoch: 55]
[epoch 56][dev ] Acc@1: 0.429 Acc@10: 0.715 Acc@100: 0.880 AccSum: 2.023
[epoch 56][test] Acc@1: 0.439 Acc@10: 0.722 Acc@100: 0.872 AccSum: 2.033
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.428 Acc@10: 0.715 Acc@100: 0.879 AccSum: 2.022
[epoch 58][dev ] Acc@1: 0.428 Acc@10: 0.713 Acc@100: 0.880 AccSum: 2.022
[epoch 59][dev ] Acc@1: 0.426 Acc@10: 0.712 Acc@100: 0.879 AccSum: 2.017
[epoch 60][dev ] Acc@1: 0.427 Acc@10: 0

In [21]:
save_path = "ft_bert_context_term_to_ft_bert_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1068, bias=True)
)

In [22]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.556


####  ft_term+bert_term (multilevel attention) -> ft_surface+bert_surface(+graph)

In [4]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
#snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path1 = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")
term_vec_path2 = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

gran="general"

train_dict = build_train_dict(chv_train_path, gran=gran)

train_loader, _ = get_loader_mla(FULL_CHV_PATH, chv_train_path, 
                                 term_vec_path1, term_vec_path2, 
                                 snomed_vec_path, batch_size=64, 
                                 shuffle=True, num_workers=10, gran=gran)
val_loader, valset = get_loader_mla(FULL_CHV_PATH, chv_dev_path, 
                                    term_vec_path1, term_vec_path2, 
                                    snomed_vec_path, batch_size=64, 
                                    shuffle=False, num_workers=10, gran=gran)
test_loader, testset = get_loader_mla(FULL_CHV_PATH, chv_test_path, 
                                      term_vec_path1, term_vec_path2, 
                                      snomed_vec_path, batch_size=64,
                                      shuffle=False,  num_workers=10, 
                                      gran=gran, load_target=True)

[target embeddings loaded, search space size: 350830]


In [7]:
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
#save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
#model.mla_bert.load_state_dict(torch.load(save_path))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train_joint(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.049 Acc@10: 0.157 Acc@100: 0.354 AccSum: 0.561
[epoch 1][test] Acc@1: 0.050 Acc@10: 0.161 Acc@100: 0.357 AccSum: 0.569
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.181 Acc@10: 0.426 Acc@100: 0.636 AccSum: 1.242
[epoch 2][test] Acc@1: 0.182 Acc@10: 0.433 Acc@100: 0.644 AccSum: 1.259
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.268 Acc@10: 0.537 Acc@100: 0.731 AccSum: 1.536
[epoch 3][test] Acc@1: 0.276 Acc@10: 0.551 Acc@100: 0.736 AccSum: 1.563
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.314 Acc@10: 0.580 Acc@100: 0.768 AccSum: 1.662
[epoch 4][test] Acc@1: 0.321 Acc@10: 0.588 Acc@100: 0.776 AccSum: 1.685
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.336 Acc@10: 0.597 Acc@100: 0.791 AccSum: 1.725
[epoch 5][test] Acc@1: 0.346 Acc@10: 0.607 Acc@100: 0.797 AccSum: 1.749
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.358 Acc@10: 0.621 Acc@100: 0.807 AccSum: 1.785
[epoch 6][test] Acc@1: 0.367 Acc@10: 0.632 Acc@100: 0.81

[epoch 54][dev ] Acc@1: 0.591 Acc@10: 0.860 Acc@100: 0.938 AccSum: 2.389
[epoch 55][dev ] Acc@1: 0.594 Acc@10: 0.860 Acc@100: 0.938 AccSum: 2.392
[epoch 55][test] Acc@1: 0.603 Acc@10: 0.857 Acc@100: 0.940 AccSum: 2.401
[best epoch: 55]
[epoch 56][dev ] Acc@1: 0.596 Acc@10: 0.860 Acc@100: 0.938 AccSum: 2.393
[epoch 56][test] Acc@1: 0.606 Acc@10: 0.858 Acc@100: 0.938 AccSum: 2.403
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.597 Acc@10: 0.862 Acc@100: 0.939 AccSum: 2.398
[epoch 57][test] Acc@1: 0.607 Acc@10: 0.860 Acc@100: 0.939 AccSum: 2.406
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.599 Acc@10: 0.864 Acc@100: 0.939 AccSum: 2.403
[epoch 58][test] Acc@1: 0.608 Acc@10: 0.860 Acc@100: 0.941 AccSum: 2.409
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.599 Acc@10: 0.862 Acc@100: 0.939 AccSum: 2.401
[epoch 59][test] Acc@1: 0.611 Acc@10: 0.860 Acc@100: 0.942 AccSum: 2.413
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.600 Acc@10: 0.864 Acc@100: 0.939 AccSum: 2.403
[epoch 60][test] Acc@1: 0.612 Acc@10: 0

In [5]:
save_path = "ensemble_w_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

mla_bert_ft_ensemble(
  (mla_bert): multilevel_attention(
    (self_attn): SelfAttention()
  )
  (fc): Linear(in_features=1068, out_features=1368, bias=True)
  (fc_aligner): fc_aligner(
    (fc1): Linear(in_features=300, out_features=300, bias=True)
  )
)

In [10]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, \
              dumb=True, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              dumb=False, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.734


In [11]:
# Dict + Neural
# accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=None, train_dict=train_dict, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[dict+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# # Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# # Dict + Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# Dict + Stoilos + Neural
accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
                           ed_dict=None, topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 69.0%
0.6554022988505747
[dict+smatch+model] Acc@1: 0.788 Acc@10: 0.876 Acc@100: 0.921 Acc sum: 2.585
