## Prelims

### configs

In [1]:
GPU_INDEX=0
DATA_DIR="../../data/splits/stratified_specific/"
FULL_CHV_PATH="../../data/chv.csv"
FEATURE_PATH="../../data/precomputed_features/"
SNOMED_PATH = "../../data/SnomedCT_201907/"

### imports

In [2]:
%load_ext autoreload
%autoreload 2

import torch
torch.manual_seed(2020) 
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../'))
from src.data import *
from src.loss import *
from src.models import *
from src.evaluation import *
from src.train import *

### load some dicts (to be used later)

In [3]:
# load SNOMED graph
from data.Snomed import Snomed
snomed = Snomed(SNOMED_PATH, taxonomy=False)
snomed.load_snomed()

In [4]:
# create surface_to_snomed_id dict
SF2ID = build_surface_to_snomed_id(snomed)

In [5]:
# create testset_row_index_to_ed_dict dict
ED_DICT = pkl.load(open(os.path.join(FEATURE_PATH,"term_ed_dic_stratified_specific.pkl"),"rb"))

## Experiments

### BERT align

#### static term -> surface

In [34]:
# input: static term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_plain_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path, 
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path,
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [35]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.048 Acc@10: 0.188 Acc@100: 0.397 AccSum: 0.633
[epoch 1][test] Acc@1: 0.056 Acc@10: 0.185 Acc@100: 0.385 AccSum: 0.626
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.103 Acc@10: 0.293 Acc@100: 0.493 AccSum: 0.889
[epoch 2][test] Acc@1: 0.106 Acc@10: 0.295 Acc@100: 0.489 AccSum: 0.890
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.176 Acc@10: 0.400 Acc@100: 0.587 AccSum: 1.163
[epoch 3][test] Acc@1: 0.181 Acc@10: 0.397 Acc@100: 0.589 AccSum: 1.167
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.179 Acc@10: 0.408 Acc@100: 0.592 AccSum: 1.180
[epoch 4][test] Acc@1: 0.188 Acc@10: 0.404 Acc@100: 0.593 AccSum: 1.184
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.191 Acc@10: 0.423 Acc@100: 0.609 AccSum: 1.223
[epoch 5][test] Acc@1: 0.200 Acc@10: 0.419 Acc@100: 0.609 AccSum: 1.227
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.201 Acc@10: 0.429 Acc@100: 0.615 AccSum: 1.244
[epoch 6][test] Acc@1: 0.211 Acc@10: 0.429 Acc@100: 0.62

[epoch 57][test] Acc@1: 0.288 Acc@10: 0.520 Acc@100: 0.689 AccSum: 1.498
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.284 Acc@10: 0.522 Acc@100: 0.679 AccSum: 1.484
[epoch 58][test] Acc@1: 0.291 Acc@10: 0.523 Acc@100: 0.689 AccSum: 1.503
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.284 Acc@10: 0.522 Acc@100: 0.678 AccSum: 1.485
[epoch 59][test] Acc@1: 0.290 Acc@10: 0.524 Acc@100: 0.688 AccSum: 1.503
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.284 Acc@10: 0.522 Acc@100: 0.679 AccSum: 1.486
[epoch 60][test] Acc@1: 0.290 Acc@10: 0.525 Acc@100: 0.689 AccSum: 1.504
[best epoch: 60]
[epoch 61][dev ] Acc@1: 0.283 Acc@10: 0.522 Acc@100: 0.679 AccSum: 1.484
[epoch 62][dev ] Acc@1: 0.283 Acc@10: 0.522 Acc@100: 0.680 AccSum: 1.485
[epoch 63][dev ] Acc@1: 0.284 Acc@10: 0.522 Acc@100: 0.680 AccSum: 1.486
[epoch 64][dev ] Acc@1: 0.284 Acc@10: 0.521 Acc@100: 0.682 AccSum: 1.487
[epoch 64][test] Acc@1: 0.290 Acc@10: 0.525 Acc@100: 0.691 AccSum: 1.506
[best epoch: 64]
[epoch 65][dev ] Acc@1: 0.284 Acc@10: 0

In [36]:
save_path = "bert_static_term_to_surface_state_dict.pkl"
torch.save(model.state_dict(), save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [37]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


MRR: 0.379


#### contextual_term -> surface

##### mutlilevel attention

In [38]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [39]:
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.211 Acc@10: 0.463 Acc@100: 0.651 AccSum: 1.325
[epoch 1][test] Acc@1: 0.217 Acc@10: 0.467 Acc@100: 0.656 AccSum: 1.340
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.233 Acc@10: 0.487 Acc@100: 0.672 AccSum: 1.392
[epoch 2][test] Acc@1: 0.239 Acc@10: 0.489 Acc@100: 0.679 AccSum: 1.407
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.277 Acc@10: 0.527 Acc@100: 0.713 AccSum: 1.517
[epoch 3][test] Acc@1: 0.278 Acc@10: 0.534 Acc@100: 0.720 AccSum: 1.531
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.274 Acc@10: 0.527 Acc@100: 0.709 AccSum: 1.511
[epoch 5][dev ] Acc@1: 0.297 Acc@10: 0.550 Acc@100: 0.732 AccSum: 1.579
[epoch 5][test] Acc@1: 0.295 Acc@10: 0.559 Acc@100: 0.737 AccSum: 1.591
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.286 Acc@10: 0.541 Acc@100: 0.719 AccSum: 1.546
[epoch 7][dev ] Acc@1: 0.309 Acc@10: 0.566 Acc@100: 0.744 AccSum: 1.619
[epoch 7][test] Acc@1: 0.309 Acc@10: 0.579 Acc@100: 0.749 AccSum: 1.637


[epoch 78][dev ] Acc@1: 0.356 Acc@10: 0.618 Acc@100: 0.788 AccSum: 1.762
[epoch 78][test] Acc@1: 0.354 Acc@10: 0.634 Acc@100: 0.792 AccSum: 1.780
[best epoch: 78]
[epoch 79][dev ] Acc@1: 0.357 Acc@10: 0.619 Acc@100: 0.787 AccSum: 1.763
[epoch 79][test] Acc@1: 0.355 Acc@10: 0.635 Acc@100: 0.792 AccSum: 1.782
[best epoch: 79]
[epoch 80][dev ] Acc@1: 0.357 Acc@10: 0.621 Acc@100: 0.786 AccSum: 1.764
[epoch 80][test] Acc@1: 0.352 Acc@10: 0.636 Acc@100: 0.792 AccSum: 1.779
[best epoch: 80]
[epoch 81][dev ] Acc@1: 0.356 Acc@10: 0.619 Acc@100: 0.785 AccSum: 1.760
[epoch 82][dev ] Acc@1: 0.353 Acc@10: 0.618 Acc@100: 0.785 AccSum: 1.756
[epoch 83][dev ] Acc@1: 0.354 Acc@10: 0.616 Acc@100: 0.784 AccSum: 1.755
[epoch 84][dev ] Acc@1: 0.356 Acc@10: 0.615 Acc@100: 0.785 AccSum: 1.756
[epoch 85][dev ] Acc@1: 0.356 Acc@10: 0.615 Acc@100: 0.784 AccSum: 1.755
[epoch 86][dev ] Acc@1: 0.356 Acc@10: 0.616 Acc@100: 0.784 AccSum: 1.756
[epoch 87][dev ] Acc@1: 0.356 Acc@10: 0.617 Acc@100: 0.784 AccSum: 1.757


In [40]:
save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
torch.save(best_sd, save_path)
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

multilevel_attention(
  (self_attn): SelfAttention()
  (fc): Linear(in_features=768, out_features=768, bias=True)
)

In [41]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


MRR: 0.455


##### linear + relu

In [6]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [7]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.123 Acc@10: 0.356 Acc@100: 0.578 AccSum: 1.057
[epoch 1][test] Acc@1: 0.133 Acc@10: 0.343 Acc@100: 0.575 AccSum: 1.051
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.189 Acc@10: 0.421 Acc@100: 0.640 AccSum: 1.249
[epoch 2][test] Acc@1: 0.182 Acc@10: 0.420 Acc@100: 0.637 AccSum: 1.240
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.181 Acc@10: 0.426 Acc@100: 0.631 AccSum: 1.239
[epoch 4][dev ] Acc@1: 0.203 Acc@10: 0.453 Acc@100: 0.649 AccSum: 1.305
[epoch 4][test] Acc@1: 0.202 Acc@10: 0.458 Acc@100: 0.657 AccSum: 1.317
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.204 Acc@10: 0.453 Acc@100: 0.652 AccSum: 1.309
[epoch 5][test] Acc@1: 0.210 Acc@10: 0.460 Acc@100: 0.657 AccSum: 1.328
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.221 Acc@10: 0.479 Acc@100: 0.668 AccSum: 1.368
[epoch 6][test] Acc@1: 0.225 Acc@10: 0.478 Acc@100: 0.675 AccSum: 1.378
[best epoch: 6]
[epoch 7][dev ] Acc@1: 0.229 Acc@10: 0.479 Acc@100: 0.66

[epoch 63][dev ] Acc@1: 0.273 Acc@10: 0.539 Acc@100: 0.721 AccSum: 1.533
[epoch 64][dev ] Acc@1: 0.274 Acc@10: 0.538 Acc@100: 0.720 AccSum: 1.532
[epoch 65][dev ] Acc@1: 0.276 Acc@10: 0.539 Acc@100: 0.721 AccSum: 1.536
[epoch 66][dev ] Acc@1: 0.276 Acc@10: 0.540 Acc@100: 0.722 AccSum: 1.537
[epoch 66][test] Acc@1: 0.282 Acc@10: 0.539 Acc@100: 0.731 AccSum: 1.552
[best epoch: 66]
[epoch 67][dev ] Acc@1: 0.276 Acc@10: 0.540 Acc@100: 0.724 AccSum: 1.540
[epoch 67][test] Acc@1: 0.284 Acc@10: 0.539 Acc@100: 0.731 AccSum: 1.554
[best epoch: 67]
[epoch 68][dev ] Acc@1: 0.277 Acc@10: 0.540 Acc@100: 0.725 AccSum: 1.542
[epoch 68][test] Acc@1: 0.283 Acc@10: 0.541 Acc@100: 0.732 AccSum: 1.555
[best epoch: 68]
[epoch 69][dev ] Acc@1: 0.275 Acc@10: 0.541 Acc@100: 0.724 AccSum: 1.541
[epoch 70][dev ] Acc@1: 0.275 Acc@10: 0.541 Acc@100: 0.723 AccSum: 1.540
[epoch 71][dev ] Acc@1: 0.276 Acc@10: 0.541 Acc@100: 0.723 AccSum: 1.540
[epoch 72][dev ] Acc@1: 0.277 Acc@10: 0.541 Acc@100: 0.723 AccSum: 1.540


In [8]:
save_path = "bert_context_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

val_or_test(model, test_loader, testset.search_space_embeddings, \
                epoch=-1, typ="test", bsz=64, GPU_INDEX=GPU_INDEX)

[epoch 0][test] Acc@1: 0.286 Acc@10: 0.550 Acc@100: 0.738 AccSum: 1.574


[0.285648889906157, 0.550240329594873, 0.737926298924239]

In [9]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=69.0), HTML(value='')))


MRR: 0.376


### fasttext align

#### term -> surface

In [10]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [11]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True,device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.003 AccSum: 0.003
[epoch 1][dev ] Acc@1: 0.045 Acc@10: 0.189 Acc@100: 0.443 AccSum: 0.677
[epoch 1][test] Acc@1: 0.043 Acc@10: 0.191 Acc@100: 0.452 AccSum: 0.685
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.145 Acc@10: 0.382 Acc@100: 0.668 AccSum: 1.195
[epoch 2][test] Acc@1: 0.147 Acc@10: 0.399 Acc@100: 0.673 AccSum: 1.219
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.160 Acc@10: 0.450 Acc@100: 0.721 AccSum: 1.332
[epoch 3][test] Acc@1: 0.169 Acc@10: 0.459 Acc@100: 0.730 AccSum: 1.358
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.200 Acc@10: 0.519 Acc@100: 0.765 AccSum: 1.485
[epoch 4][test] Acc@1: 0.214 Acc@10: 0.522 Acc@100: 0.768 AccSum: 1.505
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.222 Acc@10: 0.536 Acc@100: 0.787 AccSum: 1.546
[epoch 5][test] Acc@1: 0.231 Acc@10: 0.547 Acc@100: 0.786 AccSum: 1.564
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.240 Acc@10: 0.557 Acc@100: 0.798 AccSum: 1.595
[epoch 6][test] Acc@1: 0.251 Acc@10: 0.564 Acc@100: 0.79

[epoch 58][dev ] Acc@1: 0.373 Acc@10: 0.684 Acc@100: 0.871 AccSum: 1.927
[epoch 59][dev ] Acc@1: 0.373 Acc@10: 0.686 Acc@100: 0.869 AccSum: 1.928
[epoch 60][dev ] Acc@1: 0.375 Acc@10: 0.686 Acc@100: 0.869 AccSum: 1.930
[epoch 60][test] Acc@1: 0.375 Acc@10: 0.691 Acc@100: 0.877 AccSum: 1.942
[best epoch: 60]
[epoch 61][dev ] Acc@1: 0.374 Acc@10: 0.686 Acc@100: 0.870 AccSum: 1.930
[epoch 61][test] Acc@1: 0.377 Acc@10: 0.691 Acc@100: 0.877 AccSum: 1.946
[best epoch: 61]
[epoch 62][dev ] Acc@1: 0.375 Acc@10: 0.688 Acc@100: 0.870 AccSum: 1.933
[epoch 62][test] Acc@1: 0.378 Acc@10: 0.693 Acc@100: 0.877 AccSum: 1.948
[best epoch: 62]
[epoch 63][dev ] Acc@1: 0.374 Acc@10: 0.687 Acc@100: 0.870 AccSum: 1.931
[epoch 64][dev ] Acc@1: 0.376 Acc@10: 0.687 Acc@100: 0.870 AccSum: 1.933
[epoch 65][dev ] Acc@1: 0.376 Acc@10: 0.688 Acc@100: 0.870 AccSum: 1.934
[epoch 65][test] Acc@1: 0.378 Acc@10: 0.691 Acc@100: 0.876 AccSum: 1.945
[best epoch: 65]
[epoch 66][dev ] Acc@1: 0.376 Acc@10: 0.689 Acc@100: 0.8

In [12]:
save_path = "ft_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [13]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


MRR: 0.495


#### term -> graph

In [14]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [15]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.007 Acc@10: 0.031 Acc@100: 0.124 AccSum: 0.162
[epoch 1][test] Acc@1: 0.008 Acc@10: 0.033 Acc@100: 0.125 AccSum: 0.165
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.016 Acc@10: 0.071 Acc@100: 0.205 AccSum: 0.292
[epoch 2][test] Acc@1: 0.017 Acc@10: 0.069 Acc@100: 0.208 AccSum: 0.295
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.026 Acc@10: 0.088 Acc@100: 0.236 AccSum: 0.350
[epoch 3][test] Acc@1: 0.025 Acc@10: 0.089 Acc@100: 0.244 AccSum: 0.358
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.028 Acc@10: 0.100 Acc@100: 0.284 AccSum: 0.412
[epoch 4][test] Acc@1: 0.030 Acc@10: 0.104 Acc@100: 0.286 AccSum: 0.420
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.037 Acc@10: 0.109 Acc@100: 0.298 AccSum: 0.444
[epoch 5][test] Acc@1: 0.038 Acc@10: 0.119 Acc@100: 0.297 AccSum: 0.454
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.038 Acc@10: 0.124 Acc@100: 0.323 AccSum: 0.486
[epoch 6][test] Acc@1: 0.039 Acc@10: 0.132 Acc@100: 0.32

[epoch 53][test] Acc@1: 0.117 Acc@10: 0.301 Acc@100: 0.542 AccSum: 0.960
[best epoch: 53]
[epoch 54][dev ] Acc@1: 0.117 Acc@10: 0.290 Acc@100: 0.545 AccSum: 0.951
[epoch 55][dev ] Acc@1: 0.117 Acc@10: 0.292 Acc@100: 0.545 AccSum: 0.954
[epoch 56][dev ] Acc@1: 0.117 Acc@10: 0.292 Acc@100: 0.546 AccSum: 0.956
[epoch 56][test] Acc@1: 0.117 Acc@10: 0.303 Acc@100: 0.543 AccSum: 0.963
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.119 Acc@10: 0.293 Acc@100: 0.548 AccSum: 0.960
[epoch 57][test] Acc@1: 0.118 Acc@10: 0.302 Acc@100: 0.545 AccSum: 0.965
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.122 Acc@10: 0.295 Acc@100: 0.547 AccSum: 0.965
[epoch 58][test] Acc@1: 0.119 Acc@10: 0.304 Acc@100: 0.546 AccSum: 0.969
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.126 Acc@10: 0.296 Acc@100: 0.547 AccSum: 0.969
[epoch 59][test] Acc@1: 0.121 Acc@10: 0.305 Acc@100: 0.547 AccSum: 0.973
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.124 Acc@10: 0.298 Acc@100: 0.549 AccSum: 0.971
[epoch 60][test] Acc@1: 0.120 Acc@10: 0

In [16]:
save_path = "term_to_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [17]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=256, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False,\
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=18.0), HTML(value='')))


MRR: 0.203


#### term -> surface+graph

In [18]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [19]:
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.051 Acc@10: 0.184 Acc@100: 0.463 AccSum: 0.697
[epoch 1][test] Acc@1: 0.051 Acc@10: 0.189 Acc@100: 0.461 AccSum: 0.702
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.092 Acc@10: 0.309 Acc@100: 0.595 AccSum: 0.996
[epoch 2][test] Acc@1: 0.097 Acc@10: 0.320 Acc@100: 0.602 AccSum: 1.019
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.136 Acc@10: 0.380 Acc@100: 0.668 AccSum: 1.184
[epoch 3][test] Acc@1: 0.141 Acc@10: 0.384 Acc@100: 0.668 AccSum: 1.192
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.155 Acc@10: 0.433 Acc@100: 0.713 AccSum: 1.301
[epoch 4][test] Acc@1: 0.165 Acc@10: 0.441 Acc@100: 0.711 AccSum: 1.317
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.190 Acc@10: 0.472 Acc@100: 0.738 AccSum: 1.400
[epoch 5][test] Acc@1: 0.196 Acc@10: 0.475 Acc@100: 0.736 AccSum: 1.407
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.208 Acc@10: 0.492 Acc@100: 0.751 AccSum: 1.450
[epoch 6][test] Acc@1: 0.211 Acc@10: 0.493 Acc@100: 0.75

[epoch 55][dev ] Acc@1: 0.396 Acc@10: 0.696 Acc@100: 0.872 AccSum: 1.965
[epoch 55][test] Acc@1: 0.403 Acc@10: 0.715 Acc@100: 0.883 AccSum: 2.001
[best epoch: 55]
[epoch 56][dev ] Acc@1: 0.398 Acc@10: 0.698 Acc@100: 0.873 AccSum: 1.970
[epoch 56][test] Acc@1: 0.405 Acc@10: 0.715 Acc@100: 0.885 AccSum: 2.006
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.398 Acc@10: 0.699 Acc@100: 0.874 AccSum: 1.971
[epoch 57][test] Acc@1: 0.404 Acc@10: 0.716 Acc@100: 0.885 AccSum: 2.005
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.400 Acc@10: 0.698 Acc@100: 0.875 AccSum: 1.973
[epoch 58][test] Acc@1: 0.406 Acc@10: 0.717 Acc@100: 0.885 AccSum: 2.008
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.402 Acc@10: 0.698 Acc@100: 0.875 AccSum: 1.976
[epoch 59][test] Acc@1: 0.407 Acc@10: 0.717 Acc@100: 0.886 AccSum: 2.009
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.403 Acc@10: 0.700 Acc@100: 0.875 AccSum: 1.978
[epoch 60][test] Acc@1: 0.408 Acc@10: 0.719 Acc@100: 0.886 AccSum: 2.014
[best epoch: 60]
[epoch 61][dev ] Acc@1

In [20]:
save_path = "ft_term_to_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=600, bias=True)
)

In [21]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, sample=False,\
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


MRR: 0.541


### combine BERT with fasttext

#### ft_term+bert_term -> ft_surface+bert_surface+graph

In [6]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

gran = "specific"
train_dict = build_train_dict(chv_train_path, gran=gran)

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran=gran)
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran=gran)
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran=gran, 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [24]:
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.190 Acc@10: 0.453 Acc@100: 0.680 AccSum: 1.323
[epoch 1][test] Acc@1: 0.190 Acc@10: 0.445 Acc@100: 0.678 AccSum: 1.312
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.248 Acc@10: 0.531 Acc@100: 0.755 AccSum: 1.534
[epoch 2][test] Acc@1: 0.239 Acc@10: 0.523 Acc@100: 0.751 AccSum: 1.512
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.271 Acc@10: 0.564 Acc@100: 0.782 AccSum: 1.617
[epoch 3][test] Acc@1: 0.274 Acc@10: 0.562 Acc@100: 0.775 AccSum: 1.611
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.290 Acc@10: 0.576 Acc@100: 0.787 AccSum: 1.654
[epoch 4][test] Acc@1: 0.291 Acc@10: 0.584 Acc@100: 0.786 AccSum: 1.661
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.311 Acc@10: 0.604 Acc@100: 0.801 AccSum: 1.716
[epoch 5][test] Acc@1: 0.312 Acc@10: 0.605 Acc@100: 0.805 AccSum: 1.722
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.326 Acc@10: 0.612 Acc@100: 0.812 AccSum: 1.750
[epoch 6][test] Acc@1: 0.330 Acc@10: 0.615 Acc@100: 0.81

[epoch 57][test] Acc@1: 0.453 Acc@10: 0.745 Acc@100: 0.887 AccSum: 2.085
[best epoch: 57]
[epoch 58][dev ] Acc@1: 0.444 Acc@10: 0.733 Acc@100: 0.885 AccSum: 2.062
[epoch 58][test] Acc@1: 0.455 Acc@10: 0.746 Acc@100: 0.887 AccSum: 2.088
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.444 Acc@10: 0.734 Acc@100: 0.884 AccSum: 2.063
[epoch 59][test] Acc@1: 0.455 Acc@10: 0.747 Acc@100: 0.888 AccSum: 2.090
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.446 Acc@10: 0.735 Acc@100: 0.885 AccSum: 2.065
[epoch 60][test] Acc@1: 0.456 Acc@10: 0.747 Acc@100: 0.889 AccSum: 2.092
[best epoch: 60]
[epoch 61][dev ] Acc@1: 0.447 Acc@10: 0.734 Acc@100: 0.884 AccSum: 2.066
[epoch 61][test] Acc@1: 0.456 Acc@10: 0.748 Acc@100: 0.889 AccSum: 2.093
[best epoch: 61]
[epoch 62][dev ] Acc@1: 0.446 Acc@10: 0.734 Acc@100: 0.885 AccSum: 2.065
[epoch 63][dev ] Acc@1: 0.447 Acc@10: 0.735 Acc@100: 0.885 AccSum: 2.067
[epoch 63][test] Acc@1: 0.456 Acc@10: 0.748 Acc@100: 0.887 AccSum: 2.092
[best epoch: 63]
[epoch 64][dev ] Acc@1

In [7]:
save_path = "ft_bert_context_term_to_ft_bert_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1368, bias=True)
)

In [8]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=69.0), HTML(value='')))


MRR: 0.581


In [10]:
# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=None, train_dict=train_dict, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[dict+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

# test back-off
accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
                           topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 51.1%
0.44632639047837036
[dict+model] Acc@1: 0.657 Acc@10: 0.787 Acc@100: 0.862 Acc sum: 2.306
ratio attempted by heuristics: 45.2%
0.381551842526894
[smatch+model] Acc@1: 0.590 Acc@10: 0.751 Acc@100: 0.847 Acc sum: 2.189
ratio attempted by heuristics: 69.3%
0.596932936598764
[dict+smatch+model] Acc@1: 0.708 Acc@10: 0.785 Acc@100: 0.841 Acc sum: 2.335


#### ft_term+bert_term -> ft_surface+bert_surface

In [11]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [12]:
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.182 Acc@10: 0.441 Acc@100: 0.677 AccSum: 1.300
[epoch 1][test] Acc@1: 0.190 Acc@10: 0.442 Acc@100: 0.672 AccSum: 1.304
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.230 Acc@10: 0.498 Acc@100: 0.729 AccSum: 1.457
[epoch 2][test] Acc@1: 0.233 Acc@10: 0.505 Acc@100: 0.726 AccSum: 1.464
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.249 Acc@10: 0.521 Acc@100: 0.739 AccSum: 1.508
[epoch 3][test] Acc@1: 0.253 Acc@10: 0.526 Acc@100: 0.735 AccSum: 1.514
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.267 Acc@10: 0.544 Acc@100: 0.764 AccSum: 1.575
[epoch 4][test] Acc@1: 0.279 Acc@10: 0.550 Acc@100: 0.753 AccSum: 1.582
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.287 Acc@10: 0.566 Acc@100: 0.776 AccSum: 1.629
[epoch 5][test] Acc@1: 0.290 Acc@10: 0.572 Acc@100: 0.771 AccSum: 1.633
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.293 Acc@10: 0.573 Acc@100: 0.779 AccSum: 1.645
[epoch 6][test] Acc@1: 0.293 Acc@10: 0.577 Acc@100: 0.77

[epoch 55][test] Acc@1: 0.398 Acc@10: 0.685 Acc@100: 0.855 AccSum: 1.938
[best epoch: 55]
[epoch 56][dev ] Acc@1: 0.391 Acc@10: 0.679 Acc@100: 0.857 AccSum: 1.928
[epoch 56][test] Acc@1: 0.397 Acc@10: 0.686 Acc@100: 0.856 AccSum: 1.940
[best epoch: 56]
[epoch 57][dev ] Acc@1: 0.390 Acc@10: 0.680 Acc@100: 0.857 AccSum: 1.927
[epoch 58][dev ] Acc@1: 0.394 Acc@10: 0.679 Acc@100: 0.859 AccSum: 1.931
[epoch 58][test] Acc@1: 0.398 Acc@10: 0.689 Acc@100: 0.857 AccSum: 1.944
[best epoch: 58]
[epoch 59][dev ] Acc@1: 0.392 Acc@10: 0.678 Acc@100: 0.859 AccSum: 1.929
[epoch 60][dev ] Acc@1: 0.393 Acc@10: 0.678 Acc@100: 0.859 AccSum: 1.931
[epoch 61][dev ] Acc@1: 0.393 Acc@10: 0.678 Acc@100: 0.861 AccSum: 1.932
[epoch 61][test] Acc@1: 0.399 Acc@10: 0.691 Acc@100: 0.858 AccSum: 1.948
[best epoch: 61]
[epoch 62][dev ] Acc@1: 0.394 Acc@10: 0.680 Acc@100: 0.860 AccSum: 1.934
[epoch 62][test] Acc@1: 0.399 Acc@10: 0.690 Acc@100: 0.859 AccSum: 1.948
[best epoch: 62]
[epoch 63][dev ] Acc@1: 0.395 Acc@10: 0

In [14]:
save_path = "ft_bert_context_term_to_ft_bert_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1068, bias=True)
)

In [15]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=69.0), HTML(value='')))


MRR: 0.517


####  ft_term+bert_term (multilevel attention) -> ft_surface+bert_surface(+graph)

In [16]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
#snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path1 = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")
term_vec_path2 = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

gran="specific"

train_dict = build_train_dict(chv_train_path, gran=gran)

train_loader, _ = get_loader_mla(FULL_CHV_PATH, chv_train_path, 
                                 term_vec_path1, term_vec_path2, 
                                 snomed_vec_path, batch_size=64, 
                                 shuffle=True, num_workers=10, gran=gran)
val_loader, valset = get_loader_mla(FULL_CHV_PATH, chv_dev_path, 
                                    term_vec_path1, term_vec_path2, 
                                    snomed_vec_path, batch_size=64, 
                                    shuffle=False, num_workers=10, gran=gran)
test_loader, testset = get_loader_mla(FULL_CHV_PATH, chv_test_path, 
                                      term_vec_path1, term_vec_path2, 
                                      snomed_vec_path, batch_size=64,
                                      shuffle=False,  num_workers=10, 
                                      gran=gran, load_target=True)

[target embeddings loaded, search space size: 350830]


In [17]:
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
#save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
#model.mla_bert.load_state_dict(torch.load(save_path))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train_joint(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=100, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.038 Acc@10: 0.153 Acc@100: 0.347 AccSum: 0.538
[epoch 1][test] Acc@1: 0.043 Acc@10: 0.157 Acc@100: 0.348 AccSum: 0.549
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.188 Acc@10: 0.452 Acc@100: 0.684 AccSum: 1.324
[epoch 2][test] Acc@1: 0.203 Acc@10: 0.458 Acc@100: 0.676 AccSum: 1.337
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.278 Acc@10: 0.552 Acc@100: 0.746 AccSum: 1.577
[epoch 3][test] Acc@1: 0.283 Acc@10: 0.557 Acc@100: 0.738 AccSum: 1.579
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.293 Acc@10: 0.572 Acc@100: 0.764 AccSum: 1.630
[epoch 4][test] Acc@1: 0.304 Acc@10: 0.578 Acc@100: 0.762 AccSum: 1.644
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.335 Acc@10: 0.611 Acc@100: 0.793 AccSum: 1.739
[epoch 5][test] Acc@1: 0.334 Acc@10: 0.611 Acc@100: 0.795 AccSum: 1.740
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.346 Acc@10: 0.616 Acc@100: 0.797 AccSum: 1.760
[epoch 6][test] Acc@1: 0.346 Acc@10: 0.623 Acc@100: 0.80

[epoch 58][dev ] Acc@1: 0.564 Acc@10: 0.821 Acc@100: 0.925 AccSum: 2.310
[epoch 59][dev ] Acc@1: 0.568 Acc@10: 0.825 Acc@100: 0.926 AccSum: 2.319
[epoch 59][test] Acc@1: 0.583 Acc@10: 0.843 Acc@100: 0.935 AccSum: 2.360
[best epoch: 59]
[epoch 60][dev ] Acc@1: 0.574 Acc@10: 0.829 Acc@100: 0.929 AccSum: 2.332
[epoch 60][test] Acc@1: 0.587 Acc@10: 0.844 Acc@100: 0.937 AccSum: 2.367
[best epoch: 60]
[epoch 61][dev ] Acc@1: 0.574 Acc@10: 0.829 Acc@100: 0.930 AccSum: 2.332
[epoch 62][dev ] Acc@1: 0.571 Acc@10: 0.826 Acc@100: 0.927 AccSum: 2.324
[epoch 63][dev ] Acc@1: 0.573 Acc@10: 0.827 Acc@100: 0.927 AccSum: 2.327
[epoch 64][dev ] Acc@1: 0.572 Acc@10: 0.832 Acc@100: 0.928 AccSum: 2.332
[epoch 65][dev ] Acc@1: 0.575 Acc@10: 0.832 Acc@100: 0.929 AccSum: 2.336
[epoch 65][test] Acc@1: 0.590 Acc@10: 0.848 Acc@100: 0.934 AccSum: 2.373
[best epoch: 65]
[epoch 66][dev ] Acc@1: 0.575 Acc@10: 0.830 Acc@100: 0.927 AccSum: 2.332
[epoch 67][dev ] Acc@1: 0.576 Acc@10: 0.830 Acc@100: 0.928 AccSum: 2.334


In [18]:
save_path = "ensemble_w_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

mla_bert_ft_ensemble(
  (mla_bert): multilevel_attention(
    (self_attn): SelfAttention()
  )
  (fc): Linear(in_features=1068, out_features=1368, bias=True)
  (fc_aligner): fc_aligner(
    (fc1): Linear(in_features=300, out_features=300, bias=True)
  )
)

In [19]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, dumb=True, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              dumb=False, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=69.0), HTML(value='')))


MRR: 0.702


In [20]:
# Dict + Neural
# accs, pred_dict = compute_metrics_backoff(test_loader, testset, model, sf2id=None, train_dict=train_dict, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[dict+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# # Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# # Dict + Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# Dict + Stoilos + Neural
accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=train_dict, \
                           ed_dict=None, topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 69.3%
0.596932936598764
[dict+smatch+model] Acc@1: 0.734 Acc@10: 0.815 Acc@100: 0.857 Acc sum: 2.405
