## Prelims

### configs

In [1]:
GPU_INDEX=0
DATA_DIR="../../data/splits/zeroshot_specific/"
FULL_CHV_PATH="../../data/chv.csv"
#FEATURE_PATH="../../data/precomputed_features/"
FEATURE_PATH="/mnt/HDD_2T/data_Marco/"
SNOMED_PATH = "../../data/SnomedCT_201907/"

### imports

In [2]:
%load_ext autoreload
%autoreload 2

import torch
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../'))
from src.data import *
from src.loss import *
from src.models import *
from src.evaluation import *
from src.train import *

In [3]:
torch.manual_seed(2020)

<torch._C.Generator at 0x7f30b4129f90>

### load some dicts (to be used later)

In [4]:
# load SNOMED graph
from data.Snomed import Snomed
snomed = Snomed(SNOMED_PATH, taxonomy=False)
snomed.load_snomed()

In [5]:
# create surface_to_snomed_id dict
SF2ID = build_surface_to_snomed_id(snomed)

In [6]:
# create testset_row_index_to_ed_dict dict
ED_DICT = pkl.load(open(os.path.join(FEATURE_PATH,"term_ed_dic_zeroshot_specific.pkl"),"rb"))

## Experiments

### BERT align

#### static term -> surface

In [11]:
# input: static term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_plain_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path, 
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path,
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [12]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.064 Acc@10: 0.185 Acc@100: 0.444 AccSum: 0.694
[epoch 1][test] Acc@1: 0.062 Acc@10: 0.201 Acc@100: 0.431 AccSum: 0.694
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.121 Acc@10: 0.323 Acc@100: 0.537 AccSum: 0.981
[epoch 2][test] Acc@1: 0.115 Acc@10: 0.333 Acc@100: 0.538 AccSum: 0.986
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.189 Acc@10: 0.408 Acc@100: 0.609 AccSum: 1.205
[epoch 3][test] Acc@1: 0.186 Acc@10: 0.407 Acc@100: 0.596 AccSum: 1.189
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.171 Acc@10: 0.403 Acc@100: 0.612 AccSum: 1.186
[epoch 5][dev ] Acc@1: 0.186 Acc@10: 0.420 Acc@100: 0.613 AccSum: 1.220
[epoch 5][test] Acc@1: 0.196 Acc@10: 0.407 Acc@100: 0.604 AccSum: 1.207
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.205 Acc@10: 0.426 Acc@100: 0.624 AccSum: 1.255
[epoch 6][test] Acc@1: 0.196 Acc@10: 0.409 Acc@100: 0.611 AccSum: 1.217
[best epoch: 6]
[epoch 7][dev ] Acc@1: 0.204 Acc@10: 0.440 Acc@100: 0.62

In [13]:
save_path = "bert_static_term_to_surface_state_dict.pkl"
torch.save(model.state_dict(), save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [14]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.319


#### contextual_term -> surface

##### mutlilevel attention

In [15]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [16]:
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.181 Acc@10: 0.411 Acc@100: 0.608 AccSum: 1.200
[epoch 1][test] Acc@1: 0.173 Acc@10: 0.395 Acc@100: 0.622 AccSum: 1.190
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.182 Acc@10: 0.439 Acc@100: 0.641 AccSum: 1.262
[epoch 2][test] Acc@1: 0.197 Acc@10: 0.414 Acc@100: 0.630 AccSum: 1.241
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.269 Acc@10: 0.511 Acc@100: 0.700 AccSum: 1.480
[epoch 3][test] Acc@1: 0.246 Acc@10: 0.496 Acc@100: 0.703 AccSum: 1.445
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.212 Acc@10: 0.475 Acc@100: 0.651 AccSum: 1.337
[epoch 5][dev ] Acc@1: 0.264 Acc@10: 0.539 Acc@100: 0.703 AccSum: 1.505
[epoch 5][test] Acc@1: 0.251 Acc@10: 0.515 Acc@100: 0.710 AccSum: 1.476
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.269 Acc@10: 0.532 Acc@100: 0.706 AccSum: 1.507
[epoch 6][test] Acc@1: 0.254 Acc@10: 0.508 Acc@100: 0.707 AccSum: 1.468
[best epoch: 6]
[epoch 7][dev ] Acc@1: 0.251 Acc@10: 0.516 Acc@100: 0.68

In [19]:
save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
torch.save(best_sd, save_path)
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

multilevel_attention(
  (self_attn): SelfAttention()
  (fc): Linear(in_features=768, out_features=768, bias=True)
)

In [20]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.349


##### linear + relu

In [21]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [22]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.113 Acc@10: 0.308 Acc@100: 0.539 AccSum: 0.960
[epoch 1][test] Acc@1: 0.135 Acc@10: 0.324 Acc@100: 0.550 AccSum: 1.009
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.172 Acc@10: 0.401 Acc@100: 0.609 AccSum: 1.182
[epoch 2][test] Acc@1: 0.168 Acc@10: 0.392 Acc@100: 0.616 AccSum: 1.176
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.202 Acc@10: 0.427 Acc@100: 0.632 AccSum: 1.261
[epoch 3][test] Acc@1: 0.192 Acc@10: 0.407 Acc@100: 0.633 AccSum: 1.232
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.218 Acc@10: 0.444 Acc@100: 0.653 AccSum: 1.315
[epoch 4][test] Acc@1: 0.204 Acc@10: 0.422 Acc@100: 0.652 AccSum: 1.279
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.226 Acc@10: 0.457 Acc@100: 0.661 AccSum: 1.344
[epoch 5][test] Acc@1: 0.212 Acc@10: 0.437 Acc@100: 0.662 AccSum: 1.311
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.230 Acc@10: 0.464 Acc@100: 0.657 AccSum: 1.351
[epoch 6][test] Acc@1: 0.216 Acc@10: 0.439 Acc@100: 0.65

In [25]:
save_path = "bert_context_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

# val_or_test(model, test_loader, testset.search_space_embeddings, \
#                 epoch=-1, typ="test", bsz=128, GPU_INDEX=GPU_INDEX)

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [26]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.308


### fasttext align

#### term -> surface

In [27]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [28]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True,device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.051 Acc@10: 0.206 Acc@100: 0.465 AccSum: 0.721
[epoch 1][test] Acc@1: 0.030 Acc@10: 0.180 Acc@100: 0.443 AccSum: 0.654
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.113 Acc@10: 0.345 Acc@100: 0.616 AccSum: 1.074
[epoch 2][test] Acc@1: 0.099 Acc@10: 0.329 Acc@100: 0.645 AccSum: 1.074
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.130 Acc@10: 0.389 Acc@100: 0.682 AccSum: 1.202
[epoch 3][test] Acc@1: 0.121 Acc@10: 0.379 Acc@100: 0.691 AccSum: 1.191
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.144 Acc@10: 0.429 Acc@100: 0.706 AccSum: 1.278
[epoch 4][test] Acc@1: 0.138 Acc@10: 0.420 Acc@100: 0.731 AccSum: 1.289
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.156 Acc@10: 0.466 Acc@100: 0.723 AccSum: 1.345
[epoch 5][test] Acc@1: 0.159 Acc@10: 0.449 Acc@100: 0.756 AccSum: 1.364
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.162 Acc@10: 0.480 Acc@100: 0.736 AccSum: 1.378
[epoch 6][test] Acc@1: 0.160 Acc@10: 0.456 Acc@100: 0.75

In [31]:
save_path = "ft_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [32]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.311


#### term -> graph

In [33]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [34]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.006 Acc@10: 0.033 Acc@100: 0.127 AccSum: 0.167
[epoch 1][test] Acc@1: 0.014 Acc@10: 0.032 Acc@100: 0.104 AccSum: 0.149
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.006 Acc@10: 0.025 Acc@100: 0.144 AccSum: 0.176
[epoch 2][test] Acc@1: 0.011 Acc@10: 0.053 Acc@100: 0.145 AccSum: 0.210
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.009 Acc@10: 0.040 Acc@100: 0.178 AccSum: 0.227
[epoch 3][test] Acc@1: 0.012 Acc@10: 0.080 Acc@100: 0.223 AccSum: 0.315
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.011 Acc@10: 0.033 Acc@100: 0.160 AccSum: 0.204
[epoch 5][dev ] Acc@1: 0.015 Acc@10: 0.048 Acc@100: 0.214 AccSum: 0.277
[epoch 5][test] Acc@1: 0.021 Acc@10: 0.093 Acc@100: 0.259 AccSum: 0.372
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.015 Acc@10: 0.050 Acc@100: 0.202 AccSum: 0.267
[epoch 7][dev ] Acc@1: 0.018 Acc@10: 0.067 Acc@100: 0.213 AccSum: 0.299
[epoch 7][test] Acc@1: 0.022 Acc@10: 0.106 Acc@100: 0.274 AccSum: 0.402


In [35]:
save_path = "term_to_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [36]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=256, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False,\
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=17.0), HTML(value='')))


MRR: 0.064


#### term -> surface+graph

In [7]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [8]:
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.044 Acc@10: 0.169 Acc@100: 0.412 AccSum: 0.624
[epoch 1][test] Acc@1: 0.032 Acc@10: 0.171 Acc@100: 0.442 AccSum: 0.645
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.081 Acc@10: 0.281 Acc@100: 0.532 AccSum: 0.894
[epoch 2][test] Acc@1: 0.065 Acc@10: 0.281 Acc@100: 0.572 AccSum: 0.919
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.090 Acc@10: 0.304 Acc@100: 0.564 AccSum: 0.958
[epoch 3][test] Acc@1: 0.089 Acc@10: 0.307 Acc@100: 0.595 AccSum: 0.991
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.095 Acc@10: 0.346 Acc@100: 0.586 AccSum: 1.027
[epoch 4][test] Acc@1: 0.105 Acc@10: 0.346 Acc@100: 0.633 AccSum: 1.084
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.107 Acc@10: 0.327 Acc@100: 0.602 AccSum: 1.035
[epoch 5][test] Acc@1: 0.107 Acc@10: 0.359 Acc@100: 0.647 AccSum: 1.113
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.110 Acc@10: 0.349 Acc@100: 0.605 AccSum: 1.063
[epoch 6][test] Acc@1: 0.116 Acc@10: 0.373 Acc@100: 0.65

In [9]:
save_path = "ft_term_to_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=600, bias=True)
)

In [10]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.227


### combine BERT with fasttext

#### ft_term+bert_term -> ft_surface+bert_surface+graph

In [11]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [12]:
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.182 Acc@10: 0.431 Acc@100: 0.670 AccSum: 1.283
[epoch 1][test] Acc@1: 0.173 Acc@10: 0.425 Acc@100: 0.682 AccSum: 1.279
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.196 Acc@10: 0.480 Acc@100: 0.709 AccSum: 1.385
[epoch 2][test] Acc@1: 0.211 Acc@10: 0.488 Acc@100: 0.735 AccSum: 1.433
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.201 Acc@10: 0.496 Acc@100: 0.718 AccSum: 1.415
[epoch 3][test] Acc@1: 0.224 Acc@10: 0.497 Acc@100: 0.728 AccSum: 1.449
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.215 Acc@10: 0.507 Acc@100: 0.722 AccSum: 1.444
[epoch 4][test] Acc@1: 0.234 Acc@10: 0.518 Acc@100: 0.746 AccSum: 1.498
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.248 Acc@10: 0.532 Acc@100: 0.747 AccSum: 1.527
[epoch 5][test] Acc@1: 0.246 Acc@10: 0.527 Acc@100: 0.759 AccSum: 1.532
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.256 Acc@10: 0.538 Acc@100: 0.753 AccSum: 1.547
[epoch 6][test] Acc@1: 0.242 Acc@10: 0.529 Acc@100: 0.75

In [13]:
save_path = "ft_bert_context_term_to_ft_bert_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1368, bias=True)
)

In [14]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=67.0), HTML(value='')))


MRR: 0.373


#### ft_term+bert_term -> ft_surface+bert_surface

In [7]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="specific")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="specific")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="specific", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [8]:
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.003 Acc@100: 0.006 AccSum: 0.009
[epoch 1][dev ] Acc@1: 0.146 Acc@10: 0.373 Acc@100: 0.615 AccSum: 1.134
[epoch 1][test] Acc@1: 0.157 Acc@10: 0.385 Acc@100: 0.608 AccSum: 1.150
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.221 Acc@10: 0.466 Acc@100: 0.668 AccSum: 1.355
[epoch 2][test] Acc@1: 0.210 Acc@10: 0.465 Acc@100: 0.694 AccSum: 1.368
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.248 Acc@10: 0.505 Acc@100: 0.695 AccSum: 1.448
[epoch 3][test] Acc@1: 0.232 Acc@10: 0.492 Acc@100: 0.715 AccSum: 1.440
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.270 Acc@10: 0.518 Acc@100: 0.715 AccSum: 1.503
[epoch 4][test] Acc@1: 0.246 Acc@10: 0.511 Acc@100: 0.738 AccSum: 1.495
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.282 Acc@10: 0.530 Acc@100: 0.725 AccSum: 1.537
[epoch 5][test] Acc@1: 0.250 Acc@10: 0.518 Acc@100: 0.746 AccSum: 1.514
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.287 Acc@10: 0.542 Acc@100: 0.740 AccSum: 1.569
[epoch 6][test] Acc@1: 0.264 Acc@10: 0.529 Acc@100: 0.75

In [9]:
save_path = "ft_bert_context_term_to_ft_bert_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1068, bias=True)
)

In [10]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=34.0), HTML(value='')))


MRR: 0.373


####  ft_term+bert_term (multilevel attention) -> ft_surface+bert_surface(+graph)

In [7]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
#snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path1 = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")
term_vec_path2 = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

gran="specific"

train_dict = build_train_dict(chv_train_path, gran=gran)

train_loader, _ = get_loader_mla(FULL_CHV_PATH, chv_train_path, 
                                 term_vec_path1, term_vec_path2, 
                                 snomed_vec_path, batch_size=64, 
                                 shuffle=True, num_workers=10, gran=gran)
val_loader, valset = get_loader_mla(FULL_CHV_PATH, chv_dev_path, 
                                    term_vec_path1, term_vec_path2, 
                                    snomed_vec_path, batch_size=64, 
                                    shuffle=False, num_workers=10, gran=gran)
test_loader, testset = get_loader_mla(FULL_CHV_PATH, chv_test_path, 
                                      term_vec_path1, term_vec_path2, 
                                      snomed_vec_path, batch_size=64,
                                      shuffle=False,  num_workers=10, 
                                      gran=gran, load_target=True)

[target embeddings loaded, search space size: 350830]


In [14]:
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)

# init mla_bert using previous weights
#save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
#model.mla_bert.load_state_dict({"self_attn.att_weights":torch.load(save_path)["self_attn.att_weights"]})
#model.mla_bert.load_state_dict(torch.load(save_path))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train_joint(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][test] Acc@1: 0.056 Acc@10: 0.154 Acc@100: 0.344 AccSum: 0.554
[best epoch: 1]
[epoch 2][test] Acc@1: 0.157 Acc@10: 0.396 Acc@100: 0.615 AccSum: 1.168
[best epoch: 2]
[epoch 3][test] Acc@1: 0.206 Acc@10: 0.480 Acc@100: 0.688 AccSum: 1.374
[best epoch: 3]
[epoch 4][test] Acc@1: 0.241 Acc@10: 0.515 Acc@100: 0.725 AccSum: 1.481
[best epoch: 4]
[epoch 5][test] Acc@1: 0.253 Acc@10: 0.524 Acc@100: 0.737 AccSum: 1.513
[best epoch: 5]
[epoch 6][test] Acc@1: 0.277 Acc@10: 0.550 Acc@100: 0.763 AccSum: 1.590
[best epoch: 6]
[epoch 7][test] Acc@1: 0.278 Acc@10: 0.558 Acc@100: 0.764 AccSum: 1.600
[best epoch: 7]
[epoch 8][test] Acc@1: 0.284 Acc@10: 0.575 Acc@100: 0.777 AccSum: 1.635
[best epoch: 8]
[epoch 9][test] Acc@1: 0.285 Acc@10: 0.583 Acc@100: 0.781 AccSum: 1.648
[best epoch: 9]
[epoch 10][test] Acc@1: 0.282 Acc@10: 0.586 Acc@100: 0.782 AccSum: 1.650
[epoch 11][test] Acc@1: 0.295 Acc@10: 0.599 Acc@100: 0.796 AccS

In [8]:
save_path = "ensemble_w_graph_state_dict.pkl"
#torch.save(best_sd, save_path)
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

mla_bert_ft_ensemble(
  (mla_bert): multilevel_attention(
    (self_attn): SelfAttention()
  )
  (fc): Linear(in_features=1068, out_features=1368, bias=True)
  (fc_aligner): fc_aligner(
    (fc1): Linear(in_features=300, out_features=300, bias=True)
  )
)

In [16]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, \
              dumb=True, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              dumb=False, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=67.0), HTML(value='')))


MRR: 0.422


In [11]:
# # Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# Dict + Stoilos + Neural
accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
                           ed_dict=ED_DICT, topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 59.0%
0.45972449217837963
[dict+smatch+model] Acc@1: 0.528 Acc@10: 0.645 Acc@100: 0.746 Acc sum: 1.919
