## Prelims

### configs

In [1]:
GPU_INDEX=0
DATA_DIR="../../data/splits/zeroshot_general/"
FULL_CHV_PATH="../../data/chv.csv"
FEATURE_PATH="../../data/precomputed_features/"
SNOMED_PATH = "../../data/SnomedCT_201907/"

### imports

In [2]:
%load_ext autoreload
%autoreload 2

import torch
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '../../'))
from src.data import *
from src.loss import *
from src.models import *
from src.evaluation import *
from src.train import *

In [3]:
torch.manual_seed(2020)

<torch._C.Generator at 0x7f3cf40eff90>

### load some dicts (to be used later)

In [4]:
# load SNOMED graph
from data.Snomed import Snomed
snomed = Snomed(SNOMED_PATH, taxonomy=False)
snomed.load_snomed()

In [5]:
# create surface_to_snomed_id dict
SF2ID = build_surface_to_snomed_id(snomed)

In [6]:
# create testset_row_index_to_ed_dict dict
ED_DICT = pkl.load(open(os.path.join(FEATURE_PATH,"term_ed_dic_zeroshot_general.pkl"),"rb"))
ONT_ED_DICT = pkl.load(open(os.path.join(FEATURE_PATH,"term_onto_ed_dic_zeroshot_general.pkl"),"rb"))

## Experiments

### BERT align

#### static term -> surface

In [7]:
# input: static term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_plain_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path, 
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path,
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [9]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.002 AccSum: 0.002
[epoch 1][dev ] Acc@1: 0.020 Acc@10: 0.164 Acc@100: 0.341 AccSum: 0.525
[epoch 1][test] Acc@1: 0.042 Acc@10: 0.143 Acc@100: 0.314 AccSum: 0.499
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.112 Acc@10: 0.297 Acc@100: 0.524 AccSum: 0.933
[epoch 2][test] Acc@1: 0.102 Acc@10: 0.288 Acc@100: 0.526 AccSum: 0.916
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.175 Acc@10: 0.391 Acc@100: 0.608 AccSum: 1.174
[epoch 3][test] Acc@1: 0.171 Acc@10: 0.385 Acc@100: 0.605 AccSum: 1.161
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.192 Acc@10: 0.433 Acc@100: 0.637 AccSum: 1.261
[epoch 4][test] Acc@1: 0.198 Acc@10: 0.431 Acc@100: 0.635 AccSum: 1.264
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.219 Acc@10: 0.413 Acc@100: 0.631 AccSum: 1.263
[epoch 5][test] Acc@1: 0.200 Acc@10: 0.435 Acc@100: 0.628 AccSum: 1.262
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.232 Acc@10: 0.426 Acc@100: 0.653 AccSum: 1.311
[epoch 6][test] Acc@1: 0.210 Acc@10: 0.442 Acc@100: 0.64

In [10]:
save_path = "bert_static_term_to_surface_state_dict.pkl"
torch.save(model.state_dict(), save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [11]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.344


#### contextual_term -> surface

##### mutlilevel attention

In [11]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [12]:
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.002 AccSum: 0.002
[epoch 1][dev ] Acc@1: 0.112 Acc@10: 0.331 Acc@100: 0.554 AccSum: 0.997
[epoch 1][test] Acc@1: 0.115 Acc@10: 0.340 Acc@100: 0.559 AccSum: 1.014
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.181 Acc@10: 0.384 Acc@100: 0.630 AccSum: 1.195
[epoch 2][test] Acc@1: 0.187 Acc@10: 0.415 Acc@100: 0.604 AccSum: 1.206
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.245 Acc@10: 0.499 Acc@100: 0.735 AccSum: 1.480
[epoch 3][test] Acc@1: 0.259 Acc@10: 0.526 Acc@100: 0.711 AccSum: 1.496
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.221 Acc@10: 0.470 Acc@100: 0.697 AccSum: 1.388
[epoch 5][dev ] Acc@1: 0.263 Acc@10: 0.531 Acc@100: 0.745 AccSum: 1.539
[epoch 5][test] Acc@1: 0.279 Acc@10: 0.555 Acc@100: 0.732 AccSum: 1.566
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.269 Acc@10: 0.534 Acc@100: 0.744 AccSum: 1.547
[epoch 6][test] Acc@1: 0.274 Acc@10: 0.541 Acc@100: 0.725 AccSum: 1.540
[best epoch: 6]
[epoch 7][dev ] Acc@1: 0.242 Acc@10: 0.501 Acc@100: 0.70

In [13]:
save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
torch.save(best_sd, save_path)
model = multilevel_attention(input_size=768, target_size=768, lin=True).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

multilevel_attention(
  (self_attn): SelfAttention()
  (fc): Linear(in_features=768, out_features=768, bias=True)
)

In [14]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.380


##### linear + relu

In [16]:
# input: contextual_term
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [17]:
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.132 Acc@10: 0.336 Acc@100: 0.586 AccSum: 1.054
[epoch 1][test] Acc@1: 0.136 Acc@10: 0.347 Acc@100: 0.576 AccSum: 1.060
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.209 Acc@10: 0.426 Acc@100: 0.659 AccSum: 1.295
[epoch 2][test] Acc@1: 0.193 Acc@10: 0.430 Acc@100: 0.648 AccSum: 1.272
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.217 Acc@10: 0.445 Acc@100: 0.683 AccSum: 1.344
[epoch 3][test] Acc@1: 0.207 Acc@10: 0.468 Acc@100: 0.659 AccSum: 1.334
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.216 Acc@10: 0.460 Acc@100: 0.675 AccSum: 1.351
[epoch 4][test] Acc@1: 0.211 Acc@10: 0.468 Acc@100: 0.672 AccSum: 1.351
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.238 Acc@10: 0.475 Acc@100: 0.691 AccSum: 1.404
[epoch 5][test] Acc@1: 0.223 Acc@10: 0.486 Acc@100: 0.687 AccSum: 1.397
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.242 Acc@10: 0.462 Acc@100: 0.688 AccSum: 1.392
[epoch 7][dev ] Acc@1: 0.241 Acc@10: 0.459 Acc@100: 0.69

In [18]:
save_path = "bert_context_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=768, target_size=768).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

# val_or_test(model, test_loader, testset.search_space_embeddings, \
#                 epoch=-1, typ="test", bsz=128, GPU_INDEX=GPU_INDEX)

fc_aligner(
  (fc1): Linear(in_features=768, out_features=768, bias=True)
)

In [19]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, \
              dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.330


### fasttext align

#### term -> surface

In [20]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [21]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2,max_violation=True,device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.002 Acc@100: 0.003 AccSum: 0.005
[epoch 1][dev ] Acc@1: 0.031 Acc@10: 0.190 Acc@100: 0.499 AccSum: 0.720
[epoch 1][test] Acc@1: 0.055 Acc@10: 0.193 Acc@100: 0.465 AccSum: 0.712
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.123 Acc@10: 0.393 Acc@100: 0.686 AccSum: 1.202
[epoch 2][test] Acc@1: 0.122 Acc@10: 0.378 Acc@100: 0.673 AccSum: 1.173
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.141 Acc@10: 0.466 Acc@100: 0.733 AccSum: 1.341
[epoch 3][test] Acc@1: 0.147 Acc@10: 0.435 Acc@100: 0.727 AccSum: 1.309
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.159 Acc@10: 0.458 Acc@100: 0.758 AccSum: 1.375
[epoch 4][test] Acc@1: 0.170 Acc@10: 0.473 Acc@100: 0.743 AccSum: 1.385
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.168 Acc@10: 0.481 Acc@100: 0.750 AccSum: 1.399
[epoch 5][test] Acc@1: 0.172 Acc@10: 0.493 Acc@100: 0.753 AccSum: 1.418
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.173 Acc@10: 0.490 Acc@100: 0.764 AccSum: 1.426
[epoch 6][test] Acc@1: 0.189 Acc@10: 0.503 Acc@100: 0.76

In [22]:
save_path = "ft_term_to_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [23]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.322


#### term -> graph

In [6]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [7]:
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.003 Acc@10: 0.026 Acc@100: 0.107 AccSum: 0.136
[epoch 1][test] Acc@1: 0.008 Acc@10: 0.030 Acc@100: 0.111 AccSum: 0.149
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.002 Acc@10: 0.039 Acc@100: 0.170 AccSum: 0.211
[epoch 2][test] Acc@1: 0.011 Acc@10: 0.049 Acc@100: 0.180 AccSum: 0.241
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.011 Acc@10: 0.065 Acc@100: 0.209 AccSum: 0.285
[epoch 3][test] Acc@1: 0.016 Acc@10: 0.067 Acc@100: 0.210 AccSum: 0.293
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.020 Acc@10: 0.058 Acc@100: 0.205 AccSum: 0.283
[epoch 5][dev ] Acc@1: 0.018 Acc@10: 0.075 Acc@100: 0.221 AccSum: 0.314
[epoch 5][test] Acc@1: 0.014 Acc@10: 0.079 Acc@100: 0.215 AccSum: 0.307
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.014 Acc@10: 0.081 Acc@100: 0.249 AccSum: 0.344
[epoch 6][test] Acc@1: 0.014 Acc@10: 0.083 Acc@100: 0.227 AccSum: 0.324
[best epoch: 6]
[epoch 7][dev ] Acc@1: 0.014 Acc@10: 0.082 Acc@100: 0.25

In [8]:
save_path = "term_to_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=300).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
)

In [10]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=256, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, sample=False,\
#             GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))


MRR: 0.042


#### term -> surface+graph

In [11]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [12]:
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.044 Acc@10: 0.185 Acc@100: 0.429 AccSum: 0.658
[epoch 1][test] Acc@1: 0.041 Acc@10: 0.160 Acc@100: 0.407 AccSum: 0.608
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.063 Acc@10: 0.276 Acc@100: 0.555 AccSum: 0.894
[epoch 2][test] Acc@1: 0.080 Acc@10: 0.253 Acc@100: 0.528 AccSum: 0.861
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.078 Acc@10: 0.316 Acc@100: 0.596 AccSum: 0.990
[epoch 3][test] Acc@1: 0.098 Acc@10: 0.289 Acc@100: 0.566 AccSum: 0.953
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.094 Acc@10: 0.346 Acc@100: 0.610 AccSum: 1.051
[epoch 4][test] Acc@1: 0.099 Acc@10: 0.321 Acc@100: 0.606 AccSum: 1.025
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.106 Acc@10: 0.366 Acc@100: 0.628 AccSum: 1.100
[epoch 5][test] Acc@1: 0.103 Acc@10: 0.340 Acc@100: 0.636 AccSum: 1.079
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.115 Acc@10: 0.382 Acc@100: 0.648 AccSum: 1.145
[epoch 6][test] Acc@1: 0.113 Acc@10: 0.356 Acc@100: 0.63

In [13]:
save_path = "ft_term_to_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=300, target_size=600).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=300, out_features=600, bias=True)
)

In [14]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGD: %.3f MGD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.203


### combine BERT with fasttext

#### ft_term+bert_term -> ft_surface+bert_surface+graph

In [12]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [14]:
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.002 Acc@100: 0.005 AccSum: 0.007
[epoch 1][dev ] Acc@1: 0.169 Acc@10: 0.399 Acc@100: 0.681 AccSum: 1.249
[epoch 1][test] Acc@1: 0.167 Acc@10: 0.422 Acc@100: 0.669 AccSum: 1.259
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.225 Acc@10: 0.472 Acc@100: 0.738 AccSum: 1.435
[epoch 2][test] Acc@1: 0.210 Acc@10: 0.503 Acc@100: 0.734 AccSum: 1.446
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.250 Acc@10: 0.504 Acc@100: 0.765 AccSum: 1.519
[epoch 3][test] Acc@1: 0.233 Acc@10: 0.544 Acc@100: 0.770 AccSum: 1.547
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.256 Acc@10: 0.520 Acc@100: 0.773 AccSum: 1.550
[epoch 4][test] Acc@1: 0.245 Acc@10: 0.550 Acc@100: 0.772 AccSum: 1.567
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.268 Acc@10: 0.534 Acc@100: 0.781 AccSum: 1.583
[epoch 5][test] Acc@1: 0.253 Acc@10: 0.564 Acc@100: 0.789 AccSum: 1.606
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.275 Acc@10: 0.557 Acc@100: 0.791 AccSum: 1.623
[epoch 6][test] Acc@1: 0.252 Acc@10: 0.569 Acc@100: 0.79

In [15]:
save_path = "ft_bert_context_term_to_ft_bert_surface+graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1368, bias=True)
)

In [16]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.388


#### ft_term+bert_term -> ft_surface+bert_surface

In [19]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
snomed_vec_path = os.path.join(FEATURE_PATH, "snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_new.pkl")
term_vec_path = os.path.join(FEATURE_PATH,"fasttext_term_embeddings+chv_term_embeddings_BERTbr_ts100k.pkl")

train_loader, _ = get_loader_single(FULL_CHV_PATH, chv_train_path,
                                    term_vec_path, snomed_vec_path, 
                                    batch_size=64, shuffle=True, 
                                    num_workers=10, gran="general")
val_loader, valset = get_loader_single(FULL_CHV_PATH, chv_dev_path, 
                                       term_vec_path, snomed_vec_path, 
                                       batch_size=64, shuffle=False, 
                                       num_workers=10, gran="general")
test_loader, testset = get_loader_single(FULL_CHV_PATH, chv_test_path, 
                                         term_vec_path, snomed_vec_path, 
                                         batch_size=64, shuffle=False, 
                                         num_workers=10, gran="general", 
                                         load_target=True)

[target embeddings loaded, search space size: 350830]


In [20]:
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
train_params = model.parameters()
optimizer = torch.optim.AdamW(train_params, lr=1e-4)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train(model, train_params, optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.001 AccSum: 0.001
[epoch 1][dev ] Acc@1: 0.134 Acc@10: 0.339 Acc@100: 0.589 AccSum: 1.062
[epoch 1][test] Acc@1: 0.136 Acc@10: 0.333 Acc@100: 0.575 AccSum: 1.045
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.206 Acc@10: 0.461 Acc@100: 0.713 AccSum: 1.380
[epoch 2][test] Acc@1: 0.205 Acc@10: 0.466 Acc@100: 0.717 AccSum: 1.387
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.239 Acc@10: 0.494 Acc@100: 0.754 AccSum: 1.487
[epoch 3][test] Acc@1: 0.240 Acc@10: 0.521 Acc@100: 0.750 AccSum: 1.511
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.260 Acc@10: 0.513 Acc@100: 0.768 AccSum: 1.542
[epoch 4][test] Acc@1: 0.245 Acc@10: 0.528 Acc@100: 0.754 AccSum: 1.528
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.264 Acc@10: 0.521 Acc@100: 0.771 AccSum: 1.556
[epoch 5][test] Acc@1: 0.257 Acc@10: 0.540 Acc@100: 0.761 AccSum: 1.558
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.274 Acc@10: 0.528 Acc@100: 0.780 AccSum: 1.582
[epoch 6][test] Acc@1: 0.266 Acc@10: 0.551 Acc@100: 0.76

In [21]:
save_path = "ft_bert_context_term_to_ft_bert_surface_state_dict.pkl"
torch.save(best_sd, save_path)
model = fc_aligner(input_size=1068, target_size=1068).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

fc_aligner(
  (fc1): Linear(in_features=1068, out_features=1068, bias=True)
)

In [22]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=128, dumb=True, GPU_INDEX=GPU_INDEX)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=256, GPU_INDEX=GPU_INDEX)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.390


####  ft_term+bert_term (multilevel attention) -> ft_surface+bert_surface(+graph)

In [7]:
chv_train_path = os.path.join(DATA_DIR, 'train.csv')
chv_dev_path = os.path.join(DATA_DIR, 'dev.csv')
chv_test_path = os.path.join(DATA_DIR, 'test.csv')
#snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST.pkl")
snomed_vec_path = os.path.join(FEATURE_PATH,"snomed_surface_fasttext_embeddings_full+snomed_surface_bert_ts100k_embeddings_all_names_mean_full_last_layer_with_ST+snomed_node2vec_300d_20wl_embeddings.pkl")
term_vec_path1 = os.path.join(FEATURE_PATH,"fasttext_term_embeddings.pkl")
term_vec_path2 = os.path.join(FEATURE_PATH,"chv_term_embeddings_BERTbr_ts100k_multilevel_all.pkl")

gran="general"

train_dict = build_train_dict(chv_train_path, gran=gran)

train_loader, _ = get_loader_mla(FULL_CHV_PATH, chv_train_path, 
                                 term_vec_path1, term_vec_path2, 
                                 snomed_vec_path, batch_size=64, 
                                 shuffle=True, num_workers=10, gran=gran)
val_loader, valset = get_loader_mla(FULL_CHV_PATH, chv_dev_path, 
                                    term_vec_path1, term_vec_path2, 
                                    snomed_vec_path, batch_size=64, 
                                    shuffle=False, num_workers=10, gran=gran)
test_loader, testset = get_loader_mla(FULL_CHV_PATH, chv_test_path, 
                                      term_vec_path1, term_vec_path2, 
                                      snomed_vec_path, batch_size=64,
                                      shuffle=False,  num_workers=10, 
                                      gran=gran, load_target=True)

[target embeddings loaded, search space size: 350830]


In [11]:
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)

# init mla_bert using previous weights
#save_path = "bert_context_term_to_surface_attn_state_dict.pkl"
#model.mla_bert.load_state_dict({"self_attn.att_weights":torch.load(save_path)["self_attn.att_weights"]})
#model.mla_bert.load_state_dict(torch.load(save_path))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criterion = TripletLoss(margin=0.2, max_violation=True, device=GPU_INDEX)

best_sd = train_joint(model, model.parameters(), optimizer, criterion, \
      train_loader, val_loader, valset, test_loader, \
      testset, num_epoch=50, dor=0.0, GPU_INDEX=GPU_INDEX)

[epoch 0][dev ] Acc@1: 0.000 Acc@10: 0.000 Acc@100: 0.000 AccSum: 0.000
[epoch 1][dev ] Acc@1: 0.074 Acc@10: 0.227 Acc@100: 0.426 AccSum: 0.726
[epoch 1][test] Acc@1: 0.048 Acc@10: 0.162 Acc@100: 0.365 AccSum: 0.575
[best epoch: 1]
[epoch 2][dev ] Acc@1: 0.201 Acc@10: 0.445 Acc@100: 0.693 AccSum: 1.338
[epoch 2][test] Acc@1: 0.167 Acc@10: 0.435 Acc@100: 0.680 AccSum: 1.282
[best epoch: 2]
[epoch 3][dev ] Acc@1: 0.249 Acc@10: 0.510 Acc@100: 0.743 AccSum: 1.502
[epoch 3][test] Acc@1: 0.235 Acc@10: 0.512 Acc@100: 0.741 AccSum: 1.487
[best epoch: 3]
[epoch 4][dev ] Acc@1: 0.258 Acc@10: 0.554 Acc@100: 0.775 AccSum: 1.587
[epoch 4][test] Acc@1: 0.262 Acc@10: 0.550 Acc@100: 0.763 AccSum: 1.576
[best epoch: 4]
[epoch 5][dev ] Acc@1: 0.277 Acc@10: 0.565 Acc@100: 0.798 AccSum: 1.640
[epoch 5][test] Acc@1: 0.276 Acc@10: 0.573 Acc@100: 0.775 AccSum: 1.625
[best epoch: 5]
[epoch 6][dev ] Acc@1: 0.299 Acc@10: 0.588 Acc@100: 0.806 AccSum: 1.694
[epoch 6][test] Acc@1: 0.281 Acc@10: 0.584 Acc@100: 0.77

In [8]:
save_path = "ensemble_w_graph_state_dict.pkl"
torch.save(best_sd, save_path)
model = mla_bert_ft_ensemble(target_len=1368).cuda(GPU_INDEX)
model.load_state_dict(torch.load(save_path))
model.eval()

mla_bert_ft_ensemble(
  (mla_bert): multilevel_attention(
    (self_attn): SelfAttention()
  )
  (fc): Linear(in_features=1068, out_features=1368, bias=True)
  (fc_aligner): fc_aligner(
    (fc1): Linear(in_features=300, out_features=300, bias=True)
  )
)

In [19]:
mrr = evalMRR(test_loader, model, testset.search_space_embeddings, bsz=64, \
              dumb=True, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("MRR: %.3f" % mrr)

# gds = evalGD(test_loader, testset, model, topk=10, dor=0.0, bsz=128, \
#              dumb=False, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("MGRD: %.3f MGRD@10: %.3f" % gds)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=32.0), HTML(value='')))


MRR: 0.446


In [10]:
# # Exact Match + Neural
# accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
#                            ed_dict=None, topks=(1,10,100), bsz=128, GPU_INDEX=GPU_INDEX, ensemble=True)
# print ("[smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
#        (accs[0], accs[1], accs[2], sum(accs)))

# Dict + Stoilos + Neural
accs, pred_dict= compute_metrics_backoff(test_loader, testset, model, sf2id=SF2ID, train_dict=None, \
                           ed_dict=ONT_ED_DICT, topks=(1,10,100), bsz=64, GPU_INDEX=GPU_INDEX, ensemble=True)
print ("[dict+smatch+model] Acc@1: %.3f Acc@10: %.3f Acc@100: %.3f Acc sum: %.3f" % \
       (accs[0], accs[1], accs[2], sum(accs)))

ratio attempted by heuristics: 60.8%
0.48360450563204005
[dict+smatch+model] Acc@1: 0.555 Acc@10: 0.679 Acc@100: 0.775 Acc sum: 2.009
