In [None]:
!pip install faiss-gpu
!pip install pytorch_metric_learning
!pip install transformers

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Collecting pytorch_metric_learning
  Downloading pytorch_metric_learning-2.3.0-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pytorch_metric_learning
Successfully installed pytorch_metric_learning-2.3.0
Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [9

In [None]:
import faiss
from tqdm import tqdm
import numpy as np
import pickle as pkl
from collections import OrderedDict
from typing import Dict
from transformers import AutoModel, AutoTokenizer
from sklearn.decomposition import PCA
import sys
! cp -r /content/drive/MyDrive/sapBERT-DUT-cambridge/src /content/src
from src.model_wrapper import Model_Wrapper
import pandas as pd
import sklearn
from sklearn.model_selection import ParameterGrid
import re

In [None]:
# Maps an evaluation-set name to the sub-directory name that is interpolated
# into the model result paths (presumably the fine-tuning set the evaluated
# model was trained on -- confirm against the training notebooks).
finetunesets = dict(
    [
        ('nl-wiki_bel-test', 'nl_wiki_bel_traintestsplit'),
        ('mantra_cleaned', 'nl_wiki_bel_all'),
    ]
)

In [None]:
# Single-run configuration: choose one model/test-set combination and load the
# matching evaluation data into module-level variables.

# Make the project root on Drive importable.
sys.path.append('/content/drive/MyDrive/sapBERT-DUT-cambridge')
sapBERT_epochs = 0
finetune_epochs = 10
test_dataset = 'nl-wiki_bel-valid'
# NOTE(review): `finetunesets` as defined in this notebook only has the keys
# 'nl-wiki_bel-test' and 'mantra_cleaned', so this lookup raises KeyError for
# 'nl-wiki_bel-valid' on a fresh kernel -- confirm the intended mapping.
model_directory_path = f'/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa_sapBERT/{sapBERT_epochs}_epoch/ft_{finetune_epochs}_epoch/{finetunesets[test_dataset]}'

# Evaluation frame; the columns read below are 'sentence', 'mention' and either
# 'checked_cuis' or 'cui' depending on the data set.
data = pd.read_pickle(f"/content/drive/MyDrive/sapBERT-DUT-cambridge/evaluation/{test_dataset}.pkl")
sentences = data['sentence'].to_list()
mentions = data['mention'].to_list()
if test_dataset == 'nl-wiki_bel-valid':
    trues = data['checked_cuis'].to_list()
else:
    # Left as a Series here; the comprehension below iterates over its values.
    trues = data['cui']
# lstrip/rstrip take a *set of characters*: this removes any leading '[' / "'"
# and trailing "'" / ']' so that "['C0040616']" becomes "C0040616".
trues = [true.lstrip("['").rstrip("']") for true in trues]

In [None]:
# Lookup tables used during evaluation:
#   idx2cui    - indexed with a FAISS neighbour id to obtain a CUI
#   cui2string - indexed with a CUI to obtain its display string
#   relations  - keyed by "<cui>|<cui>" strings, value describes the relation
# NOTE: unpickling executes arbitrary code; only load files you trust.
# `with` guarantees each file handle is closed once the object is loaded.
with open('/content/drive/MyDrive/sapBERT-DUT-cambridge/index/id2cui.pkl', 'rb') as handle:
    idx2cui = pkl.load(handle)
with open('/content/drive/MyDrive/sapBERT-DUT-cambridge/index/cui_to_string', 'rb') as handle:
    cui2string = pkl.load(handle)
with open('/content/drive/MyDrive/sapBERT_resources/relations', 'rb') as handle:
    relations = pkl.load(handle)

In [None]:
from src.model_wrapper import (
    Model_Wrapper
)

In [None]:
def get_query_embedding(queries, tokenizer, model, batch_size=128, max_length=25):
    """Embed `queries` with `model` and return the first-token representations.

    Args:
        queries: Sliceable sequence of strings to embed.
        tokenizer: Tokenizer exposing `batch_encode_plus`; called with
            `return_tensors="pt"`, so it must return PyTorch tensors.
        model: Encoder called as `model(**tokens)`; the embedding is taken as
            `output[0][:, 0, :]` (first token of the first output).
        batch_size: Number of queries encoded per forward pass.
        max_length: Every query is padded/truncated to this many tokens.

    Returns:
        A 2-D numpy array with one row per query, in input order.

    Raises:
        ValueError: If `queries` is empty.
    """
    import torch  # local import: torch is not imported at notebook level

    if len(queries) == 0:
        raise ValueError("get_query_embedding() requires at least one query")

    # Send the inputs to wherever the model lives. If the model does not expose
    # parameters, fall back to the previous CUDA-only behaviour.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda')

    batch_reps = []
    # Inference only: without no_grad() every batch keeps its autograd graph
    # alive, which wastes GPU memory for no benefit.
    with torch.no_grad():
        for start in tqdm(range(0, len(queries), batch_size)):
            toks = tokenizer.batch_encode_plus(queries[start:start + batch_size],
                                               padding="max_length",
                                               max_length=max_length,
                                               truncation=True,
                                               return_tensors="pt")
            toks_on_device = {name: tensor.to(device) for name, tensor in toks.items()}
            output = model(**toks_on_device)

            first_token_rep = output[0][:, 0, :]
            batch_reps.append(first_token_rep.detach().cpu().numpy())

    return np.concatenate(batch_reps, axis=0)

def query_index(queries, tokenizer, model, index, idx2cui, cui2string, pca):
    """Predict one CUI per query via nearest-neighbour search in a FAISS index.

    Args:
        queries: Strings to link (passed to `get_query_embedding`).
        tokenizer: Tokenizer forwarded to `get_query_embedding`.
        model: Encoder forwarded to `get_query_embedding`.
        index: Object exposing `search(vectors, k)` that returns
            `(distances, neighbour_ids)`, e.g. a FAISS index.
        idx2cui: Indexed with a neighbour id to obtain the predicted CUI.
        cui2string: Unused; kept so existing callers keep working.
        pca: Fitted transformer exposing `transform`, applied to the raw
            embeddings before searching.

    Returns:
        A list with the CUI of the nearest neighbour of every query.
    """
    query_embs = get_query_embedding(queries, tokenizer, model)
    # FAISS expects a C-contiguous float32 matrix of shape (n_queries, dim).
    query_embs = np.ascontiguousarray(pca.transform(query_embs), dtype=np.float32)

    # One batched search instead of one call per query. This also removes the
    # hard-coded (1, 256) reshape, so any PCA output size matching the index
    # works. k stays at 5 as before, although only the top hit is used.
    _, neighbors = index.search(query_embs, 5)

    preds = []
    for query_neighbors in tqdm(neighbors):
        preds.append(idx2cui[query_neighbors[0]])

    return preds

In [None]:
def getResources(model_directory_path):
    """Load tokenizer, encoder, FAISS index and fitted PCA for one model.

    Args:
        model_directory_path: Directory of the model to evaluate. If it
            contains '/medRoBERTa/ft_0_epoch/' the base model is loaded from
            the Hugging Face Hub ('CLTL/MedRoBERTa.nl') and its index/PCA are
            read from the fixed base-model results directory; otherwise the
            model, index and PCA are all read from `model_directory_path`.

    Returns:
        Tuple `(tokenizer, model, index, pca)`.
    """
    if '/medRoBERTa/ft_0_epoch/' in model_directory_path:
        # Base model: no local checkpoint, weights come from the Hub.
        tokenizer = AutoTokenizer.from_pretrained('CLTL/MedRoBERTa.nl')
        model = AutoModel.from_pretrained('CLTL/MedRoBERTa.nl').to('cuda')
        resource_dir = '/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa/ft_0_epoch'
    else:
        model_wrapper = Model_Wrapper().load_model(
                path=f'{model_directory_path}',
                max_length=25,
                use_cuda=True,
            )
        tokenizer = model_wrapper.get_dense_tokenizer()
        model = model_wrapper.get_dense_encoder()
        resource_dir = model_directory_path

    index = faiss.read_index(f'{resource_dir}/index')
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # `with` closes the handle that the previous bare open() leaked.
    with open(f'{resource_dir}/pca', "rb") as pca_file:
        pca = pkl.load(pca_file)
    return tokenizer, model, index, pca

In [None]:
def checkRelations(pred, trues, relations):
    """Look up a direct relation between `pred` and any of the gold CUIs.

    `relations` is keyed by "<cui>|<cui>" strings. A relation counts regardless
    of which side of the key the prediction is on. Previously the reverse
    direction was only checked when `trues` was a single string, so list input
    silently missed "<pred>|<true>" entries.

    Args:
        pred: Predicted CUI.
        trues: A single gold CUI, or a list/tuple of gold CUIs.
        relations: Mapping from "<cui>|<cui>" to the relation value.

    Returns:
        The value stored for the first matching key, or False if there is none.
        All "<true>|<pred>" keys are tried (in `trues` order) before any
        "<pred>|<true>" key, so earlier results for list input are unchanged
        wherever a match was already found.
    """
    candidates = list(trues) if isinstance(trues, (list, tuple)) else [trues]

    forward_keys = [f"{true}|{pred}" for true in candidates]
    reverse_keys = [f"{pred}|{true}" for true in candidates]

    for key in forward_keys + reverse_keys:
        if key in relations:
            return relations[key]

    return False

In [None]:
def checkRelation(preds, trues, relations):
    """Score every prediction as exact match, directly related, or miss.

    Args:
        preds: Predicted CUIs, one per example.
        trues: Gold label strings, one per example; each may contain one or
            more CUIs (anything matching ``C\\d{7}`` is extracted).
        relations: Mapping handed to `checkRelations`.

    Returns:
        Tuple `(trues_, dist_1_relas, relas)` of equally long lists:
        - trues_: the gold CUI to score against. This is the prediction itself
          when it is among the gold CUIs, otherwise the first gold CUI.
        - dist_1_relas: True for an exact match or a direct relation.
        - relas: the relation value for related-but-not-exact predictions,
          '' otherwise.
    """
    cui_pattern = re.compile(r'C\d{7}')  # raw string: '\d' is not a valid str escape

    trues_ = []
    relas = []
    dist_1_relas = []

    for i, pred in enumerate(preds):
        raw_true = trues[i]
        current_trues = cui_pattern.findall(raw_true)

        if pred in current_trues:
            trues_.append(pred)
            dist_1_relas.append(True)
            relas.append('')
            continue

        # Look relations up with the cleaned CUIs. Splitting the raw label on
        # ',' leaves quotes/spaces on multi-CUI labels (e.g. "C1', 'C2"), so
        # those keys could never match. Only fall back to the raw pieces when
        # the label holds no well-formed CUI at all, which also avoids the
        # IndexError the old `current_trues[0]` raised in that case.
        candidates = current_trues if current_trues else raw_true.split(',')
        rela = checkRelations(pred, candidates, relations)

        trues_.append(candidates[0])
        if rela:
            dist_1_relas.append(True)
            relas.append(rela)
        else:
            dist_1_relas.append(False)
            relas.append('')

    return trues_, dist_1_relas, relas

In [None]:
def predict(model_directory_path, sentences, mentions, idx2cui, cui2string):
    """Load the resources of one model and predict a CUI for every mention.

    `sentences` is accepted but not used; only `mentions` are embedded.
    Returns the list produced by `query_index`.
    """
    resources = getResources(model_directory_path)
    dense_tokenizer, dense_encoder, faiss_index, fitted_pca = resources
    return query_index(
        mentions,
        dense_tokenizer,
        dense_encoder,
        faiss_index,
        idx2cui,
        cui2string,
        fitted_pca,
    )

In [None]:
def evaluate(preds, trues, relas):
    """Compute and print exact-match accuracy and 1-distance accuracy.

    Args:
        preds: Predicted CUIs.
        trues: Gold CUIs, aligned with `preds`. (The previous version ignored
            this argument and read the global `checked_trues` instead.)
        relas: One truthy/falsy flag per example; truthy means the prediction
            was an exact match or directly related to the gold CUI.

    Returns:
        Tuple `(acc, acc_1dist)` of floats in [0, 1].

    Raises:
        ValueError: If an input is empty or `preds`/`trues` differ in length.
    """
    if len(preds) != len(trues):
        raise ValueError(
            f"preds and trues must have the same length, got {len(preds)} and {len(trues)}"
        )
    if len(preds) == 0 or len(relas) == 0:
        raise ValueError("evaluate() requires at least one example")

    # Exact-match accuracy, computed directly so the function does not rely on
    # `sklearn.metrics` having been loaded as a side effect of other imports.
    acc = sum(pred == true for pred, true in zip(preds, trues)) / len(preds)
    acc_1dist = sum(relas) / len(relas)

    print(f"Accuracy: {acc}")
    # Was `print('...', {acc_1dist})`, which printed a one-element set: "{0.39}".
    print(f"1-dist accuracy: {acc_1dist}")

    return acc, acc_1dist

In [None]:
def saveResults(sentences, mentions, cui2string, preds, trues, relas, save=False, output_dir=None):
    """Build a per-example results table and optionally write it to disk.

    Args:
        sentences: Source sentences, one per example.
        mentions: Mention strings, one per example.
        cui2string: Mapping from CUI to display string.
        preds: Predicted CUIs.
        trues: Gold CUIs.
        relas: Relation value per example ('' when none).
        save: When True, write `predictions.csv` and a pickled `preds` file.
        output_dir: Directory to write to. Defaults to the module-level
            `model_directory_path`, which is what the function always used
            before this parameter existed.

    Returns:
        The results DataFrame. Examples whose CUIs are missing from
        `cui2string` are printed and left out of the table.
    """
    columns = ['sentence', 'mention', 'prediction', 'label', 'cui_prediction', 'cui_label', 'relation']
    results = []

    for i, true in enumerate(trues):
        try:
            results.append([sentences[i], mentions[i], cui2string[preds[i]], cui2string[true], preds[i], true, relas[i]])
        except (KeyError, IndexError):
            # Best effort: report the example that cannot be rendered and go on.
            # Only lookup failures are tolerated; anything else should surface.
            print(mentions[i], true, preds[i])
    df_results = pd.DataFrame(results, columns=columns)

    if save:
        if output_dir is None:
            output_dir = model_directory_path  # notebook-level global
        df_results.to_csv(f'{output_dir}/predictions.csv')

        with open(f'{output_dir}/preds', 'wb') as f:
            pkl.dump(preds, f)

    return df_results

In [None]:
# Grid sweep: evaluate every (sapBERT epochs, fine-tune epochs, test set)
# combination and collect one row of metrics per combination in `results`.
results = []

# NOTE(review): the '2nd_'/'3rd_' key prefixes presumably exist to steer the
# order in which ParameterGrid enumerates the combinations (the logged output
# shows 'testsets' varying fastest) -- confirm before renaming the keys.
param_grid = {
    '2nd_sapBERT_epochs' : [0, 1, 3, 10],
    '3rd_finetune_epochs' : [0, 1, 3, 10],
    'testsets' : ['nl-wiki_bel-test', 'mantra_cleaned']
}

for c in list(ParameterGrid(param_grid)):
    print(c)
    # These assignments overwrite the module-level variables of the same name
    # on every iteration.
    test_dataset = c['testsets']
    finetune_epochs = c['3rd_finetune_epochs']
    sapBERT_epochs = c['2nd_sapBERT_epochs']

    # Pick the results directory for this combination.
    # Base model: neither second-phase pre-training nor fine-tuning.
    if c['2nd_sapBERT_epochs'] == 0 and c['3rd_finetune_epochs'] == 0:
        model_directory_path = f'/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa/ft_0_epoch/{finetunesets[test_dataset]}'
    # Fine-tuned only (no second-phase pre-training).
    elif c['2nd_sapBERT_epochs'] == 0:
        model_directory_path = f'/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa/ft_{finetune_epochs}_epoch/{finetunesets[test_dataset]}'
    # Second-phase pre-training only. Note this path has no per-test-set
    # sub-directory, so both test sets use the same directory.
    elif c['3rd_finetune_epochs'] == 0:
        model_directory_path = f'/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa_sapBERT/{sapBERT_epochs}_epoch/ft_0_epoch'
    # Second-phase pre-training followed by fine-tuning.
    else:
        model_directory_path = f'/content/drive/MyDrive/sapBERT-DUT-cambridge/results/medRoBERTa_sapBERT/{sapBERT_epochs}_epoch/ft_{finetune_epochs}_epoch/{finetunesets[test_dataset]}'

    # Load the evaluation set; the gold-label column depends on the data set.
    data = pd.read_pickle(f"/content/drive/MyDrive/sapBERT-DUT-cambridge/evaluation/{test_dataset}.pkl")
    sentences = data['sentence'].to_list()
    mentions = data['mention'].to_list()
    if test_dataset == 'nl-wiki_bel-test':
        trues = data['checked_cuis'].to_list()
    else:
        # Left as a Series; the comprehension below iterates over its values.
        trues = data['cui']
    # Strip the list-literal punctuation: "['C0040616']" -> "C0040616".
    trues = [true.lstrip("['").rstrip("']") for true in trues]

    preds = predict(model_directory_path, sentences, mentions, idx2cui, cui2string)
    checked_trues, dist_1_relas, relas = checkRelation(preds, trues, relations)
    acc, acc_1dist = evaluate(preds, checked_trues, dist_1_relas)
    # save=True: the predictions are written into `model_directory_path`.
    saveResults(sentences, mentions, cui2string, preds, checked_trues, relas, True)

    results.append([c['2nd_sapBERT_epochs'], c['3rd_finetune_epochs'], c['testsets'], acc, acc_1dist])


{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 0, 'testsets': 'nl-wiki_bel-test'}


Downloading (…)lve/main/config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/893k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/539k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/504M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at CLTL/MedRoBERTa.nl and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 4/4 [00:03<00:00,  1.03it/s]
100%|██████████| 480/480 [00:00<00:00, 860.81it/s]


Accuracy: 0.20833333333333334
1-dist accuracy:  {0.39791666666666664}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 0, 'testsets': 'mantra_cleaned'}


Some weights of RobertaModel were not initialized from the model checkpoint at CLTL/MedRoBERTa.nl and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2/2 [00:00<00:00,  7.40it/s]
100%|██████████| 157/157 [00:00<00:00, 901.86it/s]


Accuracy: 0.42038216560509556
1-dist accuracy:  {0.5286624203821656}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 1, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.95it/s]
100%|██████████| 480/480 [00:00<00:00, 734.32it/s]


Accuracy: 0.22916666666666666
1-dist accuracy:  {0.42916666666666664}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 1, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  8.07it/s]
100%|██████████| 157/157 [00:00<00:00, 908.44it/s]


Accuracy: 0.4267515923566879
1-dist accuracy:  {0.5477707006369427}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 3, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.12it/s]
100%|██████████| 480/480 [00:00<00:00, 667.30it/s]


Accuracy: 0.225
1-dist accuracy:  {0.45}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 3, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.11it/s]
100%|██████████| 157/157 [00:00<00:00, 827.18it/s]


Accuracy: 0.4267515923566879
1-dist accuracy:  {0.5477707006369427}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 10, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.19it/s]
100%|██████████| 480/480 [00:00<00:00, 892.66it/s]


Accuracy: 0.24583333333333332
1-dist accuracy:  {0.45625}
{'2nd_sapBERT_epochs': 0, '3rd_finetune_epochs': 10, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.05it/s]
100%|██████████| 157/157 [00:00<00:00, 899.15it/s]


Accuracy: 0.40764331210191085
1-dist accuracy:  {0.5159235668789809}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 0, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.43it/s]
100%|██████████| 480/480 [00:00<00:00, 932.62it/s]


Accuracy: 0.10625
1-dist accuracy:  {0.17916666666666667}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 0, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.84it/s]
100%|██████████| 157/157 [00:00<00:00, 887.64it/s]


Accuracy: 0.2484076433121019
1-dist accuracy:  {0.29936305732484075}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 1, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.03it/s]
100%|██████████| 480/480 [00:00<00:00, 546.01it/s]


Accuracy: 0.27708333333333335
1-dist accuracy:  {0.4979166666666667}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 1, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.04it/s]
100%|██████████| 157/157 [00:00<00:00, 625.44it/s]


Accuracy: 0.46496815286624205
1-dist accuracy:  {0.6560509554140127}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 3, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.03it/s]
100%|██████████| 480/480 [00:00<00:00, 719.51it/s]


Accuracy: 0.2875
1-dist accuracy:  {0.48541666666666666}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 3, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.64it/s]
100%|██████████| 157/157 [00:00<00:00, 733.52it/s]


Accuracy: 0.445859872611465
1-dist accuracy:  {0.5987261146496815}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 10, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.98it/s]
100%|██████████| 480/480 [00:00<00:00, 555.25it/s]


Accuracy: 0.29375
1-dist accuracy:  {0.5125}
{'2nd_sapBERT_epochs': 1, '3rd_finetune_epochs': 10, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.20it/s]
100%|██████████| 157/157 [00:00<00:00, 731.55it/s]


Accuracy: 0.42038216560509556
1-dist accuracy:  {0.554140127388535}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 0, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.20it/s]
100%|██████████| 480/480 [00:00<00:00, 579.63it/s]


Accuracy: 0.2916666666666667
1-dist accuracy:  {0.5104166666666666}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 0, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.75it/s]
100%|██████████| 157/157 [00:00<00:00, 752.95it/s]


Accuracy: 0.5222929936305732
1-dist accuracy:  {0.6369426751592356}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 1, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.99it/s]
100%|██████████| 480/480 [00:00<00:00, 611.03it/s]


Accuracy: 0.3020833333333333
1-dist accuracy:  {0.5104166666666666}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 1, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.03it/s]
100%|██████████| 157/157 [00:00<00:00, 567.89it/s]


Accuracy: 0.5031847133757962
1-dist accuracy:  {0.6624203821656051}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 3, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.85it/s]
100%|██████████| 480/480 [00:00<00:00, 621.62it/s]


Accuracy: 0.3229166666666667
1-dist accuracy:  {0.525}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 3, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  6.81it/s]
100%|██████████| 157/157 [00:00<00:00, 688.94it/s]


Accuracy: 0.5095541401273885
1-dist accuracy:  {0.6560509554140127}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 10, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.95it/s]
100%|██████████| 480/480 [00:00<00:00, 839.39it/s]


Accuracy: 0.29375
1-dist accuracy:  {0.5104166666666666}
{'2nd_sapBERT_epochs': 3, '3rd_finetune_epochs': 10, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  3.56it/s]
100%|██████████| 157/157 [00:00<00:00, 760.60it/s]


Accuracy: 0.47770700636942676
1-dist accuracy:  {0.6178343949044586}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 0, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  5.79it/s]
100%|██████████| 480/480 [00:00<00:00, 628.47it/s]


Accuracy: 0.26875
1-dist accuracy:  {0.49166666666666664}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 0, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.91it/s]
100%|██████████| 157/157 [00:00<00:00, 807.52it/s]


Accuracy: 0.4840764331210191
1-dist accuracy:  {0.6305732484076433}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 1, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.23it/s]
100%|██████████| 480/480 [00:00<00:00, 840.76it/s]


Accuracy: 0.25833333333333336
1-dist accuracy:  {0.47291666666666665}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 1, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  6.61it/s]
100%|██████████| 157/157 [00:00<00:00, 618.26it/s]


Accuracy: 0.45222929936305734
1-dist accuracy:  {0.6242038216560509}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 3, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.27it/s]
100%|██████████| 480/480 [00:00<00:00, 863.03it/s]


Accuracy: 0.2833333333333333
1-dist accuracy:  {0.50625}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 3, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  8.04it/s]
100%|██████████| 157/157 [00:00<00:00, 811.71it/s]


Accuracy: 0.47770700636942676
1-dist accuracy:  {0.6242038216560509}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 10, 'testsets': 'nl-wiki_bel-test'}


100%|██████████| 4/4 [00:00<00:00,  6.02it/s]
100%|██████████| 480/480 [00:00<00:00, 866.24it/s]


Accuracy: 0.29583333333333334
1-dist accuracy:  {0.5104166666666666}
{'2nd_sapBERT_epochs': 10, '3rd_finetune_epochs': 10, 'testsets': 'mantra_cleaned'}


100%|██████████| 2/2 [00:00<00:00,  7.35it/s]
100%|██████████| 157/157 [00:00<00:00, 836.85it/s]


Accuracy: 0.47770700636942676
1-dist accuracy:  {0.6178343949044586}


In [None]:
# Turn the rows collected in `results` into a summary table and display it.
result_columns = ['sapBERT_epochs', 'finetune_epochs', 'test_set', 'acc', '1-dist_acc']
df_results = pd.DataFrame(data=results, columns=result_columns)
df_results

Unnamed: 0,sapBERT_epochs,finetune_epochs,test_set,acc,1-dist_acc
0,0,0,nl-wiki_bel-test,0.208333,0.397917
1,0,0,mantra_cleaned,0.420382,0.528662
2,0,1,nl-wiki_bel-test,0.229167,0.429167
3,0,1,mantra_cleaned,0.426752,0.547771
4,0,3,nl-wiki_bel-test,0.225,0.45
5,0,3,mantra_cleaned,0.426752,0.547771
6,0,10,nl-wiki_bel-test,0.245833,0.45625
7,0,10,mantra_cleaned,0.407643,0.515924
8,1,0,nl-wiki_bel-test,0.10625,0.179167
9,1,0,mantra_cleaned,0.248408,0.299363


In [None]:
# Write the summary table (including its index column) to Drive.
summary_csv_path = '/content/drive/MyDrive/sapBERT-DUT-cambridge/evaluation/hyperparameter-tuning.csv'
df_results.to_csv(summary_csv_path)

In [None]:
# Display the evaluation frame currently bound to `data`.
# NOTE(review): `data` is whatever the most recently executed cell assigned, so
# this output depends on execution order -- confirm which data set is intended.
data

Unnamed: 0_level_0,mention,cui,start_index,end_index,sentence,uniq_cui,checked_cuis
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2739,anxiolytisch,['C0040616'],16,28,"Temazepam werkt anxiolytisch, hypnotisch, anti...",C0040616,C0040616
4256,bewegingsziekte,['C0026603'],79,94,De top van het gebouw zwaait tijdens harde win...,C0026603,C0026603
144,darmstelsel,['C0017189'],89,100,"Amber, ook wel ambergris, is een overwegend gr...",C0017189,C0017189
2452,alcoholverslaafden,['C0001956'],104,122,"In januari 2007 heeft project ""BinnenPlaats"" b...",C0001956,C0001956
1027,maxillae,['C0024947'],77,85,In de ronde snuit vormen de tanddragende beend...,C0024947,C0024947
...,...,...,...,...,...,...,...
2445,Vasten,['C0015663'],182,188,"De ""Godsvredebeweging"" die uitging van de Abdi...",C0015663,C0015663
8154,baksoda,['C0074722'],38,45,SodabroodSodabrood is brood dat met baksoda ge...,C0074722,C0074722
4511,nikkel(II),['C0028013'],64,74,Het wordt ook toegepast bij de Nozaki-Hiyama-K...,C0028013,C0028013
2197,Longontsteking,['C0032285'],51,65,Op 1 mei 2006 overleed hij aan de gevolgen van...,C0032285,C0032285
