In [None]:
from transformers import AutoTokenizer, AutoModel
import yaml
from utils.helpers import normalize_score, cos_sim_score, mean_pooling
from tqdm import tqdm
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import torch

# Load the benchmark registry used by gen_statistics. Each entry is read there
# for the keys 'data_dir', 'min' and 'max'.
# The encoding is pinned to UTF-8 so parsing does not depend on the platform's
# locale default (e.g. cp1252 on Windows).
with open('./benchmarks.yaml', 'r', encoding='utf-8') as file:
    benchmarks = yaml.safe_load(file)


def gen_statistics(model, tokenizer):
    """Print the Pearson correlation of model similarity vs. gold scores per benchmark.

    For every entry of the module-level ``benchmarks`` mapping this function:

    1. reads the tab-separated file at the entry's ``data_dir``;
    2. embeds each distinct value of the ``Term1`` / ``Term2`` columns once,
       by running ``tokenizer`` and ``model`` and pooling the output with
       ``mean_pooling``;
    3. scores every row with ``cos_sim_score`` on the two term embeddings;
    4. correlates those predictions with the ``Score`` column after it has
       been passed through ``normalize_score`` with the entry's ``min`` and
       ``max``.

    Progress and results are emitted with ``tqdm.write``.

    Args:
        model: Callable invoked as ``model(**encoded)``; its output is handed
            to ``mean_pooling``.
        tokenizer: Callable invoked as ``tokenizer(term, padding=True,
            truncation=False, return_tensors="pt")``; the result must be
            unpackable with ``**`` and expose ``'attention_mask'``.

    Returns:
        None. Results are only written out, not returned.

    Raises:
        KeyError: If a benchmark entry lacks ``min`` or ``max``, or the data
            file lacks one of the ``Term1`` / ``Term2`` / ``Score`` columns.
    """
    for name, spec in benchmarks.items():
        tqdm.write(f'Processing {name} benchmark.')
        data = pd.read_csv(spec.get('data_dir'), sep='\t')

        # Pull the term columns out once; they are reused for the vocabulary
        # and for the per-row scoring below.
        first_terms = data['Term1'].tolist()
        second_terms = data['Term2'].tolist()

        # Distinct terms across both columns, so each one is embedded once
        # even when it appears in many pairs.
        terms = set(first_terms) | set(second_terms)

        # Named so they do not shadow the built-in min() / max().
        score_min = spec['min']
        score_max = spec['max']

        ## normalize scores
        scores = normalize_score(
            np.asarray(data['Score'].tolist()), min=score_min, max=score_max
        )

        embs = {}
        for term in terms:
            encoded = tokenizer(term, padding=True, truncation=False, return_tensors="pt")
            # Inference only: skip autograd bookkeeping for the forward pass.
            with torch.no_grad():
                output = model(**encoded)
            embs[term] = {'mean': mean_pooling(output, encoded['attention_mask'])}

        # Column-wise iteration avoids building a row Series per lookup, which
        # is what data.iloc[i][...] does on every access.
        pred = [
            cos_sim_score(embs.get(left), embs.get(right))
            for left, right in zip(first_terms, second_terms)
        ]

        corr, p_value = pearsonr(scores, pred)

        tqdm.write(f'Corr: {round(corr, 4)}, p-value: {p_value}')

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Evaluate the checkpoint stored at the relative path ./umlsbert.
# The identifier is bound once so the tokenizer and the model always come
# from the same source.
checkpoint = './umlsbert'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Results are written to the cell output; nothing is returned.
gen_statistics(model=model, tokenizer=tokenizer)

Processing pedersen benchmark.
Corr: 0.3191, p-value: 0.08564782196903878
Processing hliaoutakis benchmark.
Corr: 0.3687, p-value: 0.03190269920992404
Processing mayosrs benchmark.
Corr: 0.2736, p-value: 0.005627247864234288
Processing umnsrs benchmark.
Corr: 0.3628, p-value: 4.736195605580019e-19
Processing EHR-RelB benchmark.
Corr: 0.3193, p-value: 8.226481711885376e-87


In [8]:
# Evaluate 'medicalai/ClinicalBERT' (presumably a Hugging Face Hub model id).
# The identifier is bound once so the tokenizer and the model always come
# from the same source.
checkpoint = 'medicalai/ClinicalBERT'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Results are written to the cell output; nothing is returned.
gen_statistics(model=model, tokenizer=tokenizer)

Processing pedersen benchmark.
Corr: 0.2102, p-value: 0.2650015661072366
Processing hliaoutakis benchmark.
Corr: 0.2614, p-value: 0.1353965201101399
Processing mayosrs benchmark.
Corr: 0.2811, p-value: 0.004406990854757223
Processing umnsrs benchmark.
Corr: 0.3042, p-value: 1.3914538227810849e-13
Processing EHR-RelB benchmark.
Corr: 0.3088, p-value: 4.672518789923176e-81


In [9]:
# Evaluate 'emilyalsentzer/Bio_ClinicalBERT' (presumably a Hugging Face Hub
# model id). The identifier is bound once so the tokenizer and the model
# always come from the same source.
checkpoint = 'emilyalsentzer/Bio_ClinicalBERT'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Results are written to the cell output; nothing is returned.
gen_statistics(model=model, tokenizer=tokenizer)

Processing pedersen benchmark.
Corr: 0.1076, p-value: 0.5712579826728992
Processing hliaoutakis benchmark.
Corr: 0.1985, p-value: 0.26038542950651455
Processing mayosrs benchmark.
Corr: 0.1734, p-value: 0.08296951384465213
Processing umnsrs benchmark.
Corr: 0.2345, p-value: 1.655438056157631e-08
Processing EHR-RelB benchmark.
Corr: 0.3384, p-value: 5.771290879971461e-98


In [10]:
# Evaluate 'sentence-transformers/all-mpnet-base-v1' (presumably a Hugging
# Face Hub model id). The identifier is bound once so the tokenizer and the
# model always come from the same source.
checkpoint = 'sentence-transformers/all-mpnet-base-v1'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Results are written to the cell output; nothing is returned.
gen_statistics(model=model, tokenizer=tokenizer)

Processing pedersen benchmark.
Corr: 0.6968, p-value: 1.887848344199177e-05
Processing hliaoutakis benchmark.
Corr: 0.6423, p-value: 4.2222764894625485e-05
Processing mayosrs benchmark.
Corr: 0.4506, p-value: 2.262756438415897e-06
Processing umnsrs benchmark.
Corr: 0.448, p-value: 2.686355593852232e-29
Processing EHR-RelB benchmark.
Corr: 0.5132, p-value: 4.2836718531969154e-243
