In [13]:
import json
import pandas as pd
import plotly.express as px
from rich import print

In [14]:
from eval_bio_lms.model_utilities import MODEL_DEFS

In [15]:
# Lookup table from model name to its definition; used further down to tag
# result rows with each model's `cased` flag.
MODEL_NAME_TO_DEF = dict((definition.name, definition) for definition in MODEL_DEFS)

In [16]:
# Show every model definition under evaluation (`print` is rich's printer,
# imported at the top of the notebook).
print(MODEL_DEFS)

# Show Token Count Measures

In [26]:
# List the tokenization result files that are available to load.
!ls ../eval_bio_lms/tokenization_output/

mimic-corpus-token-counts-num-samples-10000.csv


In [27]:
# Load the token-count table; its columns are model names (see the preview below).
df_tok_counts = pd.read_csv('../eval_bio_lms/tokenization_output/mimic-corpus-token-counts-num-samples-10000.csv')

In [28]:
# Preview the token counts: one column per model, presumably one row per
# sampled document (confirm against the script that wrote the CSV).
df_tok_counts

Unnamed: 0,BioLinkBERT-base,BioLinkBERT-large,bert-base-uncased,bert-base-cased,roberta-base,roberta-large,biobert-base-cased-v1.2,scibert_scivocab_uncased,scibert_scivocab_cased,BiomedNLP-PubMedBERT-base-uncased-abstract,BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext,Bio_ClinicalBERT,bluebert_pubmed_uncased_L-24_H-1024_A-16,bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16,bluebert_pubmed_uncased_L-12_H-768_A-12,bluebert_pubmed_mimic_uncased_L-12_H-768_A-12
0,342,342,377,440,346,346,440,343,381,342,340,379,377,377,377,377
1,338,338,344,415,321,321,415,337,377,338,338,349,344,344,344,344
2,348,348,354,404,330,330,404,349,373,348,350,360,354,354,354,354
3,144,144,155,289,250,250,289,146,229,144,142,168,155,155,155,155
4,216,216,221,243,247,247,243,212,221,216,213,240,221,221,221,221
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,317,317,333,353,324,324,353,317,333,317,315,350,333,333,333,333
9996,393,393,414,446,382,382,446,393,414,393,395,413,414,414,414,414
9997,48,48,58,61,58,58,61,48,50,48,48,59,58,58,58,58
9998,1027,1027,1141,1281,2370,2370,1281,1070,1167,1027,1022,1196,1141,1141,1141,1141


In [29]:
# Mean token count per model, as a two-column frame sorted ascending, with
# each model's `cased` flag attached for colouring the chart.
df_plt = (
    df_tok_counts
    .mean()
    .rename_axis('model')
    .reset_index(name='mean tokens per sample')
    .sort_values('mean tokens per sample')
    .assign(
        cased=lambda frame: frame['model'].map(
            lambda name: MODEL_NAME_TO_DEF[name].cased
        )
    )
)
df_plt

Unnamed: 0,model,mean tokens per sample,cased
0,BioLinkBERT-base,472.875,False
1,BioLinkBERT-large,472.875,False
9,BiomedNLP-PubMedBERT-base-uncased-abstract,472.875,False
10,BiomedNLP-PubMedBERT-base-uncased-abstract-ful...,473.4483,False
7,scibert_scivocab_uncased,482.1416,False
2,bert-base-uncased,518.846,False
12,bluebert_pubmed_uncased_L-24_H-1024_A-16,518.846,False
13,bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16,518.846,False
14,bluebert_pubmed_uncased_L-12_H-768_A-12,518.846,False
15,bluebert_pubmed_mimic_uncased_L-12_H-768_A-12,518.846,False


In [30]:
# Horizontal bar chart of mean tokens per sample, coloured by whether the
# model is cased.
fig = px.bar(
    df_plt,
    x='mean tokens per sample',
    y='model',
    color="cased",
    height=500,
    title='Mean tokens per sample by model',
)
# Colouring by `cased` produces one trace per value, and plotly lays out
# categorical axes trace by trace, which would discard the sort applied to
# df_plt. Pin the y axis to the frame's row order so bars stay sorted.
fig.update_yaxes(categoryorder='array', categoryarray=df_plt['model'].tolist())
fig.show()

# Show Masked Language Modeling Measures

In [31]:
# List the masked-language-modeling result files that are available to load.
!ls ../eval_bio_lms/mlm_output/

mimic-corpus-mlm-num-samples-10000.csv


In [32]:
# Load the MLM evaluation results (loss, accuracy, perplexity and runtime
# statistics per model, as shown in the preview below).
df_mlm = pd.read_csv('../eval_bio_lms/mlm_output/mimic-corpus-mlm-num-samples-10000.csv')

In [33]:
# Preview the MLM results: one row per model.
df_mlm

Unnamed: 0,eval_loss,eval_accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second,model_name,num_seq,perplexity
0,12.562969,9e-06,57.336,644.133,2.529,BioLinkBERT-base,36932,285777.612244
1,2.813987,0.547744,140.9433,262.035,1.029,BioLinkBERT-large,36932,16.67627
2,3.855733,0.423751,64.0789,632.392,2.481,bert-base-uncased,40523,47.263256
3,3.338733,0.460666,71.4966,642.422,2.518,bert-base-cased,45931,28.183401
4,3.80742,0.552228,88.6963,574.364,2.244,roberta-base,50944,45.034123
5,5.972937,0.578766,210.3097,242.233,0.946,roberta-large,50944,392.657068
6,2.797929,0.511646,72.2515,635.71,2.491,biobert-base-cased-v1.2,45931,16.41063
7,2.662429,0.561726,59.6941,630.816,2.479,scibert_scivocab_uncased,37656,14.331052
8,2.526538,0.578574,65.6153,627.171,2.454,scibert_scivocab_cased,41152,12.510123
9,3.287645,0.49642,57.6582,640.533,2.515,BiomedNLP-PubMedBERT-base-uncased-abstract,36932,26.779719


In [34]:
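# Optional: uncomment to clip perplexity to the range [0, 100] so that extreme
# outliers (e.g. BioLinkBERT-base at ~285,778) do not dominate the perplexity
# facet of the chart below. Note that this overwrites df_mlm['perplexity'].
#df_mlm['perplexity'] = df_mlm['perplexity'].clip(0,100)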

In [37]:
# Reshape the MLM results to long form (one row per model/metric pair),
# order each metric's rows by value, then stack the metrics in a fixed
# order: loss, accuracy, perplexity.
metric_order = ['eval_loss', 'eval_accuracy', 'perplexity']
long_metrics = (
    df_mlm[[*metric_order, 'model_name']]
    .melt(id_vars=['model_name'])
    .sort_values(['variable', 'value'])
)
var_groups = dict(tuple(long_metrics.groupby("variable")))
df_plt = pd.concat([var_groups[metric] for metric in metric_order])
df_plt["cased"] = df_plt["model_name"].map(lambda name: MODEL_NAME_TO_DEF[name].cased)
df_plt

Unnamed: 0,model_name,variable,value,cased
11,Bio_ClinicalBERT,eval_loss,2.075504,False
15,bluebert_pubmed_mimic_uncased_L-12_H-768_A-12,eval_loss,2.401485,False
8,scibert_scivocab_cased,eval_loss,2.526538,True
7,scibert_scivocab_uncased,eval_loss,2.662429,False
6,biobert-base-cased-v1.2,eval_loss,2.797929,True
1,BioLinkBERT-large,eval_loss,2.813987,False
10,BiomedNLP-PubMedBERT-base-uncased-abstract-ful...,eval_loss,2.83615,False
9,BiomedNLP-PubMedBERT-base-uncased-abstract,eval_loss,3.287645,False
13,bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16,eval_loss,3.330667,False
3,bert-base-cased,eval_loss,3.338733,True


In [38]:
# One facet row per metric, coloured by whether the model is cased.
fig = px.bar(
    df_plt,
    x='value',
    y='model_name',
    facet_row='variable',
    color='cased',
    height=1200,
)
# Loss, accuracy and perplexity are on very different scales, so unlink the
# x axes of the facets. The call is the cell's last expression, so its
# return value is what the notebook renders.
fig.update_xaxes(matches=None)

In [51]:
# Named-entity-recognition measures: list the result files available to load.
!ls ../eval_bio_lms/ner_output

crichton-2017-ner.json


In [52]:
# Load the NER evaluation results. The encoding is passed explicitly because
# open() otherwise falls back to the platform's locale encoding, whereas JSON
# files are UTF-8.
with open("../eval_bio_lms/ner_output/crichton-2017-ner.json", "r", encoding="utf-8") as fp:
    ner_blobs = json.load(fp)

In [53]:
# Inspect the raw NER results: a dict keyed by "<model>-<dataset>" whose
# values hold per-entity metrics plus overall precision/recall/F1/accuracy.
ner_blobs

{'BioLinkBERT-base-BC2GM-IOB': {'GENE': {'precision': 0.8403908794788274,
   'recall': 0.868047523919672,
   'f1': 0.8539953452288596,
   'number': 9511},
  'overall_precision': 0.8403908794788274,
  'overall_recall': 0.868047523919672,
  'overall_f1': 0.8539953452288596,
  'overall_accuracy': 0.9766408629687834,
  'model_def_name': 'BioLinkBERT-base',
  'subset_name': 'BC2GM-IOB'},
 'BioLinkBERT-large-BC2GM-IOB': {'GENE': {'precision': 0.8521480570081,
   'recall': 0.8738303017558616,
   'f1': 0.8628529900332225,
   'number': 9511},
  'overall_precision': 0.8521480570081,
  'overall_recall': 0.8738303017558616,
  'overall_f1': 0.8628529900332225,
  'overall_accuracy': 0.9780636232530767,
  'model_def_name': 'BioLinkBERT-large',
  'subset_name': 'BC2GM-IOB'},
 'bert-base-uncased-BC2GM-IOB': {'GENE': {'precision': 0.8189215280650944,
   'recall': 0.8423292432087383,
   'f1': 0.8304604734100206,
   'number': 14099},
  'overall_precision': 0.8189215280650944,
  'overall_recall': 0.8423292

In [54]:
# Flatten the NER blobs into one row per blob, keeping only the model name,
# dataset name and overall metrics (per-entity breakdowns are not used).
# Maps each source key in a blob to the column name used in the frame.
ner_fields = {
    'model_def_name': 'model_name',
    'subset_name': 'dataset',
    'overall_precision': 'precision',
    'overall_recall': 'recall',
    'overall_f1': 'f1',
    'overall_accuracy': 'accuracy',
}
rows = [
    tuple(blob[field] for field in ner_fields)
    for blob in ner_blobs.values()
]
df_plt = pd.DataFrame(rows, columns=list(ner_fields.values()))
df_plt["cased"] = df_plt["model_name"].map(lambda name: MODEL_NAME_TO_DEF[name].cased)

In [55]:
# Group rows by dataset and, within each dataset, order them by ascending F1
# so the per-dataset charts read from weakest to strongest model.
df_plt = df_plt.sort_values(by=['dataset', 'f1'])
df_plt

Unnamed: 0,model_name,dataset,precision,recall,f1,accuracy,cased
13,bluebert_pubmed_mimic_uncased_L-24_H-1024_A-16,BC2GM-IOB,0.816618,0.83857,0.827449,0.965639,False
2,bert-base-uncased,BC2GM-IOB,0.818922,0.842329,0.83046,0.965797,False
15,bluebert_pubmed_mimic_uncased_L-12_H-768_A-12,BC2GM-IOB,0.827444,0.842116,0.834716,0.967126,False
4,roberta-base,BC2GM-IOB,0.826112,0.847616,0.836726,0.965628,True
3,bert-base-cased,BC2GM-IOB,0.821786,0.858962,0.839963,0.964895,True
11,Bio_ClinicalBERT,BC2GM-IOB,0.837355,0.858114,0.847608,0.966785,False
7,scibert_scivocab_uncased,BC2GM-IOB,0.843705,0.862267,0.852885,0.975116,False
0,BioLinkBERT-base,BC2GM-IOB,0.840391,0.868048,0.853995,0.976641,False
12,bluebert_pubmed_uncased_L-24_H-1024_A-16,BC2GM-IOB,0.847205,0.862047,0.854561,0.971225,False
10,BiomedNLP-PubMedBERT-base-uncased-abstract-ful...,BC2GM-IOB,0.841826,0.868851,0.855125,0.978176,False


In [56]:
# One F1 bar chart per NER dataset. Each figure is titled with its dataset
# name so the charts can be told apart, and coloured by the `cased` flag to
# match the token-count and MLM charts.
for dataset, df in df_plt.groupby('dataset'):
    fig = px.bar(
        df,
        x='f1',
        y='model_name',
        color='cased',
        height=500,
        title=f'NER F1 by model: {dataset}',
    )
    # Colouring by `cased` produces one trace per value, and plotly lays out
    # categorical axes trace by trace; pin the y axis to this dataset's row
    # order (df_plt is sorted by F1 beforehand) so the bars stay sorted.
    fig.update_yaxes(categoryorder='array', categoryarray=df['model_name'].tolist())
    fig.show()