In [1]:
import os
from sklearn.model_selection import train_test_split
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Subset
from transformers import EarlyStoppingCallback
from sklearn.model_selection import StratifiedKFold 
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MediClaimDataset(torch.utils.data.Dataset):
    def __init__(self, premises, hypothesis1, hypothesis2, hypothesis3, labels, tokenizer_name='dmis-lab/biobert-v1.1'):
        self.premises = premises
        self.hypothesis1 = hypothesis1
        self.hypothesis2 = hypothesis2
        self.hypothesis3 = hypothesis3
        self.labels = labels
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        separator = self.tokenizer.sep_token
        grouped_hypotheses = separator.join([self.hypothesis1[idx], self.hypothesis2[idx], self.hypothesis3[idx]])
        
        tokenized_input = self.tokenizer(
        text=self.premises[idx],
        text_pair=grouped_hypotheses,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors='pt')
        tokenized_input = {key: val.squeeze(0) for key, val in tokenized_input.items()}
        tokenized_input['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return tokenized_input

In [3]:
data = pd.read_excel('/home/elson/topk3_minilm.xlsx',engine='openpyxl')
df= data.dropna(subset=['label'])
label_encoder = LabelEncoder()
claims = df.claim.tolist()
labels = df.label.tolist()
encoded_labels = label_encoder.fit_transform(labels)
evidence_1 = df.top_1_minilm_ce.to_list()
evidence_2 = df.top_2_minilm_ce.to_list()
evidence_3 = df.top_3_minilm_ce.to_list()

In [4]:
print(len(data))

861


In [5]:
print(encoded_labels)
print(label_encoder.classes_)

[2 2 2 0 2 1 2 2 2 2 0 0 1 2 1 2 2 0 2 0 2 0 2 2 1 0 2 2 2 2 2 2 2 2 1 2 2
 2 2 0 2 0 2 2 1 1 1 1 1 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 0 1 0 1
 2 2 0 2 2 2 0 1 1 1 1 0 1 2 2 2 2 2 2 0 2 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2
 2 1 2 1 2 0 1 1 2 2 1 2 2 2 2 1 1 1 2 0 1 2 2 2 0 0 1 1 1 1 2 2 1 1 2 1 2
 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2
 2 1 2 2 2 2 2 2 2 1 0 1 1 1 2 2 2 2 2 1 2 2 2 2 2 1 0 2 2 2 2 2 1 2 2 1 1
 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 0 2 2 2 1 2
 1 2 1 1 2 2 2 1 2 2 2 2 1 2 2 1 1 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 1 2 2 1 2
 2 2 2 0 2 2 1 1 0 2 1 1 1 0 1 1 1 1 1 2 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
 1 2 1 1 1 1 2 1 1 1 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 0 2 2 2 2 2 2 2 2 2 2 1
 1 1 2 1 2 0 1 2 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 0 2 2 2 1 2 1 2 0 1 0 1
 2 1 2 2 2 2 2 2 2 2 2 0 0 2 2 2 2 0 2 2 2 0 0 2 2 0 2 2 2 1 0 1 2 2 2 2 2
 2 0 2 0 2 2 2 0 1 2 2 1 

In [6]:
from sklearn.model_selection import train_test_split

# Perform the split
train_premises, test_premises, train_hypothesis1, test_hypothesis1,train_hypothesis2, test_hypothesis2, train_hypothesis3, test_hypothesis3, train_labels, test_labels = train_test_split(
    claims, evidence_1,evidence_2,evidence_3, encoded_labels, test_size=0.2, random_state=42)

In [7]:
import torch
print(torch.cuda.device_count())
print("Available GPUs:")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

4
Available GPUs:
GPU 0: Tesla V100-SXM2-32GB
GPU 1: Tesla V100-SXM2-32GB
GPU 2: Tesla V100-SXM2-32GB
GPU 3: Tesla V100-SXM2-32GB


In [8]:
model_name = "dmis-lab/biobert-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                 num_labels=3, ignore_mismatched_sizes=True)
device = "cuda:3"
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [9]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average="weighted")  # Specify average method
    recall = recall_score(labels, preds, average="weighted")  # Specify average method

    return {"accuracy": acc, "precision": prec, "recall": recall, "f1": f1}

In [10]:
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

In [11]:
import gc

torch.cuda.set_device(3)

# Clearing the cache
torch.cuda.empty_cache()
gc.collect()
# Checking GPU memory, making sure to reset peak memory stats
torch.cuda.reset_peak_memory_stats()

os.environ["CUDA_VISIBLE_DEVICES"] = "3"



In [12]:
current_device = torch.cuda.current_device()
print(f"Current CUDA device: GPU {current_device}")

Current CUDA device: GPU 3


In [13]:
k = 5

kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
labels = np.array(train_labels)  # Ensure train_labels is a list or numpy array of your labels
dataset = MediClaimDataset(train_premises, train_hypothesis1, train_hypothesis2, train_hypothesis3, train_labels)
model = model.to('cuda:3')
for fold, (train_idx, val_idx) in enumerate(kf.split(np.zeros(len(labels)), labels)):
    print(f"Starting fold {fold + 1}/{k}")

    # Splitting the dataset
    train_subs = Subset(dataset, train_idx)
    val_subs = Subset(dataset, val_idx)

    train_loader = DataLoader(train_subs, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_subs, batch_size=16)

    training_args = TrainingArguments(
    output_dir=f'/home/elson/biobert/results/fold_{fold}',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    dataloader_pin_memory=True,
    dataloader_num_workers=4,
    fp16=True,
    warmup_ratio=0.06,
    weight_decay=0.01,
    logging_dir=f'./logs/fold_{fold}',
    logging_steps=10,
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",  # Save at the end of each epoch to match the evaluation strategy
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # Ensure this metric is returned from your compute_metrics function
    report_to="none")


    trainer = Trainer(
        model=model.to(device),
        args=training_args,
        train_dataset=train_subs,
        eval_dataset=val_subs,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.01)],
    )

    # Training and Evaluation
    trainer.train()
    eval_result = trainer.evaluate()

    # Optionally, you can save model for each fold
    model.save_pretrained(f'/home/elson/biobert/model_fold_{fold}')
    tokenizer.save_pretrained(f'/home/elson/biobert/tokenizer_fold_{fold}')

    print(f"Fold {fold + 1} completed. Eval Result: {eval_result}")


Starting fold 1/5


Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.878193,0.645161,0.416233,0.645161,0.506009
2,0.976000,0.817708,0.645161,0.416233,0.645161,0.506009
3,0.852800,0.786853,0.66129,0.681121,0.66129,0.541379
4,0.731300,0.771618,0.66129,0.598596,0.66129,0.543054
5,0.692100,0.761171,0.677419,0.640173,0.677419,0.57452


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/biobert/results/fold_0/checkpoint-8
Configuration saved in /home/elson/biobert/results/fold_0/checkpoint-8/config.json
Model weights saved in /home/elson/biobert/results/fold_0/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_0/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fold_0/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/biobert/results/fold_0/checkpoint-16
Configuration saved in /home/elson/biobert/results/fold_0/checkpoint-16/config.json
Model weights saved in /home/elson/biobert/results/fold_0/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/

  _warn_prf(average, modifier, msg_start, len(result))
Configuration saved in /home/elson/biobert/model_fold_0/config.json
Model weights saved in /home/elson/biobert/model_fold_0/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/tokenizer_fold_0/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/tokenizer_fold_0/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 1 completed. Eval Result: {'eval_loss': 0.7611712217330933, 'eval_accuracy': 0.6774193548387096, 'eval_precision': 0.6401734887503389, 'eval_recall': 0.6774193548387096, 'eval_f1': 0.5745203787025801, 'eval_runtime': 0.78, 'eval_samples_per_second': 158.982, 'eval_steps_per_second': 2.564, 'epoch': 5.0}
Starting fold 2/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.655463,0.709677,0.728616,0.709677,0.678503
2,0.609900,0.669094,0.733871,0.725172,0.733871,0.713485
3,0.465400,0.676083,0.733871,0.704306,0.733871,0.702319
4,0.293300,0.667703,0.75,0.746334,0.75,0.734831
5,0.220800,0.68015,0.774194,0.764915,0.774194,0.753142


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_1/checkpoint-8
Configuration saved in /home/elson/biobert/results/fold_1/checkpoint-8/config.json
Model weights saved in /home/elson/biobert/results/fold_1/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_1/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fold_1/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_1/checkpoint-16
Configuration saved in /home/elson/biobert/results/fold_1/checkpoint-16/config.json
Model weights saved in /home/elson/biobert/results/fold_1/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_1/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fo

Configuration saved in /home/elson/biobert/model_fold_1/config.json
Model weights saved in /home/elson/biobert/model_fold_1/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/tokenizer_fold_1/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/tokenizer_fold_1/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 2 completed. Eval Result: {'eval_loss': 0.6801501512527466, 'eval_accuracy': 0.7741935483870968, 'eval_precision': 0.7649150190773499, 'eval_recall': 0.7741935483870968, 'eval_f1': 0.7531418191816495, 'eval_runtime': 0.7883, 'eval_samples_per_second': 157.295, 'eval_steps_per_second': 2.537, 'epoch': 5.0}
Starting fold 3/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.184049,0.927419,0.927571,0.927419,0.926789
2,0.361300,0.215047,0.927419,0.925618,0.927419,0.926367
3,0.212100,0.216697,0.91129,0.914505,0.91129,0.912644
4,0.120000,0.21126,0.919355,0.915939,0.919355,0.916911


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_2/checkpoint-8
Configuration saved in /home/elson/biobert/results/fold_2/checkpoint-8/config.json
Model weights saved in /home/elson/biobert/results/fold_2/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_2/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fold_2/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_2/checkpoint-16
Configuration saved in /home/elson/biobert/results/fold_2/checkpoint-16/config.json
Model weights saved in /home/elson/biobert/results/fold_2/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_2/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fo

Configuration saved in /home/elson/biobert/model_fold_2/config.json
Model weights saved in /home/elson/biobert/model_fold_2/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/tokenizer_fold_2/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/tokenizer_fold_2/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 3 completed. Eval Result: {'eval_loss': 0.18404865264892578, 'eval_accuracy': 0.9274193548387096, 'eval_precision': 0.9275713533778048, 'eval_recall': 0.9274193548387096, 'eval_f1': 0.9267886427574168, 'eval_runtime': 0.8202, 'eval_samples_per_second': 151.189, 'eval_steps_per_second': 2.439, 'epoch': 4.0}
Starting fold 4/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.141415,0.967742,0.969083,0.967742,0.967656
2,0.302000,0.152845,0.959677,0.962049,0.959677,0.95896
3,0.168700,0.195778,0.951613,0.954989,0.951613,0.950679
4,0.111700,0.181129,0.951613,0.954989,0.951613,0.950679


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_3/checkpoint-8
Configuration saved in /home/elson/biobert/results/fold_3/checkpoint-8/config.json
Model weights saved in /home/elson/biobert/results/fold_3/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_3/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fold_3/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_3/checkpoint-16
Configuration saved in /home/elson/biobert/results/fold_3/checkpoint-16/config.json
Model weights saved in /home/elson/biobert/results/fold_3/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_3/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fo

Configuration saved in /home/elson/biobert/model_fold_3/config.json
Model weights saved in /home/elson/biobert/model_fold_3/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/tokenizer_fold_3/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/tokenizer_fold_3/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 496
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 4 completed. Eval Result: {'eval_loss': 0.14141470193862915, 'eval_accuracy': 0.967741935483871, 'eval_precision': 0.969082530372853, 'eval_recall': 0.967741935483871, 'eval_f1': 0.967655552902839, 'eval_runtime': 0.8075, 'eval_samples_per_second': 153.566, 'eval_steps_per_second': 2.477, 'epoch': 4.0}
Starting fold 5/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.11039,0.97561,0.976595,0.97561,0.975762
2,0.312100,0.134662,0.95122,0.950303,0.95122,0.949763
3,0.181600,0.101717,0.96748,0.969632,0.96748,0.967812
4,0.104700,0.093273,0.95935,0.959045,0.95935,0.958962


***** Running Evaluation *****
  Num examples = 123
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_4/checkpoint-8
Configuration saved in /home/elson/biobert/results/fold_4/checkpoint-8/config.json
Model weights saved in /home/elson/biobert/results/fold_4/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_4/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fold_4/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 123
  Batch size = 16
Saving model checkpoint to /home/elson/biobert/results/fold_4/checkpoint-16
Configuration saved in /home/elson/biobert/results/fold_4/checkpoint-16/config.json
Model weights saved in /home/elson/biobert/results/fold_4/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/results/fold_4/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/results/fo

Configuration saved in /home/elson/biobert/model_fold_4/config.json
Model weights saved in /home/elson/biobert/model_fold_4/pytorch_model.bin
tokenizer config file saved in /home/elson/biobert/tokenizer_fold_4/tokenizer_config.json
Special tokens file saved in /home/elson/biobert/tokenizer_fold_4/special_tokens_map.json


Fold 5 completed. Eval Result: {'eval_loss': 0.11038956046104431, 'eval_accuracy': 0.975609756097561, 'eval_precision': 0.9765952204976596, 'eval_recall': 0.975609756097561, 'eval_f1': 0.9757621951219512, 'eval_runtime': 1.0443, 'eval_samples_per_second': 117.781, 'eval_steps_per_second': 1.915, 'epoch': 4.0}


In [14]:
from transformers import AutoModelForSequenceClassification

# Assuming test_dataset is prepared similarly to your training/validation datasets
test_dataset = MediClaimDataset(test_premises, test_hypothesis1, test_hypothesis2, test_hypothesis3, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16)

# Load the best model from this fold
model_path = f'/home/elson/biobert/model_fold_4'
model = AutoModelForSequenceClassification.from_pretrained(model_path).to('cuda:3')

# Evaluate on the test set
test_results = trainer.evaluate(test_dataset)
print(f"Test Results for Fold {fold + 1}: {test_results}")

loading configuration file https://huggingface.co/dmis-lab/biobert-v1.1/resolve/main/config.json from cache at /home/elson/.cache/huggingface/transformers/f048b8136bae2b3abe91e9e82949295fb205887c84db3be2775e1cdb0ecfeeb9.d7812d36d3371e4d43299a0c4a938622c5251db0efa17a5d4d9b57037fcec823
Model config BertConfig {
  "_name_or_path": "dmis-lab/biobert-v1.1",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}

loading file https://huggingface.co/dmis-lab/biobert-v1.

Test Results for Fold 5: {'eval_loss': 0.8097406029701233, 'eval_accuracy': 0.6774193548387096, 'eval_precision': 0.7099780321424443, 'eval_recall': 0.6774193548387096, 'eval_f1': 0.6809384164222873, 'eval_runtime': 0.9341, 'eval_samples_per_second': 165.94, 'eval_steps_per_second': 3.212, 'epoch': 4.0}
