In [3]:
import os
from sklearn.model_selection import train_test_split
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Subset
from transformers import EarlyStoppingCallback
from sklearn.model_selection import StratifiedKFold 
import numpy as np

In [4]:
class MediClaimDataset(torch.utils.data.Dataset):
    def __init__(self, premises, hypothesis1, hypothesis2, hypothesis3, labels, tokenizer_name='allenai/biomed_roberta_base'):
        self.premises = premises
        self.hypothesis1 = hypothesis1
        self.hypothesis2 = hypothesis2
        self.hypothesis3 = hypothesis3
        self.labels = labels
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        separator = self.tokenizer.sep_token
        grouped_hypotheses = separator.join([self.hypothesis1[idx], self.hypothesis2[idx], self.hypothesis3[idx]])
        
        tokenized_input = self.tokenizer(
        text=self.premises[idx],
        text_pair=grouped_hypotheses,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors='pt')
        tokenized_input = {key: val.squeeze(0) for key, val in tokenized_input.items()}
        tokenized_input['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return tokenized_input

In [5]:
data = pd.read_excel('/home/elson/topk3_minilm.xlsx',engine='openpyxl')
df= data.dropna(subset=['label'])
label_encoder = LabelEncoder()
claims = df.claim.tolist()
labels = df.label.tolist()
encoded_labels = label_encoder.fit_transform(labels)
evidence_1 = df.top_1_minilm_ce.to_list()
evidence_2 = df.top_2_minilm_ce.to_list()
evidence_3 = df.top_3_minilm_ce.to_list()

In [6]:
print(len(data))

861


In [7]:
print(encoded_labels)
print(label_encoder.classes_)

[2 2 2 0 2 1 2 2 2 2 0 0 1 2 1 2 2 0 2 0 2 0 2 2 1 0 2 2 2 2 2 2 2 2 1 2 2
 2 2 0 2 0 2 2 1 1 1 1 1 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 0 1 0 1
 2 2 0 2 2 2 0 1 1 1 1 0 1 2 2 2 2 2 2 0 2 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2
 2 1 2 1 2 0 1 1 2 2 1 2 2 2 2 1 1 1 2 0 1 2 2 2 0 0 1 1 1 1 2 2 1 1 2 1 2
 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2
 2 1 2 2 2 2 2 2 2 1 0 1 1 1 2 2 2 2 2 1 2 2 2 2 2 1 0 2 2 2 2 2 1 2 2 1 1
 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 0 2 2 2 1 2
 1 2 1 1 2 2 2 1 2 2 2 2 1 2 2 1 1 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 1 2 2 1 2
 2 2 2 0 2 2 1 1 0 2 1 1 1 0 1 1 1 1 1 2 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
 1 2 1 1 1 1 2 1 1 1 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 0 2 2 2 2 2 2 2 2 2 2 1
 1 1 2 1 2 0 1 2 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 0 2 2 2 1 2 1 2 0 1 0 1
 2 1 2 2 2 2 2 2 2 2 2 0 0 2 2 2 2 0 2 2 2 0 0 2 2 0 2 2 2 1 0 1 2 2 2 2 2
 2 0 2 0 2 2 2 0 1 2 2 1 

In [8]:
from sklearn.model_selection import train_test_split

# Perform the split
train_premises, test_premises, train_hypothesis1, test_hypothesis1,train_hypothesis2, test_hypothesis2, train_hypothesis3, test_hypothesis3, train_labels, test_labels = train_test_split(
    claims, evidence_1,evidence_2,evidence_3, encoded_labels, test_size=0.2, random_state=42)

In [9]:
import torch
print(torch.cuda.device_count())
print("Available GPUs:")
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

4
Available GPUs:
GPU 0: Tesla V100-SXM2-32GB
GPU 1: Tesla V100-SXM2-32GB
GPU 2: Tesla V100-SXM2-32GB
GPU 3: Tesla V100-SXM2-32GB


In [10]:
model_name = "allenai/biomed_roberta_base"
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                 num_labels=3, ignore_mismatched_sizes=True)
device = "cuda:3"
model.to(device)

Some weights of the model checkpoint at allenai/biomed_roberta_base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allenai/biomed_roberta_base and are newly initialized: ['classi

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerN

In [11]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average="weighted")  # Specify average method
    recall = recall_score(labels, preds, average="weighted")  # Specify average method

    return {"accuracy": acc, "precision": prec, "recall": recall, "f1": f1}

In [12]:
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

In [13]:
import gc

torch.cuda.set_device(3)

# Clearing the cache
torch.cuda.empty_cache()
gc.collect()
# Checking GPU memory, making sure to reset peak memory stats
torch.cuda.reset_peak_memory_stats()

os.environ["CUDA_VISIBLE_DEVICES"] = "3"



In [14]:
current_device = torch.cuda.current_device()
print(f"Current CUDA device: GPU {current_device}")

Current CUDA device: GPU 3


In [15]:
k = 5

kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
labels = np.array(train_labels)  # Ensure train_labels is a list or numpy array of your labels
dataset = MediClaimDataset(train_premises, train_hypothesis1, train_hypothesis2, train_hypothesis3, train_labels)
model = model.to('cuda:3')
for fold, (train_idx, val_idx) in enumerate(kf.split(np.zeros(len(labels)), labels)):
    print(f"Starting fold {fold + 1}/{k}")

    # Splitting the dataset
    train_subs = Subset(dataset, train_idx)
    val_subs = Subset(dataset, val_idx)

    train_loader = DataLoader(train_subs, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_subs, batch_size=8)

    training_args = TrainingArguments(
    output_dir=f'/home/elson/biomedroberta/results/fold_{fold}',
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    dataloader_pin_memory=True,
    dataloader_num_workers=4,
    fp16=True,
    warmup_ratio=0.06,
    weight_decay=0.01,
    logging_dir=f'./logs/fold_{fold}',
    logging_steps=10,
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",  # Save at the end of each epoch to match the evaluation strategy
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # Ensure this metric is returned from your compute_metrics function
    report_to="none")


    trainer = Trainer(
        model=model.to(device),
        args=training_args,
        train_dataset=train_subs,
        eval_dataset=val_subs,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.01)],
    )

    # Training and Evaluation
    trainer.train()
    eval_result = trainer.evaluate()

    # Optionally, you can save model for each fold
    model.save_pretrained(f'/home/elson/biomedroberta/model_fold_{fold}')
    tokenizer.save_pretrained(f'/home/elson/biomedroberta/tokenizer_fold_{fold}')

    print(f"Fold {fold + 1} completed. Eval Result: {eval_result}")


Starting fold 1/5


Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 80


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0342,0.861125,0.645161,0.416233,0.645161,0.506009
2,0.9112,0.823169,0.645161,0.416233,0.645161,0.506009
3,0.7885,0.805923,0.645161,0.416233,0.645161,0.506009
4,0.7564,0.839355,0.66129,0.564799,0.66129,0.573582
5,0.6566,0.843655,0.693548,0.610235,0.693548,0.634426


***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/biomedroberta/results/fold_0/checkpoint-16
Configuration saved in /home/elson/biomedroberta/results/fold_0/checkpoint-16/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_0/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_0/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/results/fold_0/checkpoint-16/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/biomedroberta/results/fold_0/checkpoint-32
Configuration saved in /home/elson/biomedroberta/results/fold_0/checkpoint-32/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_0/checkpoint-32/pytorch_model.bin


  _warn_prf(average, modifier, msg_start, len(result))
Configuration saved in /home/elson/biomedroberta/model_fold_0/config.json
Model weights saved in /home/elson/biomedroberta/model_fold_0/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/tokenizer_fold_0/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/tokenizer_fold_0/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 80


Fold 1 completed. Eval Result: {'eval_loss': 0.8436551094055176, 'eval_accuracy': 0.6935483870967742, 'eval_precision': 0.6102352655833132, 'eval_recall': 0.6935483870967742, 'eval_f1': 0.6344256970442929, 'eval_runtime': 1.1084, 'eval_samples_per_second': 111.874, 'eval_steps_per_second': 3.609, 'epoch': 5.0}
Starting fold 2/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6957,0.697345,0.685484,0.624156,0.685484,0.653112
2,0.5435,0.698992,0.717742,0.637144,0.717742,0.662029
3,0.431,0.742855,0.693548,0.677337,0.693548,0.681762
4,0.2755,0.881804,0.717742,0.685947,0.717742,0.696971
5,0.2149,0.949966,0.701613,0.679639,0.701613,0.679394


***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/biomedroberta/results/fold_1/checkpoint-16
Configuration saved in /home/elson/biomedroberta/results/fold_1/checkpoint-16/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_1/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_1/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/results/fold_1/checkpoint-16/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_1/checkpoint-32
Configuration saved in /home/elson/biomedroberta/results/fold_1/checkpoint-32/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_1/checkpoint-32/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedrobert

Configuration saved in /home/elson/biomedroberta/model_fold_1/config.json
Model weights saved in /home/elson/biomedroberta/model_fold_1/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/tokenizer_fold_1/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/tokenizer_fold_1/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 80


Fold 2 completed. Eval Result: {'eval_loss': 0.698991596698761, 'eval_accuracy': 0.717741935483871, 'eval_precision': 0.6371437519574068, 'eval_recall': 0.717741935483871, 'eval_f1': 0.6620293156517376, 'eval_runtime': 1.1485, 'eval_samples_per_second': 107.963, 'eval_steps_per_second': 3.483, 'epoch': 5.0}
Starting fold 3/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6132,0.390915,0.862903,0.870316,0.862903,0.861458
2,0.4242,0.360811,0.870968,0.842937,0.870968,0.851242
3,0.3118,0.498248,0.854839,0.838541,0.854839,0.837806
4,0.1602,0.479405,0.830645,0.842505,0.830645,0.831541


***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_2/checkpoint-16
Configuration saved in /home/elson/biomedroberta/results/fold_2/checkpoint-16/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_2/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_2/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/results/fold_2/checkpoint-16/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_2/checkpoint-32
Configuration saved in /home/elson/biomedroberta/results/fold_2/checkpoint-32/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_2/checkpoint-32/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_2/checkpoint-32/tokenizer_config.json
Sp

Configuration saved in /home/elson/biomedroberta/model_fold_2/config.json
Model weights saved in /home/elson/biomedroberta/model_fold_2/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/tokenizer_fold_2/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/tokenizer_fold_2/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 80


Fold 3 completed. Eval Result: {'eval_loss': 0.36081090569496155, 'eval_accuracy': 0.8709677419354839, 'eval_precision': 0.84293659621802, 'eval_recall': 0.8709677419354839, 'eval_f1': 0.8512424865638741, 'eval_runtime': 0.8012, 'eval_samples_per_second': 154.774, 'eval_steps_per_second': 4.993, 'epoch': 4.0}
Starting fold 4/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3203,0.182111,0.943548,0.942183,0.943548,0.941178
2,0.2808,0.200343,0.927419,0.930015,0.927419,0.92254
3,0.1426,0.288966,0.927419,0.926878,0.927419,0.924677
4,0.0639,0.329409,0.91129,0.90962,0.91129,0.907199


***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_3/checkpoint-16
Configuration saved in /home/elson/biomedroberta/results/fold_3/checkpoint-16/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_3/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_3/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/results/fold_3/checkpoint-16/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_3/checkpoint-32
Configuration saved in /home/elson/biomedroberta/results/fold_3/checkpoint-32/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_3/checkpoint-32/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_3/checkpoint-32/tokenizer_config.json
Sp

Configuration saved in /home/elson/biomedroberta/model_fold_3/config.json
Model weights saved in /home/elson/biomedroberta/model_fold_3/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/tokenizer_fold_3/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/tokenizer_fold_3/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 496
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 80


Fold 4 completed. Eval Result: {'eval_loss': 0.18211132287979126, 'eval_accuracy': 0.9435483870967742, 'eval_precision': 0.9421834394297021, 'eval_recall': 0.9435483870967742, 'eval_f1': 0.9411775003172852, 'eval_runtime': 0.8529, 'eval_samples_per_second': 145.379, 'eval_steps_per_second': 4.69, 'epoch': 4.0}
Starting fold 5/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2633,0.14535,0.95122,0.95223,0.95122,0.951315
2,0.2434,0.184825,0.95122,0.952208,0.95122,0.949353
3,0.1197,0.328345,0.910569,0.918604,0.910569,0.894842
4,0.0459,0.267114,0.934959,0.938294,0.934959,0.930219


***** Running Evaluation *****
  Num examples = 123
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_4/checkpoint-16
Configuration saved in /home/elson/biomedroberta/results/fold_4/checkpoint-16/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_4/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_4/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/results/fold_4/checkpoint-16/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 123
  Batch size = 8
Saving model checkpoint to /home/elson/biomedroberta/results/fold_4/checkpoint-32
Configuration saved in /home/elson/biomedroberta/results/fold_4/checkpoint-32/config.json
Model weights saved in /home/elson/biomedroberta/results/fold_4/checkpoint-32/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/results/fold_4/checkpoint-32/tokenizer_config.json
Sp

Configuration saved in /home/elson/biomedroberta/model_fold_4/config.json
Model weights saved in /home/elson/biomedroberta/model_fold_4/pytorch_model.bin
tokenizer config file saved in /home/elson/biomedroberta/tokenizer_fold_4/tokenizer_config.json
Special tokens file saved in /home/elson/biomedroberta/tokenizer_fold_4/special_tokens_map.json


Fold 5 completed. Eval Result: {'eval_loss': 0.14534994959831238, 'eval_accuracy': 0.9512195121951219, 'eval_precision': 0.9522297893290119, 'eval_recall': 0.9512195121951219, 'eval_f1': 0.9513151474539552, 'eval_runtime': 0.9044, 'eval_samples_per_second': 136.008, 'eval_steps_per_second': 4.423, 'epoch': 4.0}


In [16]:
from transformers import AutoModelForSequenceClassification

# Assuming test_dataset is prepared similarly to your training/validation datasets
test_dataset = MediClaimDataset(test_premises, test_hypothesis1, test_hypothesis2, test_hypothesis3, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16)

# Load the best model from this fold
model_path = f'/home/elson/biomedroberta/model_fold_4'
model = AutoModelForSequenceClassification.from_pretrained(model_path).to('cuda:3')

# Evaluate on the test set
test_results = trainer.evaluate(test_dataset)
print(f"Test Results for Fold {fold + 1}: {test_results}")

loading configuration file https://huggingface.co/allenai/biomed_roberta_base/resolve/main/config.json from cache at /home/elson/.cache/huggingface/transformers/c1ca1e23086fa4e462e39ca18d2012f066313a311a382e45c322f2dbdb52984a.1981eea830332b491f83223a5bbbbab485bb933d2ea08a89b610ad88f27c9118
Model config RobertaConfig {
  "_name_or_path": "allenai/biomed_roberta_base",
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

loading file https://huggingface.co/allenai/biomed_roberta_base/resolve/m

Test Results for Fold 5: {'eval_loss': 1.0618221759796143, 'eval_accuracy': 0.6645161290322581, 'eval_precision': 0.6457521222410866, 'eval_recall': 0.6645161290322581, 'eval_f1': 0.6522749911996225, 'eval_runtime': 1.0449, 'eval_samples_per_second': 148.34, 'eval_steps_per_second': 4.785, 'epoch': 4.0}
