In [29]:
from sklearn.model_selection import train_test_split
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Subset
from transformers import EarlyStoppingCallback
from sklearn.model_selection import StratifiedKFold 
import numpy as np

In [30]:
class MediClaimDataset(torch.utils.data.Dataset):
    def __init__(self, premises, hypothesis1, hypothesis2, hypothesis3, labels, tokenizer_name='allenai/scibert_scivocab_uncased'):
        self.premises = premises
        self.hypothesis1 = hypothesis1
        self.hypothesis2 = hypothesis2
        self.hypothesis3 = hypothesis3
        self.labels = labels
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        separator = self.tokenizer.sep_token
        grouped_hypotheses = separator.join([self.hypothesis1[idx], self.hypothesis2[idx], self.hypothesis3[idx]])
        
        tokenized_input = self.tokenizer(
        text=self.premises[idx],
        text_pair=grouped_hypotheses,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors='pt')
        tokenized_input = {key: val.squeeze(0) for key, val in tokenized_input.items()}
        tokenized_input['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return tokenized_input

In [31]:
data = pd.read_excel('/home/elson/topk3_minilm.xlsx',engine='openpyxl')
df= data.dropna(subset=['label'])
label_encoder = LabelEncoder()
claims = df.claim.tolist()
labels = df.label.tolist()
encoded_labels = label_encoder.fit_transform(labels)
evidence_1 = df.top_1_minilm_ce.to_list()
evidence_2 = df.top_2_minilm_ce.to_list()
evidence_3 = df.top_3_minilm_ce.to_list()

In [32]:
print(len(data))

861


In [33]:
print(encoded_labels)
print(label_encoder.classes_)

[2 2 2 0 2 1 2 2 2 2 0 0 1 2 1 2 2 0 2 0 2 0 2 2 1 0 2 2 2 2 2 2 2 2 1 2 2
 2 2 0 2 0 2 2 1 1 1 1 1 0 0 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 0 1 0 1
 2 2 0 2 2 2 0 1 1 1 1 0 1 2 2 2 2 2 2 0 2 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2
 2 1 2 1 2 0 1 1 2 2 1 2 2 2 2 1 1 1 2 0 1 2 2 2 0 0 1 1 1 1 2 2 1 1 2 1 2
 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2
 2 1 2 2 2 2 2 2 2 1 0 1 1 1 2 2 2 2 2 1 2 2 2 2 2 1 0 2 2 2 2 2 1 2 2 1 1
 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 0 2 2 2 1 2
 1 2 1 1 2 2 2 1 2 2 2 2 1 2 2 1 1 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 1 2 2 1 2
 2 2 2 0 2 2 1 1 0 2 1 1 1 0 1 1 1 1 1 2 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
 1 2 1 1 1 1 2 1 1 1 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 0 2 2 2 2 2 2 2 2 2 2 1
 1 1 2 1 2 0 1 2 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 0 2 2 2 1 2 1 2 0 1 0 1
 2 1 2 2 2 2 2 2 2 2 2 0 0 2 2 2 2 0 2 2 2 0 0 2 2 0 2 2 2 1 0 1 2 2 2 2 2
 2 0 2 0 2 2 2 0 1 2 2 1 

In [34]:
from sklearn.model_selection import train_test_split

# Perform the split
train_premises, test_premises, train_hypothesis1, test_hypothesis1,train_hypothesis2, test_hypothesis2, train_hypothesis3, test_hypothesis3, train_labels, test_labels = train_test_split(
    claims, evidence_1,evidence_2,evidence_3, encoded_labels, test_size=0.2, random_state=42)

In [35]:
model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                 num_labels=3, ignore_mismatched_sizes=True)
device = "cuda:3"
model.to(device)

Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/allenai/scibert_scivocab_uncased/resolve/main/config.json from cache at /home/elson/.cache/huggingface/transformers/858852fd2471ce39075378592ddc87f5a6551e64c6825d1b92c8dab9318e0fc3.03ff9e9f998b9a9d40647a2148a202e3fb3d568dc0f170dda9dda194bab4d5dd
Model config BertConfig {
  "_name_or_path": "allenai/scibert_scivocab_uncased",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 31090
}

loading file https

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31090, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [36]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, average="weighted")  # Specify average method
    recall = recall_score(labels, preds, average="weighted")  # Specify average method

    return {"accuracy": acc, "precision": prec, "recall": recall, "f1": f1}

In [37]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

In [38]:
import torch
import gc

torch.cuda.set_device(3)

# Clearing the cache
torch.cuda.empty_cache()
gc.collect()
# Checking GPU memory, making sure to reset peak memory stats
torch.cuda.reset_peak_memory_stats()

import torch

os.environ["CUDA_VISIBLE_DEVICES"] = "3"

current_device = torch.cuda.current_device()
print(f"Current CUDA device: GPU {current_device}")


Current CUDA device: GPU 3


In [None]:
k = 5

kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
labels = np.array(train_labels)  # Ensure train_labels is a list or numpy array of your labels
dataset = MediClaimDataset(train_premises, train_hypothesis1, train_hypothesis2, train_hypothesis3, train_labels)
model = model.to('cuda:3')
for fold, (train_idx, val_idx) in enumerate(kf.split(np.zeros(len(labels)), labels)):
    print(f"Starting fold {fold + 1}/{k}")

    # Splitting the dataset
    train_subs = Subset(dataset, train_idx)
    val_subs = Subset(dataset, val_idx)

    train_loader = DataLoader(train_subs, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_subs, batch_size=16)

    training_args = TrainingArguments(
    output_dir=f'/home/elson/scibert/results/fold_{fold}',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    dataloader_pin_memory=True,
    dataloader_num_workers=4,
    fp16=True,
    warmup_ratio=0.06,
    weight_decay=0.01,
    logging_dir=f'./logs/fold_{fold}',
    logging_steps=10,
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",  # Save at the end of each epoch to match the evaluation strategy
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # Ensure this metric is returned from your compute_metrics function
    report_to="none")


    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_subs,
        eval_dataset=val_subs,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.01)]
    )

    # Training and Evaluation
    trainer.train()
    eval_result = trainer.evaluate()

    # Optionally, you can save model for each fold
    model.save_pretrained(f'/home/elson/scibert/model_fold_{fold}')
    tokenizer.save_pretrained(f'/home/elson/scibert/tokenizer_fold_{fold}')

    print(f"Fold {fold + 1} completed. Eval Result: {eval_result}")


Could not locate the tokenizer configuration file, will try to use the model config instead.
loading configuration file https://huggingface.co/allenai/scibert_scivocab_uncased/resolve/main/config.json from cache at /home/elson/.cache/huggingface/transformers/858852fd2471ce39075378592ddc87f5a6551e64c6825d1b92c8dab9318e0fc3.03ff9e9f998b9a9d40647a2148a202e3fb3d568dc0f170dda9dda194bab4d5dd
Model config BertConfig {
  "_name_or_path": "allenai/scibert_scivocab_uncased",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 31090
}

loading file https

Starting fold 1/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.830135,0.645161,0.416233,0.645161,0.506009
2,0.904000,0.778977,0.709677,0.631258,0.709677,0.647175
3,0.777800,0.753417,0.685484,0.606043,0.685484,0.640058
4,0.613400,0.79803,0.701613,0.620275,0.701613,0.640777
5,0.535200,0.793664,0.685484,0.600947,0.685484,0.633016


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/scibert/results/fold_0/checkpoint-8
Configuration saved in /home/elson/scibert/results/fold_0/checkpoint-8/config.json
Model weights saved in /home/elson/scibert/results/fold_0/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/results/fold_0/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/results/fold_0/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/scibert/results/fold_0/checkpoint-16
Configuration saved in /home/elson/scibert/results/fold_0/checkpoint-16/config.json
Model weights saved in /home/elson/scibert/results/fold_0/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/

  _warn_prf(average, modifier, msg_start, len(result))
Configuration saved in /home/elson/scibert/model_fold_0/config.json
Model weights saved in /home/elson/scibert/model_fold_0/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/tokenizer_fold_0/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/tokenizer_fold_0/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 1 completed. Eval Result: {'eval_loss': 0.7789771556854248, 'eval_accuracy': 0.7096774193548387, 'eval_precision': 0.6312578694426217, 'eval_recall': 0.7096774193548387, 'eval_f1': 0.6471749990318708, 'eval_runtime': 0.9576, 'eval_samples_per_second': 129.485, 'eval_steps_per_second': 2.088, 'epoch': 5.0}
Starting fold 2/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.823341,0.637097,0.600209,0.637097,0.61417
2,0.732600,0.761515,0.677419,0.607951,0.677419,0.640553
3,0.571000,0.898374,0.637097,0.556689,0.637097,0.589782
4,0.409700,0.830824,0.677419,0.618626,0.677419,0.646536
5,0.309700,0.861388,0.685484,0.620398,0.685484,0.651117


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/scibert/results/fold_1/checkpoint-8
Configuration saved in /home/elson/scibert/results/fold_1/checkpoint-8/config.json
Model weights saved in /home/elson/scibert/results/fold_1/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/results/fold_1/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/results/fold_1/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to /home/elson/scibert/results/fold_1/checkpoint-16
Configuration saved in /home/elson/scibert/results/fold_1/checkpoint-16/config.json
Model weights saved in /home/elson/scibert/results/fold_1/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/

Configuration saved in /home/elson/scibert/model_fold_1/config.json
Model weights saved in /home/elson/scibert/model_fold_1/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/tokenizer_fold_1/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/tokenizer_fold_1/special_tokens_map.json
PyTorch: setting up devices
Using amp half precision backend
***** Running training *****
  Num examples = 495
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 40


Fold 2 completed. Eval Result: {'eval_loss': 0.8613883256912231, 'eval_accuracy': 0.6854838709677419, 'eval_precision': 0.62039752362333, 'eval_recall': 0.6854838709677419, 'eval_f1': 0.6511166253101736, 'eval_runtime': 1.0034, 'eval_samples_per_second': 123.579, 'eval_steps_per_second': 1.993, 'epoch': 5.0}
Starting fold 3/5




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.277609,0.919355,0.916888,0.919355,0.9174
2,0.435900,0.287512,0.887097,0.881646,0.887097,0.874013


***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/scibert/results/fold_2/checkpoint-8
Configuration saved in /home/elson/scibert/results/fold_2/checkpoint-8/config.json
Model weights saved in /home/elson/scibert/results/fold_2/checkpoint-8/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/results/fold_2/checkpoint-8/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/results/fold_2/checkpoint-8/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 124
  Batch size = 16
Saving model checkpoint to /home/elson/scibert/results/fold_2/checkpoint-16
Configuration saved in /home/elson/scibert/results/fold_2/checkpoint-16/config.json
Model weights saved in /home/elson/scibert/results/fold_2/checkpoint-16/pytorch_model.bin
tokenizer config file saved in /home/elson/scibert/results/fold_2/checkpoint-16/tokenizer_config.json
Special tokens file saved in /home/elson/scibert/results/fo

In [None]:
from transformers import AutoModelForSequenceClassification

# Assuming test_dataset is prepared similarly to your training/validation datasets
test_dataset = MediClaimDataset(test_premises, test_hypothesis1, test_hypothesis2, test_hypothesis3, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16)

# Load the best model from this fold
model_path = f'/home/elson/scibert/model_fold_4'
model = AutoModelForSequenceClassification.from_pretrained(model_path).to('cuda:3')

# Evaluate on the test set
test_results = trainer.evaluate(test_dataset)
print(f"Test Results for Fold {fold + 1}: {test_results}")