In [1]:
import os

# from main import FeverLoader, PubhealthLoader, ClimateFeverLoader
from main import load_datasets

  from .autonotebook import tqdm as notebook_tqdm


## Setting
Select one setting via the `experiment` variable:
- 1: train on FEVER
- 2: train on PUBHEALTH
- 3: train on CLIMATE-FEVER

In [2]:
# Short display name -> Hugging Face Hub checkpoint identifier.
models = dict(
    [
        ("BERT", "bert-base-uncased"),
        ("RoBERTa", "roberta-base"),
        ("ALBERT", "albert-base-v1"),
        ("SciBERT", "allenai/scibert_scivocab_uncased"),
        ("BioBERT", "dmis-lab/biobert-base-cased-v1.2"),
    ]
)

# Dataset tags used when building the per-experiment output directory names.
datasets = list(("FEVER", "PUBHEALTH", "CLIMATE"))

In [3]:
# generate model_dirs
def gen_experiment_params(model_map=None, dataset_names=None, version="V3", models_root="../models"):
    """Yield one ``(checkpoint, output_dir)`` pair per model/dataset combination.

    Args:
        model_map: Mapping of short model name -> checkpoint identifier.
            Defaults to the notebook-level ``models`` dict.
        dataset_names: Iterable of dataset tags. Defaults to the
            notebook-level ``datasets`` list.
        version: Suffix appended to every output directory name.
        models_root: Directory prefix under which the output directories live.

    Yields:
        Tuples ``(checkpoint, f"{models_root}/{model_name}_{dataset}_{version}")``,
        iterating over models in the outer loop and datasets in the inner loop.
    """
    # Fall back to the notebook globals only when the caller does not supply
    # explicit values, so the no-argument call behaves exactly as before.
    if model_map is None:
        model_map = models
    if dataset_names is None:
        dataset_names = datasets

    # Materialise once so a one-shot iterator is not exhausted after the first model.
    dataset_names = list(dataset_names)

    for model_name, model_path in model_map.items():
        for ds in dataset_names:
            yield (model_path, f"{models_root}/{model_name}_{ds}_{version}")


In [4]:
# Materialise the generator so the notebook displays every (checkpoint, output_dir) pair.
[*gen_experiment_params()]

[('bert-base-uncased', '../models/BERT_FEVER_V3'),
 ('bert-base-uncased', '../models/BERT_PUBHEALTH_V3'),
 ('bert-base-uncased', '../models/BERT_CLIMATE_V3'),
 ('roberta-base', '../models/RoBERTa_FEVER_V3'),
 ('roberta-base', '../models/RoBERTa_PUBHEALTH_V3'),
 ('roberta-base', '../models/RoBERTa_CLIMATE_V3'),
 ('albert-base-v1', '../models/ALBERT_FEVER_V3'),
 ('albert-base-v1', '../models/ALBERT_PUBHEALTH_V3'),
 ('albert-base-v1', '../models/ALBERT_CLIMATE_V3'),
 ('allenai/scibert_scivocab_uncased', '../models/SciBERT_FEVER_V3'),
 ('allenai/scibert_scivocab_uncased', '../models/SciBERT_PUBHEALTH_V3'),
 ('allenai/scibert_scivocab_uncased', '../models/SciBERT_CLIMATE_V3'),
 ('dmis-lab/biobert-base-cased-v1.2', '../models/BioBERT_FEVER_V3'),
 ('dmis-lab/biobert-base-cased-v1.2', '../models/BioBERT_PUBHEALTH_V3'),
 ('dmis-lab/biobert-base-cased-v1.2', '../models/BioBERT_CLIMATE_V3')]

In [5]:
# Key of the experiment to run; must be one of the keys of `model_dirs`.
experiment = "3"

# Experiment key -> output directory of the BERT model for that experiment.
model_dirs = {
    str(number): path
    for number, path in enumerate(
        (
            "../models/BERT_FEVER_V3",
            "../models/BERT_PUBHEALTH_V3",
            "../models/BERT_CLIMATE_V3",
        ),
        start=1,
    )
}

## Load Data
- ds1: train on FEVER
- ds2: train on PUBHEALTH
- ds3: train on CLIMATE-FEVER

In [3]:
# Root of the data snapshot; each dataset lives in its own sub-directory below it.
root = '../data_2023_06_02'

fever_dir, pubhealth_dir, climate_dir = (
    os.path.join(root, subdir)
    for subdir in (
        'preprocessed/FEVER',
        'preprocessed/PUBHEALTH',
        'preprocessed/CLIMATE-FEVER',
    )
)

In [4]:
# ds1/ds2/ds3 are the datasets for experiments 1/2/3; ds_test is the fourth value
# returned by the project helper (its contents are defined in main.load_datasets).
(ds1, ds2, ds3, ds_test) = load_datasets(
    fever_dir,
    pubhealth_dir,
    climate_dir,
)

## Training

In [5]:
# Checkpoint to fine-tune; model card: https://huggingface.co/bert-base-uncased
model_name = "bert-base-uncased"

#### Tokenize data

In [6]:
from transformers import AutoTokenizer

# Tokenizer matching the checkpoint that will be fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess_function(samples):
    """Tokenize a batch of claim/evidence pairs.

    Args:
        samples: Batch providing the ``'claim'`` and ``'evidence'`` columns.

    Returns:
        The tokenizer output for the pairs. Sequences are padded to the longest
        one in the batch and capped at 512 tokens; when a pair is too long only
        the second segment (the evidence) is truncated.
    """
    claims = samples['claim']
    evidences = samples['evidence']
    encoded = tokenizer(
        claims,
        evidences,
        padding=True,
        truncation='only_second',
        max_length=512,
    )
    return encoded

In [7]:
# Reject unknown experiment keys up front, then pick the matching dataset:
# "1" -> ds1, "2" -> ds2, "3" -> ds3.
if experiment not in ("1", "2", "3"):
    raise ValueError("Unknown Experiment")

ds = (ds1, ds2, ds3)[int(experiment) - 1]

In [8]:
# Tokenize every split; batched=True hands preprocess_function whole batches of rows.
encoded_ds = ds.map(
    function=preprocess_function,
    batched=True,
)

                                                                 

In [9]:
# Inspect the tokenized dataset: available splits, their columns and row counts.
print(encoded_ds)

DatasetDict({
    train: Dataset({
        features: ['claim', 'evidence', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8370
    })
    validation: Dataset({
        features: ['claim', 'evidence', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1050
    })
    fever_test: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 9999
    })
    pubhealth_test: Dataset({
        features: ['claim', 'evidence', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1032
    })
    climate_test: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 200
    })
})


#### Setup model

In [10]:
from transformers import AutoModelForSequenceClassification

In [38]:
# Number of target classes; matches the label ids 0, 1 and 2 used for the class weights.
num_labels = 3

# Load the pretrained encoder with a sequence-classification head sized for `num_labels`.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)
print(model)

loading configuration file config.json from cache at /users/k21193529/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loa

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

#### Setup trainer

In [34]:
import numpy as np
import torch
from torch import nn
import evaluate 
from transformers import TrainingArguments, Trainer

In [39]:
# Accuracy metric reported after every evaluation pass.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score a batch of predictions with the accuracy metric.

    Args:
        eval_pred: Pair ``(logits, labels)`` as handed over by the Trainer.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

In [40]:
# Per-device batch size and number of passes over the training split.
batch_size, num_epochs = 8, 25

# Output directory registered for the selected experiment.
model_dir = model_dirs.get(experiment)
print(model_dir)

../models/BERT_PUBHEALTH_V3


In [41]:
# Evaluate, log and checkpoint once per epoch. Evaluation and save strategies
# are kept identical because `load_best_model_at_end` is enabled.
args = TrainingArguments(
    output_dir = model_dir,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    logging_strategy = "epoch",
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    num_train_epochs = num_epochs,
    # No `metric_for_best_model` is given, so the checkpoint restored at the end
    # of training is the one with the lowest validation loss (library default),
    # not the one with the highest accuracy.
    load_best_model_at_end = True,
    save_total_limit = 2,
    # Spell out the current default (report to every installed integration) so
    # behaviour does not silently change when the library default becomes "none".
    report_to = "all",
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [42]:
#create trainer with weighted loss
from sklearn.utils import class_weight
import numpy as np

In [77]:
# "balanced" weights are inversely proportional to each class's frequency in
# the training labels, so rarer classes contribute more to the loss.
balanced_weights = class_weight.compute_class_weight(
    "balanced",
    classes=np.array([0, 1, 2]),
    y=encoded_ds["train"]["label"],
)

# Keep the weights on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = torch.tensor(balanced_weights, dtype=torch.float).to(device)

In [79]:
class CustomTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy.

    The per-class weights are read from the notebook-level ``class_weights``
    tensor, which must be defined before training starts.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the class-weighted cross-entropy loss for one batch.

        Args:
            model: Model to run the forward pass with.
            inputs: Batch mapping; must contain a ``"labels"`` entry.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Accepted only for compatibility with newer
                Trainer versions that pass it to ``compute_loss``; unused.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.get("labels")

        # Run the forward pass without the labels so the model does not also
        # compute its own unweighted loss, which would be discarded anyway.
        # A filtered copy is used so the caller's `inputs` stays untouched.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)
        logits = outputs.get("logits")

        # Keep the weights on the same device as the logits; this is a no-op
        # when they already share a device.
        loss_fct = nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [80]:
# Train on the "train" split and evaluate on "validation" after each epoch.
# The tokenizer is handed over as well, so it is stored with every checkpoint.
trainer = CustomTrainer(
    model=model,
    args=args,
    train_dataset=encoded_ds["train"],
    eval_dataset=encoded_ds["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

#### Train model

In [81]:
# Release cached, currently unused GPU memory held by PyTorch before training starts.
torch.cuda.empty_cache()

In [82]:
# Final export location, next to the per-epoch checkpoints of this experiment.
best_model_path = os.path.join(model_dir, 'best_model')
print(best_model_path)

trainer.train()

# `load_best_model_at_end` is enabled in the training arguments, so the model
# written here is expected to be the best checkpoint rather than the last epoch.
trainer.save_model(output_dir=best_model_path)

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8370
  Num Epochs = 25
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6550
  Number of trainable parameters = 109484547


../models/BERT_PUBHEALTH_V3/best_model




Epoch,Training Loss,Validation Loss,Accuracy
1,0.8193,0.935282,0.759048
2,0.6191,0.567936,0.754286
3,0.3915,0.83345,0.810476
4,0.2401,1.508349,0.829524
5,0.1245,1.454884,0.799048
6,0.0614,2.166759,0.821905
7,0.0426,2.43497,0.820952
8,0.022,2.584445,0.82381
9,0.0135,2.893629,0.811429
10,0.0295,3.566793,0.787619


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1050
  Batch size = 32
Saving model checkpoint to ../models/BERT_PUBHEALTH_V3/checkpoint-262
Configuration saved in ../models/BERT_PUBHEALTH_V3/checkpoint-262/config.json
Model weights saved in ../models/BERT_PUBHEALTH_V3/checkpoint-262/pytorch_model.bin
tokenizer config file saved in ../models/BERT_PUBHEALTH_V3/checkpoint-262/tokenizer_config.json
Special tokens file saved in ../models/BERT_PUBHEALTH_V3/checkpoint-262/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassif

In [83]:
# Score the trained model on the FEVER test split.
trainer.evaluate(eval_dataset=encoded_ds['fever_test'])

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 9999
  Batch size = 32


{'eval_loss': 0.8770738244056702,
 'eval_accuracy': 0.5684568456845684,
 'eval_runtime': 33.5613,
 'eval_samples_per_second': 297.933,
 'eval_steps_per_second': 9.326,
 'epoch': 25.0}

In [84]:
# Score the trained model on the PUBHEALTH test split.
trainer.evaluate(eval_dataset=encoded_ds['pubhealth_test'])

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1032
  Batch size = 32


{'eval_loss': 0.743153989315033,
 'eval_accuracy': 0.7257751937984496,
 'eval_runtime': 3.5351,
 'eval_samples_per_second': 291.933,
 'eval_steps_per_second': 9.335,
 'epoch': 25.0}

In [85]:
# Score the trained model on the CLIMATE-FEVER test split.
trainer.evaluate(eval_dataset=encoded_ds['climate_test'])

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: claim, evidence. If claim, evidence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 200
  Batch size = 32


{'eval_loss': 2.758296489715576,
 'eval_accuracy': 0.315,
 'eval_runtime': 0.6893,
 'eval_samples_per_second': 290.164,
 'eval_steps_per_second': 10.156,
 'epoch': 25.0}