In [1]:
import os

# from main import FeverLoader, PubhealthLoader, ClimateFeverLoader
from main import load_datasets

  from .autonotebook import tqdm as notebook_tqdm


## Setting
- 1: train on FEVER
- 2: train on PUBHEALTH
- 3: train on CLIMATE-FEVER

In [2]:
# Active training setting: "1" = FEVER, "2" = PUBHEALTH, "3" = CLIMATE.
experiment = "3"

# Output directory used for each setting.
# Alternative output directories for setting "3" (disabled):
#   "../models/BERT_CLIMATE"
#   "../models/BERT_CLIMATE_V2_best_accuracy"
model_dirs = dict([
    ("1", "../models/BERT_FEVER"),
    ("2", "../models/BERT_PUBHEALTH"),
    ("3", "../models/BERT_CLIMATE_V2_best_loss"),
])

## Load Data
- ds1: train on FEVER
- ds2: train on PUBHEALTH
- ds3: train on CLIMATE-FEVER

In [3]:
# Root folder of the data snapshot; every dataset lives under it.
root = '../data_2023_06_02'

# One preprocessed directory per dataset, all resolved relative to `root`.
fever_dir, pubhealth_dir, climate_dir = (
    os.path.join(root, subdir)
    for subdir in (
        'preprocessed/FEVER',
        'preprocessed/PUBHEALTH',
        'preprocessed/CLIMATE-FEVER',
    )
)

In [4]:
# Split parameters below are disabled and are not passed to load_datasets.
# climate_params = {
#     'dev_size': 150,
#     'test_size': 150,
#     'random_state': 88
# }

# load_datasets yields four objects: one dataset bundle per training setting
# (ds1, ds2, ds3) plus ds_test.
ds1, ds2, ds3, ds_test = load_datasets(
    fever_dir,
    pubhealth_dir,
    climate_dir,
)

## Training

In [5]:
# Pretrained checkpoint to fine-tune: https://huggingface.co/bert-base-uncased
# Larger alternative (disabled): "bert-large-uncased", https://huggingface.co/bert-large-uncased
model_name = "bert-base-uncased"

#### Tokenize data

In [6]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)


def preprocess_function(samples):
    """Tokenize a batch of (claim, evidence) pairs as two-segment inputs.

    Args:
        samples: Batch of rows exposing 'claim' and 'evidence' entries, as
            handed over by ``map(..., batched=True)``.

    Returns:
        The tokenizer output for the batch, left un-padded.
    """
    # No padding here on purpose: padding inside a batched map() pads every row
    # to the longest sequence of the whole map batch. The Trainer in this
    # notebook is given the tokenizer and pads each mini-batch to its own
    # longest sequence at collation time, which avoids the wasted compute.
    return tokenizer(
        samples['claim'],
        samples['evidence'],
        truncation='only_second',  # shorten only the evidence, never the claim
        max_length=512,            # matches the model's 512 position embeddings
    )

In [7]:
# Reject unknown experiment ids up front, then pick the matching dataset bundle.
if experiment not in ("1", "2", "3"):
    raise ValueError("Unknown Experiment")

ds = {"1": ds1, "2": ds2, "3": ds3}[experiment]

In [8]:
# Tokenize every split; batched=True passes preprocess_function many rows per call.
encoded_ds = ds.map(
    preprocess_function,
    batched=True,
)

                                                                 

In [9]:
# Print a summary of the tokenized dataset (splits, columns and row counts).
print(encoded_ds)

DatasetDict({
    train: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1081
    })
    validation: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 150
    })
    fever_test: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 9999
    })
    pubhealth_test: Dataset({
        features: ['claim', 'evidence', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1032
    })
    climate_test: Dataset({
        features: ['claim', 'label', 'evidence', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 150
    })
})


#### Setup model

In [10]:
from transformers import AutoModelForSequenceClassification

In [11]:
# Size of the classification head (three target classes).
num_labels = 3

# Pretrained encoder plus a freshly initialised sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)
print(model)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

#### Setup trainer

In [12]:
import numpy as np
import evaluate 
from transformers import TrainingArguments, Trainer

In [13]:
# Accuracy scorer obtained through the `evaluate` library.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy.

    Args:
        eval_pred: Pair that unpacks into (logits, labels).

    Returns:
        The result of ``metric.compute`` for predicted vs. reference labels.
    """
    scores, gold_labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(references=gold_labels, predictions=predicted)

In [14]:
# Per-device batch size (used for both training and evaluation) and epoch budget.
batch_size = 8
num_epochs = 25

# Index directly instead of .get(): if the entry for the active experiment is
# missing (e.g. all variants commented out in model_dirs), fail here with a
# KeyError rather than carrying a silent None into the training setup.
model_dir = model_dirs[experiment]
print(model_dir)

../models/BERT_CLIMATE_V2_best_loss


In [15]:
# Evaluate, log and checkpoint once per epoch. With load_best_model_at_end set
# and metric_for_best_model left unset, the best checkpoint is chosen by
# validation loss (library default; see the TrainingArguments docs).
args = TrainingArguments(
    output_dir = model_dir,
    num_train_epochs = num_epochs,
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    logging_strategy = "epoch",
    evaluation_strategy = "epoch",  # alternative (disabled): "steps"
    save_strategy = "epoch",
    save_total_limit = 2,
    load_best_model_at_end = True,
    # metric_for_best_model = "accuracy"  (disabled)
)

In [16]:
# Wire model, arguments, data splits, tokenizer and metric into one Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_ds["train"],
    eval_dataset=encoded_ds["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

#### Train model

In [17]:
import torch
# Release unused cached GPU memory held by PyTorch before training starts.
torch.cuda.empty_cache()

In [18]:
# The final model is stored in a 'best_model' sub-folder of the run directory.
best_model_path = os.path.join(model_dir, 'best_model')
print(best_model_path)

# Fine-tune, then persist the model the trainer holds once training finishes.
trainer.train()
trainer.save_model(output_dir=best_model_path)

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: evidence, claim. If evidence, claim are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 1081
  Num Epochs = 25
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 850
  Number of trainable parameters = 109484547
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


../models/BERT_CLIMATE_V2_best_loss/best_model




Epoch,Training Loss,Validation Loss,Accuracy
1,1.0226,1.004552,0.5
2,0.885,1.016841,0.493333
3,0.6474,1.041688,0.566667
4,0.366,1.311622,0.56
5,0.1801,1.709816,0.553333
6,0.1054,1.874791,0.546667
7,0.0647,2.221457,0.566667
8,0.0995,2.189667,0.526667
9,0.0297,2.319172,0.566667
10,0.0234,2.585561,0.546667


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: evidence, claim. If evidence, claim are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 150
  Batch size = 32
Saving model checkpoint to ../models/BERT_CLIMATE_V2_best_loss/checkpoint-34
Configuration saved in ../models/BERT_CLIMATE_V2_best_loss/checkpoint-34/config.json
Model weights saved in ../models/BERT_CLIMATE_V2_best_loss/checkpoint-34/pytorch_model.bin
tokenizer config file saved in ../models/BERT_CLIMATE_V2_best_loss/checkpoint-34/tokenizer_config.json
Special tokens file saved in ../models/BERT_CLIMATE_V2_best_loss/checkpoint-34/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: evidence, claim. If evidence, claim are not e

In [None]:
# Evaluate the trained model on the FEVER test split.
trainer.evaluate(encoded_ds['fever_test'])

In [None]:
# Evaluate the trained model on the PUBHEALTH test split.
trainer.evaluate(encoded_ds['pubhealth_test'])

In [None]:
# Evaluate the trained model on the climate test split.
trainer.evaluate(encoded_ds['climate_test'])