In [30]:
from datasets import load_dataset


def _load_boolq(split):
    """Load one split of "google/boolq" with its `answer` column renamed to `label`."""
    return load_dataset("google/boolq", split=split).rename_column("answer", "label")


# First half of the train split, second half of the train split, and the
# validation split, each with the `answer` column exposed as `label`.
ds_weak_train = _load_boolq("train[:50%]")
ds_weak_labels = _load_boolq("train[50%:]")
ds_weak_test = _load_boolq("validation")



In [31]:
from transformers import AutoTokenizer

# Load the tokenizer for the "google-bert/bert-base-uncased" checkpoint
# (the same checkpoint name the classification model is loaded from).
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")


def tokenize_function(examples):
    """Tokenize a batch of (question, passage) pairs.

    `examples` is indexed by the "question" and "passage" keys; the two
    values are handed to the module-level `tokenizer` together as a paired
    input, with truncation enabled and ``padding="max_length"``.

    Returns whatever the tokenizer call returns.
    """
    questions = examples["question"]
    passages = examples["passage"]
    return tokenizer(questions, passages, truncation=True, padding="max_length")


# Apply `tokenize_function` in batched mode to the training split and then to
# the test split; the results are bound in that same order.
tokenized_datasets_train, tokenized_datasets_test = (
    split.map(tokenize_function, batched=True)
    for split in (ds_weak_train, ds_weak_test)
)

Map: 100%|██████████| 3270/3270 [00:00<00:00, 4602.60 examples/s]


In [32]:
from transformers import AutoModelForSequenceClassification, set_seed

# The classification head is not part of the pretrained checkpoint, so it is
# randomly initialised when the model is built (see the "newly initialized"
# warning this cell prints). Seed the RNGs first so that initialisation is
# repeatable across kernel restarts. 42 is the default value of
# `TrainingArguments.seed`.
set_seed(42)

# BERT encoder with a two-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [33]:
from transformers import TrainingArguments

# NOTE(review): `training_args` is assigned again in a later cell (with
# `eval_strategy` and `num_train_epochs`) before the Trainer is constructed,
# so this output_dir-only configuration appears to be superseded — confirm
# and consider removing this cell.
training_args = TrainingArguments(output_dir="test_trainer_boolq")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [16]:
import numpy as np
import evaluate

# Load the "accuracy" metric through `evaluate`; `compute_metrics` calls its
# `.compute(predictions=..., references=...)` method.
metric = evaluate.load("accuracy")

In [23]:
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `metric`.

    `eval_pred` unpacks into exactly two items: the logits and the reference
    labels. The predicted class for each row is the index of its largest
    logit along the last axis.

    Returns whatever `metric.compute` returns for those predictions and
    references.
    """
    scores, gold_labels = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(references=gold_labels, predictions=predicted_classes)

In [24]:
# Display the `ds_weak_labels` split (its features and row count).
ds_weak_labels

Dataset({
    features: ['question', 'label', 'passage'],
    num_rows: 4713
})

In [None]:
from transformers import TrainingArguments, Trainer

# Training configuration used by the Trainer below: ten epochs, with an
# evaluation pass at the end of each one, writing to "test_trainer_boolq".
training_args = TrainingArguments(
    num_train_epochs=10,
    eval_strategy="epoch",
    output_dir="test_trainer_boolq",
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [28]:
# Wire together the model, the training configuration, the tokenised
# train/test splits and the metric callback.
trainer = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_datasets_test,
    train_dataset=tokenized_datasets_train,
    args=training_args,
    model=model,
)

In [29]:
# Run training; the value returned by `train()` is shown as the cell output.
trainer.train()

  0%|          | 0/2360 [26:20<?, ?it/s]
 21%|██        | 500/2360 [03:20<12:32,  2.47it/s]

{'loss': 0.3284, 'grad_norm': 15.955248832702637, 'learning_rate': 3.940677966101695e-05, 'epoch': 0.85}


                                                  
 25%|██▌       | 590/2360 [04:53<09:30,  3.10it/s]

{'eval_loss': 1.5451141595840454, 'eval_accuracy': 0.6391437308868502, 'eval_runtime': 53.1915, 'eval_samples_per_second': 61.476, 'eval_steps_per_second': 7.689, 'epoch': 1.0}


 42%|████▏     | 1000/2360 [07:38<09:16,  2.44it/s] 

{'loss': 0.1567, 'grad_norm': 0.04890475794672966, 'learning_rate': 2.88135593220339e-05, 'epoch': 1.69}


                                                   
 50%|█████     | 1180/2360 [09:46<06:21,  3.09it/s]

{'eval_loss': 1.7466758489608765, 'eval_accuracy': 0.6700305810397553, 'eval_runtime': 54.2736, 'eval_samples_per_second': 60.25, 'eval_steps_per_second': 7.536, 'epoch': 2.0}


 64%|██████▎   | 1500/2360 [11:54<05:33,  2.58it/s]  

{'loss': 0.0788, 'grad_norm': 0.7010005712509155, 'learning_rate': 1.8220338983050846e-05, 'epoch': 2.54}


                                                   
 75%|███████▌  | 1771/2360 [14:32<2:32:33, 15.54s/it]

{'eval_loss': 2.2002763748168945, 'eval_accuracy': 0.6795107033639144, 'eval_runtime': 50.887, 'eval_samples_per_second': 64.26, 'eval_steps_per_second': 8.037, 'epoch': 3.0}


 85%|████████▍ | 2000/2360 [16:00<02:19,  2.59it/s]  

{'loss': 0.0514, 'grad_norm': 0.004425466060638428, 'learning_rate': 7.627118644067798e-06, 'epoch': 3.39}


                                                   
100%|██████████| 2360/2360 [19:15<00:00,  2.04it/s]

{'eval_loss': 2.485921859741211, 'eval_accuracy': 0.6629969418960244, 'eval_runtime': 51.9836, 'eval_samples_per_second': 62.904, 'eval_steps_per_second': 7.868, 'epoch': 4.0}
{'train_runtime': 1155.5982, 'train_samples_per_second': 16.317, 'train_steps_per_second': 2.042, 'train_loss': 0.13574910083059538, 'epoch': 4.0}





TrainOutput(global_step=2360, training_loss=0.13574910083059538, metrics={'train_runtime': 1155.5982, 'train_samples_per_second': 16.317, 'train_steps_per_second': 2.042, 'total_flos': 4961222059868160.0, 'train_loss': 0.13574910083059538, 'epoch': 4.0})