In [1]:
!pip install datasets transformers --quiet



In [3]:
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from datasets import load_dataset, load_metric


In [4]:
task = "stsb"
dataset = load_dataset("glue", task)
metric = load_metric('glue', task)

Found cached dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
100%|████████████████████████████████████████████| 3/3 [00:00<00:00, 952.60it/s]
  metric = load_metric('glue', task)


In [5]:
model_checkpoint = "prajjwal1/bert-tiny"
batch_size = 16

In [6]:
print("Sample from the train, validation and test dataset:")
# Train Dataset
print("Train Sample",dataset["train"][0])
# Valid Dataset
print("Validation Sample",dataset["validation"][0])
# Test Dataset
# Label of test set is -1
print("Test Sample",dataset["test"][0])

print("\nSamples in dataset:")
# Train Dataset
print("Train Sample",len(dataset["train"]))
# Valid Dataset
print("Validation Sample",len(dataset["validation"]))
# Test Dataset
print("Test Sample",len(dataset["test"]))

Sample from the train, validation and test dataset:
Train Sample {'sentence1': 'A plane is taking off.', 'sentence2': 'An air plane is taking off.', 'label': 5.0, 'idx': 0}
Validation Sample {'sentence1': 'A man with a hard hat is dancing.', 'sentence2': 'A man wearing a hard hat is dancing.', 'label': 5.0, 'idx': 0}
Test Sample {'sentence1': 'A girl is styling her hair.', 'sentence2': 'A girl is brushing her hair.', 'label': -1.0, 'idx': 0}

Samples in dataset:
Train Sample 5749
Validation Sample 1500
Test Sample 1379


In [7]:
import datasets
import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)
    
    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))


In [8]:
show_random_elements(dataset["train"])

Unnamed: 0,sentence1,sentence2,label,idx
0,A woman riding a brown horse.,A person dressed in white on a brown horse.,3.2,1206
1,"""Fairies don't exist"" - fine.","""Leprechauns don't exist"" - fine.",1.0,2122
2,Syrian forces move to retake Aleppo,Syrian Regime Bids To Retake City Of Aleppo,4.4,4286
3,"They reported symptoms of fever, headache, rash and muscle aches.","Symptoms include a stiff neck, fever, headache and sensitivity to light.",2.333,3291
4,A group of people are wearing bunny ears.,A group of people ride in a race,0.8,1522
5,"World News Update, what you need to know",Labour's 50p tax rate: what you need to know,0.6,5487
6,A black dog is chasing after a red Frisbee.,A black poodle is running in the snow.,1.4,1883
7,British stuntman fell to his death in wing suit mishap,British stuntman dies in wingsuit mishap in Alps,4.0,5591
8,A man is giving a lecture on a podium.,A man is delivering a lecture at an event celebrating the game of cricket.,2.333,899
9,I think you need to reread what I wrote.,You need to reread what I wrote.,5.0,2254


In [9]:
from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [10]:
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"],  padding = "longest", truncation=True,     return_tensors="pt" )

In [11]:
preprocess_function(dataset['train'][:5])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'input_ids': tensor([[  101,  1037,  4946,  2003,  2635,  2125,  1012,   102,  2019,  2250,
          4946,  2003,  2635,  2125,  1012,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  1037,  2158,  2003,  2652,  1037,  2312,  8928,  1012,   102,
          1037,  2158,  2003,  2652,  1037,  8928,  1012,   102,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  1037,  2158,  2003,  9359, 14021,  5596,  2098,  8808,  2006,
          1037, 10733,  1012,   102,  1037,  2158,  2003,  9359, 29022,  8808,
          2006,  2019,  4895,  3597, 23461, 10733,  1012,   102],
        [  101,  2093,  2273,  2024,  2652,  7433,  1012,   102,  2048,  2273,
          2024,  2652,  7433,  1012,   102,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0],
        [  101,  1037,  2158,  2003,  2652,  1996, 10145,  1012,   102,  1037,
          2

In [12]:
encoded_dataset = dataset.map(preprocess_function, batched=True)

Loading cached processed dataset at /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-714129d02f1aa804.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-c77836e1e332585f.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-02bdb43b1392288c.arrow


In [13]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
num_labels  = 1
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initia

In [14]:
metric_name = "pearson"

In [15]:
metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
model_name = model_checkpoint.split("/")[-1]

args = TrainingArguments(
    f"{model_name}-finetuned-{task}",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=100,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    save_total_limit = 1,
)

In [19]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    return metric.compute(predictions=predictions, references=labels)

In [None]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [None]:
trainer.train()

In [None]:
tokens_train = preprocess_function(dataset["train"][:5])

In [None]:
dataset["train"][:5]

In [None]:
model(input_ids = tokens_train["input_ids"].to("cuda"), attention_mask = tokens_train["attention_mask"].to("cuda"))

In [21]:
! pip install optuna

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [22]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)


In [23]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file https://huggingface.co/prajjwal1/bert-tiny/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3cf34679007e9fe5d0acd644dcc1f4b26bec5cbc9612364f6da7262aed4ef7a4.a5a11219cf90aae61ff30e1658ccf2cb4aa84d6b6e947336556f887c9828dc6d
Model config BertConfig {
  "_name_or_path": "prajjwal1/bert-tiny",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/prajjwal1/bert-tiny/reso

In [24]:
train_dataset = encoded_dataset["train"].shard(index=1, num_shards=10) 


In [25]:
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")


[32m[I 2023-04-11 23:17:19,453][0m A new study created in memory with name: no-name-2668f68b-7a35-45c5-b6f6-c59008b3da51[0m
Trial:
loading configuration file https://huggingface.co/prajjwal1/bert-tiny/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3cf34679007e9fe5d0acd644dcc1f4b26bec5cbc9612364f6da7262aed4ef7a4.a5a11219cf90aae61ff30e1658ccf2cb4aa84d6b6e947336556f887c9828dc6d
Model config BertConfig {
  "_name_or_path": "prajjwal1/bert-tiny",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.3",
  "t

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,3.362831,0.414522,0.362211
2,No log,2.438462,0.697794,0.722175
3,4.189900,2.059964,0.716513,0.700277
4,4.189900,1.915194,0.713139,0.686748


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-0/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-0/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-0/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-0/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-0/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,4.595453,0.136625,0.051964
2,No log,3.107267,0.511894,0.476857
3,5.492000,2.686488,0.536811,0.508233
4,5.492000,2.518337,0.576352,0.570043
5,5.492000,2.47051,0.593573,0.596531


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-1/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-1/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-1/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-1/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-1/checkpoint-180/special_tokens_map.json
Deleting older checkpoint [bert-tiny-finetuned-stsb/run-1/checkpoint-900] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Ber

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,6.343458,-0.046544,-0.077112


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-2/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-2/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-2/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-2/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-2/checkpoint-180/special_tokens_map.json
Deleting older checkpoint [bert-tiny-finetuned-stsb/run-2/checkpoint-360] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


L

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,6.293337,-0.187142,-0.163805
2,No log,4.905303,-0.222646,-0.224608
3,7.349600,3.966479,0.253885,0.289387
4,7.349600,3.597628,0.350308,0.38819
5,7.349600,3.503445,0.373631,0.409096


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-3/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-3/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-3/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-3/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-3/checkpoint-180/special_tokens_map.json
Deleting older checkpoint [bert-tiny-finetuned-stsb/run-3/checkpoint-540] due to args.save_total_limit
The following columns in the evaluation set don't have a corresponding argument in `Ber

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,6.428365,-0.252321,-0.241678
2,No log,5.36671,0.060591,0.096025
3,7.552400,4.767937,0.161832,0.202289
4,7.552400,4.467846,0.228187,0.257224
5,7.552400,4.381382,0.248321,0.27205


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-4/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-4/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-4/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-4/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-4/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,2.252,0.672409,0.712635


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-5/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-5/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-5/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-5/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-5/checkpoint-180/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from bert-tiny-finetuned-stsb/run-5/checkpoint-180 (score: 0.6724087621774145).
[32m

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,4.63771,0.384784,0.362759
2,No log,3.338796,0.467932,0.470027
3,5.697800,2.89988,0.441559,0.436102
4,5.697800,2.792563,0.503317,0.497677


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-6/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-6/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-6/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-6/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-6/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,5.346148,0.313587,0.174203
2,No log,3.645181,0.480367,0.384226


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-7/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-7/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-7/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-7/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-7/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,4.60668,0.440252,0.392956
2,No log,3.818135,0.395838,0.296588


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-8/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-8/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-8/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-8/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-8/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,2.120394,0.740673,0.738816
2,No log,1.18681,0.722688,0.715892
3,2.487100,0.99699,0.768851,0.791921
4,2.487100,0.988548,0.772805,0.786968
5,2.487100,0.999806,0.7758,0.791835


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/run-9/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/run-9/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/run-9/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/run-9/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/run-9/checkpoint-180/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1

In [26]:
best_run

BestRun(run_id='9', objective=1.5676357556142644, hyperparameters={'learning_rate': 5.266874265634887e-05, 'num_train_epochs': 5, 'seed': 40, 'per_device_train_batch_size': 32})

In [27]:

best_run

for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

trainer.train()

loading configuration file https://huggingface.co/prajjwal1/bert-tiny/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3cf34679007e9fe5d0acd644dcc1f4b26bec5cbc9612364f6da7262aed4ef7a4.a5a11219cf90aae61ff30e1658ccf2cb4aa84d6b6e947336556f887c9828dc6d
Model config BertConfig {
  "_name_or_path": "prajjwal1/bert-tiny",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/prajjwal1/bert-tiny/reso

Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,2.120394,0.740673,0.738816
2,No log,1.18681,0.722688,0.715892
3,2.487100,0.99699,0.768851,0.791921
4,2.487100,0.988548,0.772805,0.786968
5,2.487100,0.999806,0.7758,0.791835


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 32
Saving model checkpoint to bert-tiny-finetuned-stsb/checkpoint-180
Configuration saved in bert-tiny-finetuned-stsb/checkpoint-180/config.json
Model weights saved in bert-tiny-finetuned-stsb/checkpoint-180/pytorch_model.bin
tokenizer config file saved in bert-tiny-finetuned-stsb/checkpoint-180/tokenizer_config.json
Special tokens file saved in bert-tiny-finetuned-stsb/checkpoint-180/special_tokens_map.json
Deleting older checkpoint [bert-tiny-finetuned-stsb/checkpoint-6300] due to args.save_total_limit
Deleting older checkpoint [bert-tiny-finetuned-stsb/checkpoint-7380] due to args.save_total_limit
The following columns in

TrainOutput(global_step=900, training_loss=1.7975625610351562, metrics={'train_runtime': 27.9504, 'train_samples_per_second': 1028.428, 'train_steps_per_second': 32.2, 'total_flos': 8907396989022.0, 'train_loss': 1.7975625610351562, 'epoch': 5.0})