In [3]:
from transformers import (BertTokenizer, AutoTokenizer, AutoModelForTokenClassification,
                          BertForTokenClassification, DataCollatorForTokenClassification, TrainingArguments,
                          Trainer)

from datasets import load_from_disk, load_metric

import os
import numpy as np

In [18]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [4]:
wiki_datasets = load_from_disk("/home/abdullah/Code/dl/lt_bert/dataset/wikiann")

metric = load_metric("seqeval")

wiki_datasets

DatasetDict({
    validation: Dataset({
        features: ['tokens', 'ner_tags', 'langs', 'spans'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['tokens', 'ner_tags', 'langs', 'spans'],
        num_rows: 1000
    })
    train: Dataset({
        features: ['tokens', 'ner_tags', 'langs', 'spans'],
        num_rows: 10000
    })
})

In [5]:
MODEL_NAME = "/home/abdullah/Code/dl/lt_bert/best_models/best_tiny_bert"

In [6]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

print(tokenizer.is_fast)

inputs = tokenizer(wiki_datasets["train"][0]
                   ["tokens"], is_split_into_words=True)



True


In [7]:
def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels


In [8]:
labels = wiki_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))


[5, 6, 6]
[-100, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -100]


In [9]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs


In [10]:
tokenized_datasets = wiki_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=wiki_datasets["train"].column_names,
)


Loading cached processed dataset at /home/abdullah/Code/dl/lt_bert/dataset/wikiann/validation/cache-4422150ff2692120.arrow
  0%|          | 0/1 [00:00<?, ?ba/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 1/1 [00:00<00:00, 12.59ba/s]
Loading cached processed dataset at /home/abdullah/Code/dl/lt_bert/dataset/wikiann/train/cache-e7e9c9e53542a14a.arrow


In [11]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

label_names = wiki_datasets["train"].features["ner_tags"].feature.names
labels = wiki_datasets["train"][0]["ner_tags"]
labels = [label_names[i] for i in labels]
labels


['B-LOC', 'I-LOC', 'I-LOC']

In [12]:
predictions = labels.copy()
predictions[1] = "I-LOC"
metric.compute(predictions=[predictions], references=[labels])


{'LOC': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 1},
 'overall_precision': 1.0,
 'overall_recall': 1.0,
 'overall_f1': 1.0,
 'overall_accuracy': 1.0}

In [13]:
def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100]
                   for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(
        predictions=true_predictions, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }


In [14]:
id2label = {str(i): label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

model = AutoModelForTokenClassification.from_pretrained(
    MODEL_NAME,
    id2label=id2label,
    label2id=label2id,
)

model.config.num_labels

Some weights of the model checkpoint at /home/abdullah/Code/dl/lt_bert/best_models/best_tiny_bert were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at

7

In [16]:
os.environ["WANDB_DISABLED"] = "true"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

args = TrainingArguments(
    output_dir="temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()


using `logging_steps` to initialize `eval_steps` to 400
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 10000
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 5300
  8%|▊         | 400/5300 [00:19<04:13, 19.30it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.9667, 'learning_rate': 4e-05, 'epoch': 7.55}


                                                  
  8%|▊         | 400/5300 [00:20<04:13, 19.30it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json
Model weights saved in temp/checkpoint-400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-400/special_tokens_map.json


{'eval_loss': 0.8111760020256042, 'eval_precision': 0.16948378254910917, 'eval_recall': 0.33514001806684734, 'eval_f1': 0.22512135922330098, 'eval_accuracy': 0.6978030746316294, 'eval_runtime': 0.507, 'eval_samples_per_second': 1972.389, 'eval_steps_per_second': 5.917, 'epoch': 7.55}


 15%|█▌        | 800/5300 [00:40<03:28, 21.57it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.8335, 'learning_rate': 4.6875e-05, 'epoch': 15.09}


                                                  
 15%|█▌        | 800/5300 [00:41<03:28, 21.57it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.6798779368400574, 'eval_precision': 0.24538934426229508, 'eval_recall': 0.4327009936766034, 'eval_f1': 0.31317423994769533, 'eval_accuracy': 0.7457843502313953, 'eval_runtime': 0.4684, 'eval_samples_per_second': 2134.771, 'eval_steps_per_second': 6.404, 'epoch': 15.09}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-800/special_tokens_map.json
 23%|██▎       | 1200/5300 [01:02<03:01, 22.54it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.7275, 'learning_rate': 4.270833333333333e-05, 'epoch': 22.64}


                                                   
 23%|██▎       | 1200/5300 [01:02<03:01, 22.54it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json
Model weights saved in temp/checkpoint-1200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-1200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-1200/special_tokens_map.json


{'eval_loss': 0.5764256119728088, 'eval_precision': 0.3648414985590778, 'eval_recall': 0.5718157181571816, 'eval_f1': 0.4454609429978888, 'eval_accuracy': 0.7919570189903719, 'eval_runtime': 0.4646, 'eval_samples_per_second': 2152.193, 'eval_steps_per_second': 6.457, 'epoch': 22.64}


 30%|███       | 1600/5300 [01:23<02:38, 23.36it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.6254, 'learning_rate': 3.854166666666667e-05, 'epoch': 30.19}


                                                   
 30%|███       | 1600/5300 [01:23<02:38, 23.36it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-1600/tokenizer_config.json
Special tokens file saved in temp/checkpoint-1600/special_tokens_map.json


{'eval_loss': 0.5031043887138367, 'eval_precision': 0.38370786516853933, 'eval_recall': 0.6169828364950316, 'eval_f1': 0.4731555247661933, 'eval_accuracy': 0.8224905580084046, 'eval_runtime': 0.4623, 'eval_samples_per_second': 2162.965, 'eval_steps_per_second': 6.489, 'epoch': 30.19}


 38%|███▊      | 2000/5300 [01:43<02:51, 19.26it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5445, 'learning_rate': 3.4375e-05, 'epoch': 37.74}


                                                   
 38%|███▊      | 2000/5300 [01:44<02:51, 19.26it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.43247777223587036, 'eval_precision': 0.4560099132589839, 'eval_recall': 0.6648599819331527, 'eval_f1': 0.5409775817714076, 'eval_accuracy': 0.8458960583009735, 'eval_runtime': 0.4664, 'eval_samples_per_second': 2143.926, 'eval_steps_per_second': 6.432, 'epoch': 37.74}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2000/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2000/special_tokens_map.json
 45%|████▌     | 2400/5300 [02:04<02:13, 21.71it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4843, 'learning_rate': 3.0208333333333334e-05, 'epoch': 45.28}


                                                   
 45%|████▌     | 2400/5300 [02:05<02:13, 21.71it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json
Model weights saved in temp/checkpoint-2400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2400/special_tokens_map.json


{'eval_loss': 0.3974553942680359, 'eval_precision': 0.472, 'eval_recall': 0.6928635953026197, 'eval_f1': 0.5614934114202049, 'eval_accuracy': 0.8623331028246183, 'eval_runtime': 0.4593, 'eval_samples_per_second': 2177.042, 'eval_steps_per_second': 6.531, 'epoch': 45.28}


 53%|█████▎    | 2800/5300 [02:24<02:09, 19.33it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4423, 'learning_rate': 2.604166666666667e-05, 'epoch': 52.83}


                                                   
 53%|█████▎    | 2800/5300 [02:25<02:09, 19.33it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.37996014952659607, 'eval_precision': 0.49625468164794007, 'eval_recall': 0.7181571815718157, 'eval_f1': 0.5869324473975637, 'eval_accuracy': 0.8710037767966381, 'eval_runtime': 0.4618, 'eval_samples_per_second': 2165.552, 'eval_steps_per_second': 6.497, 'epoch': 52.83}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2800/special_tokens_map.json
 60%|██████    | 3200/5300 [02:46<01:46, 19.70it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4116, 'learning_rate': 2.1875e-05, 'epoch': 60.38}


                                                   
 60%|██████    | 3200/5300 [02:46<01:46, 19.70it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.35795339941978455, 'eval_precision': 0.5341130604288499, 'eval_recall': 0.7425474254742548, 'eval_f1': 0.6213151927437642, 'eval_accuracy': 0.8815362519282941, 'eval_runtime': 0.5033, 'eval_samples_per_second': 1986.725, 'eval_steps_per_second': 5.96, 'epoch': 60.38}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-3200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-3200/special_tokens_map.json
 68%|██████▊   | 3600/5300 [03:06<01:26, 19.74it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.3861, 'learning_rate': 1.7708333333333335e-05, 'epoch': 67.92}


                                                   
 68%|██████▊   | 3600/5300 [03:07<01:26, 19.74it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json
Model weights saved in temp/checkpoint-3600/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-3600/tokenizer_config.json
Special tokens file saved in temp/checkpoint-3600/special_tokens_map.json


{'eval_loss': 0.3485967516899109, 'eval_precision': 0.5658602150537635, 'eval_recall': 0.7606142728093948, 'eval_f1': 0.6489402697495184, 'eval_accuracy': 0.8856854087983403, 'eval_runtime': 0.4601, 'eval_samples_per_second': 2173.371, 'eval_steps_per_second': 6.52, 'epoch': 67.92}


 75%|███████▌  | 4000/5300 [03:27<01:01, 21.13it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.3685, 'learning_rate': 1.3541666666666666e-05, 'epoch': 75.47}


                                                   
 75%|███████▌  | 4000/5300 [03:28<01:01, 21.13it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.3416668176651001, 'eval_precision': 0.5757780784844384, 'eval_recall': 0.7687443541102078, 'eval_f1': 0.6584139264990329, 'eval_accuracy': 0.8901005372626204, 'eval_runtime': 0.4541, 'eval_samples_per_second': 2202.105, 'eval_steps_per_second': 6.606, 'epoch': 75.47}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4000/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4000/special_tokens_map.json
 83%|████████▎ | 4400/5300 [03:48<00:40, 22.16it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.3586, 'learning_rate': 9.375000000000001e-06, 'epoch': 83.02}


                                                   
 83%|████████▎ | 4400/5300 [03:48<00:40, 22.16it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json


{'eval_loss': 0.3337101936340332, 'eval_precision': 0.5770006724949562, 'eval_recall': 0.7750677506775068, 'eval_f1': 0.661526599845798, 'eval_accuracy': 0.8914303952337891, 'eval_runtime': 0.4972, 'eval_samples_per_second': 2011.268, 'eval_steps_per_second': 6.034, 'epoch': 83.02}


Model weights saved in temp/checkpoint-4400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4400/special_tokens_map.json
 91%|█████████ | 4800/5300 [04:10<00:21, 22.88it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.3495, 'learning_rate': 5.208333333333334e-06, 'epoch': 90.57}


                                                   
 91%|█████████ | 4800/5300 [04:10<00:21, 22.88it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.33125820755958557, 'eval_precision': 0.5784511784511784, 'eval_recall': 0.7759710930442638, 'eval_f1': 0.6628086419753088, 'eval_accuracy': 0.8928666418426512, 'eval_runtime': 0.4607, 'eval_samples_per_second': 2170.488, 'eval_steps_per_second': 6.511, 'epoch': 90.57}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4800/special_tokens_map.json
 98%|█████████▊| 5200/5300 [04:32<00:04, 21.58it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.343, 'learning_rate': 1.0416666666666667e-06, 'epoch': 98.11}


                                                   
 98%|█████████▊| 5200/5300 [04:32<00:04, 21.58it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json


{'eval_loss': 0.32981517910957336, 'eval_precision': 0.5772849462365591, 'eval_recall': 0.7759710930442638, 'eval_f1': 0.6620423892100192, 'eval_accuracy': 0.8930794191180382, 'eval_runtime': 0.4551, 'eval_samples_per_second': 2197.438, 'eval_steps_per_second': 6.592, 'epoch': 98.11}


Model weights saved in temp/checkpoint-5200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-5200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-5200/special_tokens_map.json
100%|█████████▉| 5297/5300 [04:38<00:00, 20.29it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-5200 (score: 0.32981517910957336).
100%|██████████| 5300/5300 [04:38<00:00, 19.01it/s]

{'train_runtime': 278.7794, 'train_samples_per_second': 3587.065, 'train_steps_per_second': 19.011, 'train_loss': 0.5227531519475973, 'epoch': 100.0}





TrainOutput(global_step=5300, training_loss=0.5227531519475973, metrics={'train_runtime': 278.7794, 'train_samples_per_second': 3587.065, 'train_steps_per_second': 19.011, 'train_loss': 0.5227531519475973, 'epoch': 100.0})

In [17]:
os.environ["WANDB_DISABLED"] = "true"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

args = TrainingArguments(
    output_dir="temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=AutoModelForTokenClassification.from_pretrained("/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert", id2label=id2label,
                                                          label2id=label2id,),
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()


using `logging_steps` to initialize `eval_steps` to 400
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
loading configuration file /home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert/config.json
Model config BertConfig {
  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "O",
    "1": "B-PER",
    "2": "I-PER",
    "

{'loss': 1.6435, 'learning_rate': 4e-05, 'epoch': 7.55}



  8%|▊         | 400/5300 [00:19<04:06, 19.88it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 1.207398772239685, 'eval_precision': 0.028063851699279094, 'eval_recall': 0.0984643179765131, 'eval_f1': 0.04367862151873372, 'eval_accuracy': 0.4309271769774988, 'eval_runtime': 0.451, 'eval_samples_per_second': 2217.497, 'eval_steps_per_second': 6.652, 'epoch': 7.55}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-400/special_tokens_map.json
 15%|█▌        | 800/5300 [00:39<03:22, 22.18it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 1.0163, 'learning_rate': 4.6875e-05, 'epoch': 15.09}



 15%|█▌        | 800/5300 [00:40<03:22, 22.18it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.8475487232208252, 'eval_precision': 0.12970261554998208, 'eval_recall': 0.32700993676603435, 'eval_f1': 0.1857362750128271, 'eval_accuracy': 0.6658332890047343, 'eval_runtime': 0.4592, 'eval_samples_per_second': 2177.675, 'eval_steps_per_second': 6.533, 'epoch': 15.09}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-800/special_tokens_map.json
 23%|██▎       | 1200/5300 [01:01<02:59, 22.83it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.8244, 'learning_rate': 4.270833333333333e-05, 'epoch': 22.64}



 23%|██▎       | 1200/5300 [01:01<02:59, 22.83it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.7048341631889343, 'eval_precision': 0.18990073370738023, 'eval_recall': 0.3974706413730804, 'eval_f1': 0.2570093457943925, 'eval_accuracy': 0.7375392308101495, 'eval_runtime': 0.4525, 'eval_samples_per_second': 2210.071, 'eval_steps_per_second': 6.63, 'epoch': 22.64}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-1200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-1200/special_tokens_map.json
 30%|███       | 1600/5300 [01:23<02:36, 23.71it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.7066, 'learning_rate': 3.854166666666667e-05, 'epoch': 30.19}



 30%|███       | 1600/5300 [01:23<02:36, 23.71it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 0.6260949969291687, 'eval_precision': 0.2568627450980392, 'eval_recall': 0.47335140018066846, 'eval_f1': 0.33301557038449314, 'eval_accuracy': 0.7715835948720676, 'eval_runtime': 0.4637, 'eval_samples_per_second': 2156.521, 'eval_steps_per_second': 6.47, 'epoch': 30.19}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-1600/tokenizer_config.json
Special tokens file saved in temp/checkpoint-1600/special_tokens_map.json
 38%|███▊      | 2000/5300 [01:45<02:36, 21.05it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.6389, 'learning_rate': 3.4375e-05, 'epoch': 37.74}



 38%|███▊      | 2000/5300 [01:45<02:36, 21.05it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.5744690895080566, 'eval_precision': 0.321061180292366, 'eval_recall': 0.5356820234869015, 'eval_f1': 0.4014895057549086, 'eval_accuracy': 0.8010532475131656, 'eval_runtime': 0.4624, 'eval_samples_per_second': 2162.6, 'eval_steps_per_second': 6.488, 'epoch': 37.74}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2000/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2000/special_tokens_map.json
 45%|████▌     | 2400/5300 [02:07<02:13, 21.65it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5941, 'learning_rate': 3.0208333333333334e-05, 'epoch': 45.28}



 45%|████▌     | 2400/5300 [02:07<02:13, 21.65it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.5405754446983337, 'eval_precision': 0.37801293356848914, 'eval_recall': 0.5808491418247516, 'eval_f1': 0.457977207977208, 'eval_accuracy': 0.815681685196021, 'eval_runtime': 0.468, 'eval_samples_per_second': 2136.735, 'eval_steps_per_second': 6.41, 'epoch': 45.28}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2400/special_tokens_map.json
 53%|█████▎    | 2800/5300 [02:29<02:08, 19.44it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5624, 'learning_rate': 2.604166666666667e-05, 'epoch': 52.83}



 53%|█████▎    | 2800/5300 [02:29<02:08, 19.44it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.5095313787460327, 'eval_precision': 0.41379310344827586, 'eval_recall': 0.6287262872628726, 'eval_f1': 0.49910362136966646, 'eval_accuracy': 0.8276504069365391, 'eval_runtime': 0.4961, 'eval_samples_per_second': 2015.609, 'eval_steps_per_second': 6.047, 'epoch': 52.83}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-2800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-2800/special_tokens_map.json
 60%|██████    | 3200/5300 [02:50<01:49, 19.23it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5366, 'learning_rate': 2.1875e-05, 'epoch': 60.38}



 60%|██████    | 3200/5300 [02:51<01:49, 19.23it/s]Saving model checkpoint to temp/checkpoint-3200
Configuration saved in temp/checkpoint-3200/config.json


{'eval_loss': 0.4920791685581207, 'eval_precision': 0.45517241379310347, 'eval_recall': 0.6558265582655827, 'eval_f1': 0.5373797187268691, 'eval_accuracy': 0.8355231661258578, 'eval_runtime': 0.4642, 'eval_samples_per_second': 2154.373, 'eval_steps_per_second': 6.463, 'epoch': 60.38}


Model weights saved in temp/checkpoint-3200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-3200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-3200/special_tokens_map.json
 68%|██████▊   | 3600/5300 [03:11<01:21, 20.81it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5204, 'learning_rate': 1.7708333333333335e-05, 'epoch': 67.92}



 68%|██████▊   | 3600/5300 [03:11<01:21, 20.81it/s]Saving model checkpoint to temp/checkpoint-3600
Configuration saved in temp/checkpoint-3600/config.json
Model weights saved in temp/checkpoint-3600/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-3600/tokenizer_config.json
Special tokens file saved in temp/checkpoint-3600/special_tokens_map.json


{'eval_loss': 0.47886762022972107, 'eval_precision': 0.470926517571885, 'eval_recall': 0.6657633242999097, 'eval_f1': 0.5516467065868264, 'eval_accuracy': 0.8402042661843715, 'eval_runtime': 0.4497, 'eval_samples_per_second': 2223.698, 'eval_steps_per_second': 6.671, 'epoch': 67.92}


 75%|███████▌  | 4000/5300 [03:31<01:02, 20.87it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.5049, 'learning_rate': 1.3541666666666666e-05, 'epoch': 75.47}



 75%|███████▌  | 4000/5300 [03:32<01:02, 20.87it/s]Saving model checkpoint to temp/checkpoint-4000
Configuration saved in temp/checkpoint-4000/config.json


{'eval_loss': 0.4702605605125427, 'eval_precision': 0.4983432736911862, 'eval_recall': 0.6793134598012647, 'eval_f1': 0.5749235474006117, 'eval_accuracy': 0.8447789776051917, 'eval_runtime': 0.4824, 'eval_samples_per_second': 2072.783, 'eval_steps_per_second': 6.218, 'epoch': 75.47}


Model weights saved in temp/checkpoint-4000/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4000/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4000/special_tokens_map.json
 83%|████████▎ | 4400/5300 [03:52<00:41, 21.79it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4946, 'learning_rate': 9.375000000000001e-06, 'epoch': 83.02}



 83%|████████▎ | 4400/5300 [03:53<00:41, 21.79it/s]Saving model checkpoint to temp/checkpoint-4400
Configuration saved in temp/checkpoint-4400/config.json
Model weights saved in temp/checkpoint-4400/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4400/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4400/special_tokens_map.json


{'eval_loss': 0.4646420478820801, 'eval_precision': 0.5006613756613757, 'eval_recall': 0.6838301716350497, 'eval_f1': 0.5780832378770523, 'eval_accuracy': 0.845364115112506, 'eval_runtime': 0.5017, 'eval_samples_per_second': 1993.256, 'eval_steps_per_second': 5.98, 'epoch': 83.02}


 91%|█████████ | 4800/5300 [04:13<00:22, 22.66it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4906, 'learning_rate': 5.208333333333334e-06, 'epoch': 90.57}



 91%|█████████ | 4800/5300 [04:13<00:22, 22.66it/s]Saving model checkpoint to temp/checkpoint-4800
Configuration saved in temp/checkpoint-4800/config.json


{'eval_loss': 0.4606504440307617, 'eval_precision': 0.5019710906701709, 'eval_recall': 0.6901535682023487, 'eval_f1': 0.581209585393686, 'eval_accuracy': 0.8461620298952072, 'eval_runtime': 0.5193, 'eval_samples_per_second': 1925.618, 'eval_steps_per_second': 5.777, 'epoch': 90.57}


Model weights saved in temp/checkpoint-4800/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-4800/tokenizer_config.json
Special tokens file saved in temp/checkpoint-4800/special_tokens_map.json
 98%|█████████▊| 5200/5300 [04:33<00:04, 21.27it/s]***** Running Evaluation *****
  Num examples = 1000
  Batch size = 384


{'loss': 0.4885, 'learning_rate': 1.0416666666666667e-06, 'epoch': 98.11}



 98%|█████████▊| 5200/5300 [04:34<00:04, 21.27it/s]Saving model checkpoint to temp/checkpoint-5200
Configuration saved in temp/checkpoint-5200/config.json
Model weights saved in temp/checkpoint-5200/pytorch_model.bin
tokenizer config file saved in temp/checkpoint-5200/tokenizer_config.json
Special tokens file saved in temp/checkpoint-5200/special_tokens_map.json


{'eval_loss': 0.457880437374115, 'eval_precision': 0.500329163923634, 'eval_recall': 0.6865401987353207, 'eval_f1': 0.5788271134805789, 'eval_accuracy': 0.8466407787648279, 'eval_runtime': 0.4574, 'eval_samples_per_second': 2186.32, 'eval_steps_per_second': 6.559, 'epoch': 98.11}


100%|█████████▉| 5298/5300 [04:39<00:00, 20.26it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-5200 (score: 0.457880437374115).
100%|██████████| 5300/5300 [04:39<00:00, 18.94it/s]

{'train_runtime': 279.8424, 'train_samples_per_second': 3573.44, 'train_steps_per_second': 18.939, 'train_loss': 0.6899432113935363, 'epoch': 100.0}





TrainOutput(global_step=5300, training_loss=0.6899432113935363, metrics={'train_runtime': 279.8424, 'train_samples_per_second': 3573.44, 'train_steps_per_second': 18.939, 'train_loss': 0.6899432113935363, 'epoch': 100.0})