# Finetuning using Huggingface e2e

In [9]:
import os

import pandas as pd
import numpy as np
import torch

import transformers

#from transformers import  BertModel, BertTokenizer, BertForSequenceClassification, AdamW
from transformers import (AutoModel, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification,
                          Trainer, TrainingArguments)


In [10]:
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x17497a170>

In [11]:
if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


## Load processed labeled data

In [12]:
MAX_LENGHT = 512
DATA_DIR = "../../../data/merged_split/"

checkpoint = "dbmdz/bert-base-historic-dutch-cased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 32000
}

loading file https://huggingface.co/dbmdz/bert-base

### Helper: tokenizer

In [13]:
from transformers import DataCollatorWithPadding

def tokenize_function(row):
    return tokenizer(
        row["text"],
        truncation=True,
)

def tokenize_data(raw_datasets):
    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
    return tokenized_datasets

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=MAX_LENGHT)

# Fine tune multiple models

## Finetune: Type per decade (12 models)

In [10]:
%%time 

import os
import datasets
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import EarlyStoppingCallback


DECADES = ["1960s", "1970s", "1980s", "1990s"]
TYPES = ["oil", "gas", "coal"]

for DECADE in DECADES:
    for TYPE in TYPES:
        # Load dataset
        NAME = f"{DECADE}_{TYPE}"
        
        try:
            os.mkdir(f"../../../output/bert_finetune/{NAME}")
        except:
            continue
        
        DIR_MODEL = f"../../../output/bert_finetune/{NAME}"
        
        df = pd.read_csv(os.path.join(DATA_DIR,DECADE, f"{DECADE}_{TYPE}_merged_split.csv")).sample(frac=1)
        # Create training, validation and test
        # Divide into train and val
        train_texts, val_texts, train_labels, val_labels = train_test_split(list(df.text_split), list(df.labels), test_size=.2)
        train_dataset = pd.DataFrame()
        train_dataset["text"] = train_texts
        train_dataset["labels"] = train_labels

        # Divide val into val and test"
        print(" Divide val into val and test")
        test_texts, val_texts, test_labels, val_labels = train_test_split(val_texts, val_labels, test_size=.5)
        validation_dataset = pd.DataFrame()
        validation_dataset["text"] = val_texts
        validation_dataset["labels"] = val_labels

        test_dataset = pd.DataFrame()
        test_dataset["text"] = test_texts
        test_dataset["labels"] = test_labels
        
        print("Transform into Dataset from hf")
        # Transform into Dataset from hf
        train_dataset_raw = Dataset.from_pandas(train_dataset)
        train_dataset_raw = train_dataset_raw.rename_column('labels', 'label')

        validation_dataset_raw = Dataset.from_pandas(validation_dataset)
        validation_dataset_raw = validation_dataset_raw.rename_column('labels', 'label')

        test_dataset_raw = Dataset.from_pandas(test_dataset)
        test_dataset_raw = test_dataset_raw.rename_column('labels', 'label')
        
        print("Tokenize the datasets")
        # Tokenize the datasets
        training_dataset_tokenized = tokenize_data(train_dataset_raw)
        validation_dataset_tokenized = tokenize_data(validation_dataset_raw)
        test_dataset_tokenized = tokenize_data(test_dataset_raw)
        
        print("Save to disk")
        # Save to disk
        os.mkdir(os.path.join(DIR_MODEL, "datasets"))
        DIR_MODEL_DATASET = os.path.join(DIR_MODEL, "datasets")

        os.mkdir(os.path.join(DIR_MODEL_DATASET, "training"))
        training_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "training"))

        os.mkdir(os.path.join(DIR_MODEL_DATASET, "validation"))
        validation_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "validation"))

        os.mkdir(os.path.join(DIR_MODEL_DATASET, "test"))
        test_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "test"))

        
        print("Create data collator")
        # Create data collator
        data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=MAX_LENGHT)
        
        training_args = TrainingArguments(
            output_dir = DIR_MODEL,
            num_train_epochs=4,              # total number of training epochs
            evaluation_strategy="steps",
            logging_dir="../../../output/bert_finetune/logging",
            load_best_model_at_end=True,  
            seed=2020,
            #label_names=["label"], # check this
            disable_tqdm=False
        )

        model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)
        
        trainer = Trainer(
            model,
            training_args,
            train_dataset=training_dataset_tokenized,
            eval_dataset=validation_dataset_tokenized,
            data_collator=data_collator,
            tokenizer=tokenizer,
        )

        trainer.train()
        
        model.save_pretrained(DIR_MODEL)
        

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 3/3 [00:00<00:00,  9.71ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 112.73ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 93.75ba/s]


Save to disk
Create data collator


Some weights of the model checkpoint at dbmdz/bert-base-historic-dutch-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassificatio

Step,Training Loss,Validation Loss
500,1.0592,0.966065
1000,0.6248,1.192456


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 263
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1960s_coal/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1960s_coal/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1960s_coal/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1960s_coal/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1960s_coal/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `Ber

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 46.02ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 59.64ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 55.67ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,0.9576,0.867755
1000,0.7808,0.795126
1500,0.5789,0.970411
2000,0.4047,1.279772


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 601
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1970s_oil/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1970s_oil/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1970s_oil/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1970s_oil/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1970s_oil/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForS

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 20.13ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 190.18ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 116.86ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,0.9194,0.941037


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 173
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1970s_gas/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1970s_gas/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1970s_gas/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1970s_gas/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1970s_gas/checkpoint-500/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ../../../output/bert_finetune/1970s_gas/checkpoint-500 (score: 0.94103711843490

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 57.11ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 134.38ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 115.97ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,0.5899,0.643128
1000,0.5844,0.654236


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 298
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1970s_coal/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1970s_coal/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1970s_coal/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1970s_coal/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1970s_coal/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `Ber

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 48.78ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 70.99ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 77.13ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,1.0412,1.0386
1000,1.047,1.028184
1500,1.0306,1.028893
2000,1.0341,1.025019


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 591
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1980s_oil/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1980s_oil/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1980s_oil/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1980s_oil/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1980s_oil/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForS

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 54.38ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 79.84ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 80.76ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,1.0051,1.020583
1000,0.9946,1.009379
1500,0.9979,1.019883
2000,0.9871,1.01661


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 541
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1980s_gas/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1980s_gas/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1980s_gas/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1980s_gas/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1980s_gas/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForS

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 19.04ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 143.40ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 152.70ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,0.9896,0.997943
1000,0.9907,0.997477


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 285
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1980s_coal/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1980s_coal/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1980s_coal/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1980s_coal/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1980s_coal/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `Ber

 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 42.17ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 267.89ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 257.05ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


Configuration saved in ../../../output/bert_finetune/1990s_oil/config.json
Model weights saved in ../../../output/bert_finetune/1990s_oil/pytorch_model.bin


 Divide val into val and test
Transform into Dataset from hf
Tokenize the datasets


100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 70.36ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 250.93ba/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 241.72ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


Save to disk
Create data collator


loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "

Step,Training Loss,Validation Loss
500,0.7824,0.899392


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 134
  Batch size = 8
Saving model checkpoint to ../../../output/bert_finetune/1990s_gas/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1990s_gas/checkpoint-500/config.json


RuntimeError: [enforce fail at inline_container.cc:319] . unexpected pos 433093632 vs 433093520

## Finetune: Only types (3 models)

In [9]:
%%time 

import os
import datasets
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import EarlyStoppingCallback


DECADES = ["1960s", "1970s", "1980s", "1990s"]
TYPES = ["oil"]

for TYPE in TYPES:
    # Load dataset
    NAME = f"{TYPE}"
    
    try:
        os.mkdir(f"/home/leonardovida/data/volume_1/delphbert-results/6-finetuning-outputs/model-per-type/{NAME}")
    except :
        continue
    
    DIR_MODEL = f"/home/leonardovida/data/volume_1/delphbert-results/6-finetuning-outputs/model-per-type/{NAME}"
    df_list = []
    for DECADE in DECADES:
        temp = pd.read_csv(os.path.join(DATA_DIR, f"{DECADE}_{TYPE}_merged_split.csv"))
        df_list.append(temp)
    df = pd.concat(df_list)

    # Create training, validation and test
    # Divide into train and val
    train_texts, val_texts, train_labels, val_labels = train_test_split(list(df.text_split), list(df.labels), test_size=.2)
    train_dataset = pd.DataFrame()
    train_dataset["text"] = train_texts
    train_dataset["labels"] = train_labels

    # Divide val into val and test
    test_texts, val_texts, test_labels, val_labels = train_test_split(val_texts, val_labels, test_size=.5)
    validation_dataset = pd.DataFrame()
    validation_dataset["text"] = val_texts
    validation_dataset["labels"] = val_labels

    test_dataset = pd.DataFrame()
    test_dataset["text"] = test_texts
    test_dataset["labels"] = test_labels

    # Transform into Dataset from hf
    train_dataset_raw = Dataset.from_pandas(train_dataset)
    train_dataset_raw = train_dataset_raw.rename_column('labels', 'label')

    validation_dataset_raw = Dataset.from_pandas(validation_dataset)
    validation_dataset_raw = validation_dataset_raw.rename_column('labels', 'label')

    test_dataset_raw = Dataset.from_pandas(test_dataset)
    test_dataset_raw = test_dataset_raw.rename_column('labels', 'label')

    # Tokenize the datasets
    training_dataset_tokenized = tokenize_data(train_dataset_raw)
    validation_dataset_tokenized = tokenize_data(validation_dataset_raw)
    test_dataset_tokenized = tokenize_data(test_dataset_raw)

    # Save to disk
    os.mkdir(os.path.join(DIR_MODEL, "datasets"))
    DIR_MODEL_DATASET = os.path.join(DIR_MODEL, "datasets")
    
    os.mkdir(os.path.join(DIR_MODEL_DATASET, "training"))
    training_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "training"))

    os.mkdir(os.path.join(DIR_MODEL_DATASET, "validation"))
    validation_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "validation"))

    os.mkdir(os.path.join(DIR_MODEL_DATASET, "test"))
    test_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "test"))

    # Create data collator
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=MAX_LENGHT)

    training_args = TrainingArguments(
        output_dir = DIR_MODEL,
        num_train_epochs=4,              # total number of training epochs
        evaluation_strategy="steps",
        logging_dir="/home/leonardovida/data/volume_1/delphbert-results/6-finetuning-outputs/model-per-type/logging",
        load_best_model_at_end=True,  
        seed=2020,
        #label_names=["label"], # check this
        disable_tqdm=False
    )

    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=training_dataset_tokenized,
        eval_dataset=validation_dataset_tokenized,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    trainer.train()

    model.save_pretrained(DIR_MODEL)



HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Some weights of the model checkpoint at wietsedv/bert-base-dutch-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at wietsedv/bert-

Step,Training Loss,Validation Loss,Runtime,Samples Per Second
500,1.0245,0.962023,4.5177,384.046
1000,0.9509,0.906383,4.4407,390.704
1500,0.9245,0.935806,4.4763,387.595
2000,0.7878,0.95208,4.5048,385.144
2500,0.655,0.921921,4.537,382.412


CPU times: user 6min 29s, sys: 2min 9s, total: 8min 38s
Wall time: 5min 38s


## Test models

In [17]:
%%time 

import os
import glob
import datasets
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import EarlyStoppingCallback


DECADES = ["1960s","1970s","1980s", "1990s"]
print(DECADES)

for DECADE in DECADES:
    # Load dataset
    NAME = f"{DECADE}"
    
    try:
        os.mkdir(f"../../../output/bert_finetune/{NAME}")
    except :
        continue
    
    DIR_MODEL = f"../../../output/bert_finetune/{NAME}"
    
    DATA_DIR = "../../../data/merged_split/"
    FILE_DIR = glob.glob(os.path.join(DATA_DIR,DECADE,'*.csv'))

    
    df_list = [] 
    for f in FILE_DIR:
        df_tmp = pd.read_csv(f)
        df_list.append(df_tmp)
    df = pd.concat(df_list)

    print('decade:',NAME)
    print('df size :', df.shape)
    
    # Create training, validation and test
    # Divide into train and val
    train_texts, val_texts, train_labels, val_labels = train_test_split(list(df.text_split), list(df.labels), test_size=.2)
    train_dataset = pd.DataFrame()
    train_dataset["text"] = train_texts
    train_dataset["labels"] = train_labels

    # Divide val into val and test
    test_texts, val_texts, test_labels, val_labels = train_test_split(val_texts, val_labels, test_size=.5)
    validation_dataset = pd.DataFrame()
    validation_dataset["text"] = val_texts
    validation_dataset["labels"] = val_labels

    test_dataset = pd.DataFrame()
    test_dataset["text"] = test_texts
    test_dataset["labels"] = test_labels

    print('train_dataset:', train_dataset.shape)
    print('validation_dataset:', validation_dataset.shape)
    print('test_dataset:', test_dataset.shape)
    # Transform into Dataset from hf
    train_dataset_raw = Dataset.from_pandas(train_dataset)
    train_dataset_raw = train_dataset_raw.rename_column('labels', 'label')

    validation_dataset_raw = Dataset.from_pandas(validation_dataset)
    validation_dataset_raw = validation_dataset_raw.rename_column('labels', 'label')

    test_dataset_raw = Dataset.from_pandas(test_dataset)
    test_dataset_raw = test_dataset_raw.rename_column('labels', 'label')

    # Tokenize the datasets
    training_dataset_tokenized = tokenize_data(train_dataset_raw)
    validation_dataset_tokenized = tokenize_data(validation_dataset_raw)
    test_dataset_tokenized = tokenize_data(test_dataset_raw)

    # Save to disk
    os.mkdir(os.path.join(DIR_MODEL, "datasets"))
    DIR_MODEL_DATASET = os.path.join(DIR_MODEL, "datasets")
    
    os.mkdir(os.path.join(DIR_MODEL_DATASET, "training"))
    training_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "training"))

    os.mkdir(os.path.join(DIR_MODEL_DATASET, "validation"))
    validation_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "validation"))

    os.mkdir(os.path.join(DIR_MODEL_DATASET, "test"))
    test_dataset_tokenized.save_to_disk(os.path.join(DIR_MODEL_DATASET, "test"))

    # Create data collator
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, max_length=MAX_LENGHT)

    training_args = TrainingArguments(
        output_dir = DIR_MODEL,
        num_train_epochs=4,              # total number of training epochs
        evaluation_strategy="steps",
        logging_dir= os.path.join(DIR_MODEL, "logging"),
        load_best_model_at_end=True,  
        seed=2020,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        #label_names=["label"], # check this
        disable_tqdm=False
    )

    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=training_dataset_tokenized,
        eval_dataset=validation_dataset_tokenized,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    trainer.train()

    model.save_pretrained(DIR_MODEL)


['1960s', '1970s', '1980s', '1990s']
decade: 1960s
df size : (10391, 6)
train_dataset: (8312, 2)
validation_dataset: (1040, 2)
test_dataset: (1039, 2)


100%|█████████████████████████████████████████████| 9/9 [00:00<00:00, 34.98ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 75.72ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 75.99ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures

Step,Training Loss,Validation Loss
500,1.0534,1.032067
1000,1.045,1.007771
1500,1.0418,1.007202
2000,1.0327,1.010823


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1040
  Batch size = 16
Saving model checkpoint to ../../../output/bert_finetune/1960s/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1960s/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1960s/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1960s/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1960s/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassificat

decade: 1970s
df size : (10716, 6)
train_dataset: (8572, 2)
validation_dataset: (1072, 2)
test_dataset: (1072, 2)


100%|█████████████████████████████████████████████| 9/9 [00:00<00:00, 22.38ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 54.66ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 60.64ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures

Step,Training Loss,Validation Loss
500,0.9736,0.927204
1000,0.8222,0.858539
1500,0.6511,0.877818
2000,0.5131,0.979873


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1072
  Batch size = 16
Saving model checkpoint to ../../../output/bert_finetune/1970s/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1970s/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1970s/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1970s/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1970s/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassificat

decade: 1980s
df size : (14158, 6)
train_dataset: (11326, 2)
validation_dataset: (1416, 2)
test_dataset: (1416, 2)


100%|███████████████████████████████████████████| 12/12 [00:00<00:00, 41.53ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 50.59ba/s]
100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 57.47ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures

Step,Training Loss,Validation Loss
500,1.0424,1.061798
1000,1.0361,1.048645
1500,1.0458,1.037554
2000,1.0287,1.041415
2500,1.0308,1.042204


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1416
  Batch size = 16
Saving model checkpoint to ../../../output/bert_finetune/1980s/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1980s/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1980s/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1980s/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1980s/checkpoint-500/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassificat

decade: 1990s
df size : (4469, 6)
train_dataset: (3575, 2)
validation_dataset: (447, 2)
test_dataset: (447, 2)


100%|█████████████████████████████████████████████| 4/4 [00:00<00:00, 25.73ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 92.02ba/s]
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 78.20ba/s]
using `logging_steps` to initialize `eval_steps` to 500
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/dbmdz/bert-base-historic-dutch-cased/resolve/main/config.json from cache at /Users/Zahed001/.cache/huggingface/transformers/1dd6939500d72963b7c7ed7033ba4857c1576a28f9f2819d8dde7a71f02e879e.5ffd597ad0cae5679a9afabdf80a381f3c67384ce1edbb6f01a13a7730a45b2e
Model config BertConfig {
  "_name_or_path": "dbmdz/bert-base-historic-dutch-cased",
  "architectures

Step,Training Loss,Validation Loss
500,0.7869,0.77309


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 447
  Batch size = 16
Saving model checkpoint to ../../../output/bert_finetune/1990s/checkpoint-500
Configuration saved in ../../../output/bert_finetune/1990s/checkpoint-500/config.json
Model weights saved in ../../../output/bert_finetune/1990s/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ../../../output/bert_finetune/1990s/checkpoint-500/tokenizer_config.json
Special tokens file saved in ../../../output/bert_finetune/1990s/checkpoint-500/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ../../../output/bert_finetune/1990s/checkpoint-500 (score: 0.7730902433395386).
Configuration save

CPU times: user 8h 28min 25s, sys: 57min 13s, total: 9h 25min 39s
Wall time: 3h 40min 54s
