In [9]:
import torch
from datasets import load_dataset, Dataset
from seqeval.metrics import f1_score, precision_score, recall_score, classification_report
from transformers import BertTokenizerFast, BertForTokenClassification, Trainer, TrainingArguments
from transformers import DataCollatorForTokenClassification

# Checkpoint shared by the tokenizer and the model so the two cannot drift apart.
MODEL_NAME = "mshamrai/bert-base-ukr-eng-rus-uncased"

# TODO: "dataset/my-own-dataset" is a placeholder that does not exist on the Hub;
# point this at the real dataset once the custom Dataset has been built.
dataset = load_dataset("dataset/my-own-dataset")

# TODO: collect all the labels, e.g. 'O', 'B-inscription', 'I-inscription'.
# compute_metrics indexes this list with class ids, so the position of a tag
# here must equal its integer id.
label_list = ...

# Derived from label_list so the classifier head size cannot disagree with it.
num_of_labels = len(label_list)

tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
model = BertForTokenClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_of_labels,
    # Keep the tag names in the model config so saved checkpoints report real
    # tags instead of generic LABEL_<n> names.
    id2label=dict(enumerate(label_list)),
    label2id={tag: idx for idx, tag in enumerate(label_list)},
)


def tokenize_and_align_labels(examples, label_all_tokens=True):
    """Tokenize pre-split words and expand word-level tags to token level.

    Args:
        examples: A batch with a "tokens" column (one list of words per
            example) and a "ner_tags" column (one tag per word).
        label_all_tokens: When True (the default, matching the previous
            behaviour) every sub-token of a word receives that word's tag.
            When False only the first sub-token of each word keeps the tag
            and the remaining sub-tokens get -100.

    Returns:
        The tokenizer output for the batch with an added "labels" entry
        holding one aligned label list per example.
    """
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
    aligned_labels = []
    # Named `word_tags` so the module-level `label_list` is not shadowed.
    for batch_index, word_tags in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=batch_index)
        aligned_label = []
        previous_word_idx = None
        for word_idx in word_ids:
            if word_idx is None:
                # No source word for this position (word_ids gave None);
                # -100 is the value compute_metrics filters out.
                aligned_label.append(-100)
            elif label_all_tokens or word_idx != previous_word_idx:
                # First sub-token of a word, or every sub-token when
                # label_all_tokens is enabled.
                aligned_label.append(word_tags[word_idx])
            else:
                # Continuation sub-token that should not be scored.
                aligned_label.append(-100)
            previous_word_idx = word_idx
        aligned_labels.append(aligned_label)
    tokenized_inputs["labels"] = aligned_labels
    return tokenized_inputs


# Apply tokenization and label alignment to the dataset in batches.
tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)

# Pull out the three splits used for training, validation and final testing.
train_dataset, val_dataset, test_dataset = (
    tokenized_dataset[split_name] for split_name in ("train", "validation", "test")
)


DatasetNotFoundError: Dataset 'dataset/my-own-dataset' doesn't exist on the Hub. If the repo is private or gated, make sure to log in with `huggingface-cli login`.

In [None]:
training_args = TrainingArguments(
    # TODO: placeholder checkpoint directory; adjust to the project layout.
    output_dir="model/checkpoints",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and save on the same schedule, as load_best_model_at_end requires.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # TODO: placeholder log directory (the original `..` was not valid Python).
    logging_dir="model/logs",
    logging_steps=500,
    load_best_model_at_end=True,
    # "f1" is one of the keys returned by compute_metrics.
    metric_for_best_model="f1",
    report_to="none",
)

def compute_metrics(p):
    """Compute precision, recall and F1 with seqeval and print its report.

    Args:
        p: The (predictions, labels) pair. Predictions carry class scores on
            their last (third) axis; labels are the aligned label ids, with
            -100 at positions that must be ignored.

    Returns:
        A dict with the keys "precision", "recall" and "f1".
    """
    scores, label_ids = p
    predicted_ids = scores.argmax(axis=2)

    true_labels = []
    true_predictions = []
    for predicted_row, label_row in zip(predicted_ids, label_ids):
        # Filter both sequences against the SAME unfiltered label row so each
        # prediction stays aligned with the position it was made for.
        kept = [
            (predicted_id, label_id)
            for predicted_id, label_id in zip(predicted_row, label_row)
            if label_id != -100
        ]
        # seqeval compares tag strings, so ids on both sides go through label_list.
        true_labels.append([label_list[label_id] for _, label_id in kept])
        true_predictions.append([label_list[predicted_id] for predicted_id, _ in kept])

    precision = precision_score(true_labels, true_predictions)
    recall = recall_score(true_labels, true_predictions)
    f1 = f1_score(true_labels, true_predictions)
    # seqeval's classification_report takes no `labels` keyword; it derives
    # the entity types from the tag sequences themselves.
    report = classification_report(true_labels, true_predictions)
    print(report)
    return {
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# Pads inputs and labels to the longest sequence in each batch; the tokenized
# examples have different lengths and cannot be stacked into tensors otherwise.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

# Evaluate on the validation split passed to the Trainer above.
trainer.evaluate()

trainer.save_model('model/bert-model')


In [None]:
# Final evaluation on the held-out test split; the returned metrics are the cell output.
trainer.evaluate(eval_dataset=test_dataset)