# Importation des modules

In [32]:
import torch
from transformers import pipeline

checkpoint = "bert-base-uncased"

# Pré-traitement des données
import csv
import pandas as pd
import datasets
from transformers import DataCollatorForTokenClassification
from transformers import BertTokenizerFast
from sklearn.model_selection import train_test_split

# Entraînement du classifieur
from transformers import BertForTokenClassification
from transformers import TrainingArguments, Trainer
from torch.optim import NAdam, Adam
from transformers import get_scheduler

# Evaluation
import numpy as np
import evaluate

# Pre-processing

## Label encoding

In [33]:
label2id = {
    "B-NPI": 0,
    "I-NPI": 1,
    "O": 2
}

id2label = {
    0: "B-NPI",
    1: "I-NPI",
    2: "O"
}

## Importation des données

In [35]:
raw_tokens = [] # Liste de listes : une sub-list = une phrase
labels = []

with open("../data/fake_dataset.csv") as csvfile:
    
    reader = csv.reader(csvfile, delimiter=",")
    
    temp_tokens = []
    temp_labels = []
    
    next(reader) # Ignore première ligne (en-tête du CSV)
    
    for row in reader:
        
        if row == ["#", "#"]: # Nouvelle phrase
            
            # Ajout des listes tampon
            raw_tokens.append(temp_tokens)
            labels.append(temp_labels)
            
            # On vide les listes tampon
            temp_tokens = []
            temp_labels = []
        
        else:
            temp_tokens.append(row[0])
            temp_labels.append(label2id[row[1].strip()])

# Split en train/test
raw_tokens_train, raw_tokens_test, labels_train, labels_test = train_test_split(raw_tokens, labels, test_size=0.2)

In [36]:
# Corpus d'entraînement
data_train = {
    "tokens": raw_tokens_train,
    "labels": labels_train
}

# Corpus de test
data_test = {
    "tokens": raw_tokens_test,
    "labels": labels_test
}

# Création d'un objet Dataset comportant le train et le test
dataset = datasets.DatasetDict({
    "train": datasets.Dataset.from_pandas(pd.DataFrame(data_train)),
    "test": datasets.Dataset.from_pandas(pd.DataFrame(data_test))
})

In [37]:
dataset

DatasetDict({
    train: Dataset({
        features: ['tokens', 'labels'],
        num_rows: 8
    })
    test: Dataset({
        features: ['tokens', 'labels'],
        num_rows: 2
    })
})

## Mise en forme des données dans le bon format pour BERT

In [39]:
# Création du tokenizer à partir du même checkpoint
tokenizer = BertTokenizerFast.from_pretrained(checkpoint)

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer,
                                                   max_length=20, # Dans le mémoire : 128
                                                   padding="max_length")

loading file vocab.txt from cache at C:\Users\aengp/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\vocab.txt
loading file tokenizer.json from cache at C:\Users\aengp/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at C:\Users\aengp/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\tokenizer_config.json
loading configuration file config.json from cache at C:\Users\aengp/.cache\huggingface\hub\models--bert-base-uncased\snapshots\0a6aa9128b6194f4f3c4db429b6cb4891cdb421b\config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient

In [8]:
def shift_label(label):
    # Transformation des B-NPI en I-NPI quand le token correspond à un subword
    if label % 2 == 1:
        label += 1
    return label

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        
        # Caractères spéciaux, comme [CLS] et [SEP]
        if word_id is None: 
            new_labels.append(-100)
            
        # Nouveau mot 
        elif word_id != current_word:
            current_word = word_id
            new_labels.append(labels[word_id])
            
        # Transformation du B-NPI en I-NPI si c'est un subword
        else:
            new_labels.append(shift_label(labels[word_id]))
            
    return new_labels

def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"],
                                 truncation=True,
                                 is_split_into_words=True) # Déjà tokenizé
    new_labels = []
    for i, labels in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))
    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs

processed_dataset = dataset.map(tokenize_and_align_labels, batched=True)

                                                 

In [41]:
processed_dataset["train"]["labels"]

[[-100, 2, 2, 2, 0, 2, -100],
 [-100, 2, 2, 2, 2, 0, 2, -100],
 [-100, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, -100],
 [-100, 2, 2, 2, 2, 2, 0, 2, 2, -100],
 [-100, 2, 2, 2, 0, 1, 2, -100],
 [-100, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, -100],
 [-100, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -100],
 [-100, 2, 2, 2, 2, 2, 2, -100]]

# Metrics

In [30]:
label_list = list(label2id.keys())
seqeval = evaluate.load("seqeval")

def compute_metrics(p):
    predictions, labels = p    
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

# Entraînement du classifieur

In [12]:
model = BertForTokenClassification.from_pretrained(checkpoint, 
                                                    num_labels=3,
                                                    id2label=id2label, 
                                                    label2id=label2id)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

In [31]:
training_args = TrainingArguments(
    output_dir="npi_bio_model",
    learning_rate=0.1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2,
    num_train_epochs=20,
    max_grad_norm=10, # Gradient clipping
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=True
)

optimizer = Adam(model.parameters(),
                  lr=0.1)

num_training_steps = training_args.num_train_epochs * len(processed_dataset["train"])

lr_scheduler = get_scheduler(name="linear", 
                             optimizer=optimizer,
                             num_warmup_steps=0,
                             num_training_steps=num_training_steps)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    optimizers=(optimizer, lr_scheduler),
    compute_metrics=compute_metrics
)

trainer.train()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
c:\Users\aengp\Desktop\memoire\NPI_memoire\test\npi_bio_model is already a clone of https://huggingface.co/delphine-nguyen/npi_bio_model. Make sure you pull the latest changes with `repo.git_pull()`.
The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8
  Num Epochs = 20
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 40
  Num

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 14.349644660949707, 'eval_precision': 0.09523809523809523, 'eval_recall': 1.0, 'eval_f1': 0.17391304347826084, 'eval_accuracy': 0.09523809523809523, 'eval_runtime': 0.138, 'eval_samples_per_second': 14.497, 'eval_steps_per_second': 7.249, 'epoch': 1.0}


Model weights saved in npi_bio_model\checkpoint-2\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-2\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-2\special_tokens_map.json
tokenizer config file saved in npi_bio_model\tokenizer_config.json
Special tokens file saved in npi_bio_model\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                        

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 7.343986988067627, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.1011, 'eval_samples_per_second': 19.78, 'eval_steps_per_second': 9.89, 'epoch': 2.0}


Model weights saved in npi_bio_model\checkpoint-4\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-4\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-4\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 5.7946648597717285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0732, 'eval_samples_per_second': 27.335, 'eval_steps_per_second': 13.668, 'epoch': 3.0}


Model weights saved in npi_bio_model\checkpoint-6\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-6\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-6\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














[A[A[A[A[A[A[A[A[A[A[A[A[A[A

[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[A[A[A[A[A[A

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 27.000112533569336, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 24.279, 'eval_steps_per_second': 12.139, 'epoch': 4.0}


Model weights saved in npi_bio_model\checkpoint-8\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-8\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-8\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 2.9531192779541016, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.065, 'eval_samples_per_second': 30.761, 'eval_steps_per_second': 15.381, 'epoch': 5.0}


Model weights saved in npi_bio_model\checkpoint-10\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-10\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-10\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 5.859758377075195, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0655, 'eval_samples_per_second': 30.547, 'eval_steps_per_second': 15.273, 'epoch': 6.0}


Model weights saved in npi_bio_model\checkpoint-12\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-12\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-12\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 4.295331001281738, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0774, 'eval_samples_per_second': 25.838, 'eval_steps_per_second': 12.919, 'epoch': 7.0}


Model weights saved in npi_bio_model\checkpoint-14\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-14\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-14\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














[A[A[A[A[A[A[A[A[A[A[A[A[A[A

[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[A[A[A[A[A

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 2.642864227294922, 'eval_precision': 0.09523809523809523, 'eval_recall': 1.0, 'eval_f1': 0.17391304347826084, 'eval_accuracy': 0.09523809523809523, 'eval_runtime': 0.0684, 'eval_samples_per_second': 29.234, 'eval_steps_per_second': 14.617, 'epoch': 8.0}


Model weights saved in npi_bio_model\checkpoint-16\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-16\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-16\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 0.6109265089035034, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0799, 'eval_samples_per_second': 25.046, 'eval_steps_per_second': 12.523, 'epoch': 9.0}


Model weights saved in npi_bio_model\checkpoint-18\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-18\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-18\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 0.3750423192977905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0653, 'eval_samples_per_second': 30.639, 'eval_steps_per_second': 15.319, 'epoch': 10.0}


Model weights saved in npi_bio_model\checkpoint-20\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-20\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-20\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 0.37735599279403687, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.076, 'eval_samples_per_second': 26.318, 'eval_steps_per_second': 13.159, 'epoch': 11.0}


Model weights saved in npi_bio_model\checkpoint-22\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-22\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-22\special_tokens_map.json













[A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[AThe following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2
  Batch size = 2














  _warn_prf(average, modifier, msg_start, len(result))


[A[A                                        



[A[A[A[A                                  




[A[A[A[A[A                               





[A[A[A[A[A[A                            







[A[A[A[A[A[A[A[A                      









[A[A[

[['O', 'O', 'O', 'O', 'B-NPI', 'O'], ['O', 'O', 'O', 'B-NPI', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']]
{'eval_loss': 0.7217587232589722, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_runtime': 0.0713, 'eval_samples_per_second': 28.033, 'eval_steps_per_second': 14.017, 'epoch': 12.0}


Model weights saved in npi_bio_model\checkpoint-24\pytorch_model.bin
tokenizer config file saved in npi_bio_model\checkpoint-24\tokenizer_config.json
Special tokens file saved in npi_bio_model\checkpoint-24\special_tokens_map.json


KeyboardInterrupt: 

In [None]:
trainer.push_to_hub()

NameError: name 'trainer' is not defined

# Utilisation du modèle

In [None]:
input = "Did you eat anything today?"

classifier = pipeline("ner", model="delphine-nguyen/npi_bio_model")
classifier(input)