## Datasetloader

In [1]:
# load dataset
from datasets import load_dataset
from nltk import pos_tag
raw_datasets = load_dataset("ncbi_disease")

Found cached dataset ncbi_disease (/home/serc305/.cache/huggingface/datasets/ncbi_disease/ncbi_disease/1.0.0/92314c7992b0b8a5ea2ad101be33f365b684a2cc011e0ffa29c691e6d32b2d03)


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
sentence = raw_datasets['train'][0]['tokens']

In [37]:
from datasets import load_dataset
from tqdm import tqdm, trange
# from keras.preprocessing.sequence import pad_sequences
from keras.utils import pad_sequences

def get_dataset(dataset_name):
     # get train, validation, test data
    sets = ['train', 'validation', 'test']
    raw_datasets = load_dataset(dataset_name) # test ncbi_disease
    ner_feature = raw_datasets["train"].features["ner_tags"]
    label2id = ner_feature.feature.names

    word_to_ix = {}
    # get all the sentences and labels and saving into one dic
    dataset = {
        'train': {
            'X':[],
            'y':[]
        },
        'validation':  {
            'X':[],
            'y':[]
        },
        'test':  {
            'X':[],
            'y':[]
        },
    }
    for set in sets:
        print(f"Loading {set} set ==>")
        for sent in tqdm(raw_datasets[f'{set}']):
            sentence = sent['tokens']
            dataset[f'{set}']['X'].append(sentence)
            for word in sentence:
                if word not in word_to_ix:# word has not been assigned an index yet
                    word_to_ix[word] = len(word_to_ix) # Assign each word with a unique index
            #ner_tags = [label_names[label] for label in ]
            dataset[f'{set}']['y'].append(sent['ner_tags'])
    
    return dataset, label2id, word_to_ix

def post_process(dataset, word_to_ix, tag2id):
    sets = list(dataset.keys())
    max_len = 0
    print(" processing word id =======>")
    for set in tqdm(sets):
        dataset[f'{set}']['X'] = [[word_to_ix[w] for w in s] for s in dataset[f'{set}']['X']]
        dataset[f'{set}']['y'] = [[to_categorical(w, num_classes=3) for w in s] for s in dataset[f'{set}']['y']]
        sub_max = max([len(i) for i in dataset[f'{set}']['y']])
        if sub_max > max_len:
            max_len = sub_max
    
    # padding
    print(" processing sequence padding =======>")
    for set in tqdm(sets):
        dataset[f'{set}']['X'] = pad_sequences(maxlen=max_len, sequences= dataset[f'{set}']['X'], padding="post", value=word_to_ix["ENDPAD"])
        dataset[f'{set}']['y'] = pad_sequences(maxlen=max_len, sequences= dataset[f'{set}']['y'], padding="post", value=tag2id["O"])
    return dataset, max_len

In [38]:
data_names = ['ncbi_disease', 'bc2gm_corpus', 'BC5CDR']
dataset, label2id, word_to_ix = get_dataset(dataset_name=data_names[0])
tag2id = dict(zip(label2id, list(range(len(label2id)))))
word_to_ix["ENDPAD"] = len(word_to_ix) # the corresponding padding
words = word_to_ix.keys()
ix_to_word = dict((v, k) for k, v in word_to_ix.items())
dataset, max_len = post_process(dataset, word_to_ix, tag2id)

Found cached dataset ncbi_disease (/home/serc305/.cache/huggingface/datasets/ncbi_disease/ncbi_disease/1.0.0/92314c7992b0b8a5ea2ad101be33f365b684a2cc011e0ffa29c691e6d32b2d03)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading train set ==>


100%|██████████| 5433/5433 [00:00<00:00, 22611.06it/s]


Loading validation set ==>


100%|██████████| 924/924 [00:00<00:00, 12637.40it/s]


Loading test set ==>


100%|██████████| 941/941 [00:00<00:00, 14541.77it/s]




100%|██████████| 3/3 [00:00<00:00,  4.96it/s]




100%|██████████| 3/3 [00:00<00:00, 26.22it/s]


In [40]:
from keras.models import Model
from tensorflow.keras.layers import Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
import tensorflow as tf
from keras.metrics import Precision, Recall
from tqdm.keras import TqdmCallback

n_words = len(word_to_ix.keys())
n_tags = len(tag2id.keys())

embedding_dim = 300 # v1 without pretrain, it was 50
input_ = Input(shape=(max_len,))

# define model
model = Embedding(input_dim=n_words, output_dim=embedding_dim, 
                      input_length=max_len)(input_)
model = Dropout(0.1)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)  # softmax output layer
model = Model(input_, out)

# filepath = './checkpoints/'
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=filepath,
#                                                  save_weights_only=True,
#                                                  verbose=1)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(dataset['train']['X'], dataset['train']['y'], batch_size=32, epochs=5, 
                     verbose=0, validation_data=(dataset['validation']['X'], dataset['validation']['y'])) 

2023-03-25 03:51:00.382443: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [41]:
import numpy as np
test_pred = model.predict(np.array(dataset['test']['X']), verbose=1)



In [43]:
idx2tag = {i: w for w, i in tag2id.items()}

def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(idx2tag[p_i].replace("PAD", "O"))
        out.append(out_i)
    return out
    
pred_labels = pred2label(test_pred)
test_labels = pred2label(dataset['test']['y'])

In [45]:
import evaluate
seqeval = evaluate.load("seqeval")
results = seqeval.compute(predictions=pred_labels, references=test_labels)
print(results)

{'Disease': {'precision': 0.18044619422572178, 'recall': 0.2864583333333333, 'f1': 0.22141706924315618, 'number': 960}, 'overall_precision': 0.18044619422572178, 'overall_recall': 0.2864583333333333, 'overall_f1': 0.22141706924315618, 'overall_accuracy': 0.9227339882325497}


In [46]:
max_len

123

In [None]:
# change each token to word_id
sets = ['train', 'validation', 'test']
max_len = 0

max_len = len(X[0])
n_words = len(word_to_ix.keys())
n_tags = len(tag2id.keys())

{'O': 0, 'B-Disease': 1, 'I-Disease': 2}

### CRF

In [1]:
# load dataset
from datasets import load_dataset
from nltk import pos_tag
from tqdm import tqdm, trange
from sklearn_crfsuite import CRF


def word2features(sent, i):
    '''
    Processing the each word in the sentences and extracting features
    Input:
        sent: a sentence, i.e. a list of tuples (word, postag, label)
        i: the index of the word
    Output:
        features: a dictionary of features
    '''
    word = str(sent[i][0])
    postag = str(sent[i][1])
    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word[-3:]': word[-3:],
        'word[-2:]': word[-2:],
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'postag': postag,
        'postag[:2]': postag[:2],
    }
    if i > 0:
        word1 = str(sent[i-1][0])
        postag1 = str(sent[i-1][1])
        features.update({
            '-1:word.lower()': word1.lower(),
            '-1:word.istitle()': word1.istitle(),
            '-1:word.isupper()': word1.isupper(),
            '-1:postag': postag1,
            '-1:postag[:2]': postag1[:2],
        })
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        word1 = str(sent[i+1][0])
        postag1 = str(sent[i+1][1])
        features.update({
            '+1:word.lower()': word1.lower(),
            '+1:word.istitle()': word1.istitle(),
            '+1:word.isupper()': word1.isupper(),
            '+1:postag': postag1,
            '+1:postag[:2]': postag1[:2],
        })
    else:
        features['EOS'] = True

    return features


def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def get_dataset(dataset_name):
     # get train, validation, test data
    sets = ['train', 'validation', 'test']
    raw_datasets = load_dataset(dataset_name) # test ncbi_disease
    ner_feature = raw_datasets["train"].features["ner_tags"]
    label_names = ner_feature.feature.names

    # get all the sentences and labels and saving into one dic
    dataset = {
        'train': {
            'X':[],
            'y':[]
        },
        'validation':  {
            'X':[],
            'y':[]
        },
        'test':  {
            'X':[],
            'y':[]
        },
    }
    for set in sets:
        print(f"Loading {set} set ==>")
        for sent in tqdm(raw_datasets[f'{set}']):
            pos_sent = pos_tag(sent['tokens'])
            dataset[f'{set}']['X'].append(sent2features(pos_sent))
            ner_tags = [label_names[label] for label in sent['ner_tags']]
            dataset[f'{set}']['y'].append(ner_tags)
    return dataset, label_names

In [2]:
# load dataset
data_names = ['ncbi_disease', 'bc2gm_corpus', 'BC5CDR']
dataset, label_names = get_dataset(dataset_name=data_names[0])



  0%|          | 0/3 [00:00<?, ?it/s]

Loading train set ==>


100%|██████████| 5433/5433 [00:04<00:00, 1339.72it/s]


Loading validation set ==>


100%|██████████| 924/924 [00:00<00:00, 1229.06it/s]


Loading test set ==>


100%|██████████| 941/941 [00:00<00:00, 1285.98it/s]


In [3]:
# defining model
from sklearn_crfsuite.metrics import flat_classification_report
from sklearn_crfsuite import metrics
crf = CRF(algorithm='lbfgs',
        c1=10,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=False)

# training model
# fit by sklearn style
crf.fit(dataset['train']['X'], dataset['train']['y'])
# test
y_pred = crf.predict(dataset['test']['X'])

In [30]:
# y_pred[0]

In [4]:
report = flat_classification_report(dataset['test']['y'], y_pred, labels=label_names, digits=3)
print(report)

TypeError: classification_report() takes 2 positional arguments but 3 positional arguments (and 1 keyword-only argument) were given

In [6]:
results = seqeval.compute(predictions=y_pred, references=dataset['test']['y'])

In [7]:
results

{'Disease': {'precision': 0.7715179968701096,
  'recall': 0.5135416666666667,
  'f1': 0.616635397123202,
  'number': 960},
 'overall_precision': 0.7715179968701096,
 'overall_recall': 0.5135416666666667,
 'overall_f1': 0.616635397123202,
 'overall_accuracy': 0.9563211821855737}

In [3]:
sentences = []
lables = []
for sent in raw_datasets['train']:
    pos_sent = pos_tag(sent['tokens'])
    sentences.append(pos_sent)
    lables.append(sent['ner_tags'])

In [8]:
X = [sent2features(s) for s in sentences]
y = lables

In [10]:
# X[0]

In [4]:
sentence = raw_datasets['train'][0]['tokens']
label = raw_datasets['train'][0]['ner_tags']

In [14]:
pos[0]

('Identification', 'NN')

In [6]:
ner_feature = raw_datasets["train"].features["ner_tags"]
ner_feature

Sequence(feature=ClassLabel(names=['O', 'B-Disease', 'I-Disease'], id=None), length=-1, id=None)

In [7]:
label_names = ner_feature.feature.names
label_names

['O', 'B-Disease', 'I-Disease']

In [8]:
labels = raw_datasets["train"][0]["ner_tags"]
labels

[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0]

In [9]:
y = [label_names[label] for label in labels]
y

['O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-Disease',
 'I-Disease',
 'I-Disease',
 'I-Disease',
 'O',
 'O']

In [5]:
words = raw_datasets["train"][0]["tokens"]
labels = raw_datasets["train"][0]["ner_tags"]
line1 = ""
line2 = ""
for word, label in zip(words, labels):
    full_label = label_names[label]
    max_length = max(len(word), len(full_label))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += full_label + " " * (max_length - len(full_label) + 1)
print(line1)
print(line2)

Identification of APC2 , a homologue of the adenomatous polyposis coli      tumour    suppressor . 
O              O  O    O O O         O  O   B-Disease   I-Disease I-Disease I-Disease O          O 


In [6]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [7]:
tokenizer.is_fastw

True

In [8]:
# inputs = tokenizer(raw_datasets["train"][0]["tokens"], is_split_into_words=True)
# inputs.tokens()

['[CLS]',
 'I',
 '##dent',
 '##ification',
 'of',
 'AP',
 '##C',
 '##2',
 ',',
 'a',
 'ho',
 '##mo',
 '##logue',
 'of',
 'the',
 'ad',
 '##eno',
 '##mat',
 '##ous',
 'p',
 '##oly',
 '##po',
 '##sis',
 'co',
 '##li',
 't',
 '##umour',
 'suppress',
 '##or',
 '.',
 '[SEP]']

In [9]:
inputs.word_ids()

[None,
 0,
 0,
 0,
 1,
 2,
 2,
 2,
 3,
 4,
 5,
 5,
 5,
 6,
 7,
 8,
 8,
 8,
 8,
 9,
 9,
 9,
 9,
 10,
 10,
 11,
 11,
 12,
 12,
 13,
 None]

In [10]:
def align_labels_with_tokens(labels, word_ids):
    '''
    Allign the labels for sub-words tokens
    '''
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels

In [11]:
labels = raw_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))

[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0]
[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, -100]


In [12]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs

In [13]:
tokenized_datasets = raw_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [14]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 5433
    })
    validation: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 924
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 941
    })
})

In [15]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [16]:
batch = data_collator([tokenized_datasets["train"][i] for i in range(2)])
batch["labels"]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


tensor([[-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    1,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    0,    0,    0, -100, -100, -100, -100, -100, -100,
         -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
         -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
         -100, -100, -100, -100, -100],
        [-100,    0,    1,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0, -100]])

In [17]:
for i in range(2):
    print(tokenized_datasets["train"][i]["labels"])

[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, -100]
[-100, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]


In [18]:
import evaluate

metric = evaluate.load("seqeval")

In [19]:
labels = raw_datasets["train"][0]["ner_tags"]
labels = [label_names[i] for i in labels]
labels

['O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-Disease',
 'I-Disease',
 'I-Disease',
 'I-Disease',
 'O',
 'O']

In [20]:
predictions = labels.copy()
predictions[-3] = "O"
metric.compute(predictions=[predictions], references=[labels])

{'Disease': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1},
 'overall_precision': 0.0,
 'overall_recall': 0.0,
 'overall_f1': 0.0,
 'overall_accuracy': 0.9285714285714286}

In [21]:
import numpy as np


def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

In [22]:
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

In [23]:
from transformers import AutoModelForTokenClassification

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

In [24]:
model.config.num_labels

3

In [27]:
from huggingface_hub import notebook_login

notebook_login()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Login successful
Your token has been saved to /home/serc305/.huggingface/token
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[1m[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set this credential helper as the default

git config --global credential.helper store[0m


In [28]:
from transformers import TrainingArguments

args = TrainingArguments(
    "bert-finetuned-ner",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)

In [29]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Cloning https://huggingface.co/zqq17/bert-finetuned-ner into local empty directory.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

***** Running training *****
  Num examples = 5433
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2040
  Number of trainable parameters = 107721987


Epoch,Training Loss,Validation Loss


***** Running Evaluation *****
  Num examples = 924
  Batch size = 8
  Num examples = 924
  Batch size = 8
Saving model checkpoint to bert-finetuned-ner/checkpoint-680
Configuration saved in bert-finetuned-ner/checkpoint-680/config.json
Model weights saved in bert-finetuned-ner/checkpoint-680/pytorch_model.bin
tokenizer config file saved in bert-finetuned-ner/checkpoint-680/tokenizer_config.json
Special tokens file saved in bert-finetuned-ner/checkpoint-680/special_tokens_map.json
tokenizer config file saved in bert-finetuned-ner/tokenizer_config.json
Special tokens file saved in bert-finetuned-ner/special_tokens_map.json


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


tokenizer config file saved in bert-finetuned-ner/tokenizer_config.json
Special tokens file saved in bert-finetuned-ner/special_tokens_map.json


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


tokenizer config file saved in bert-finetuned-ner/tokenizer_config.json
Special tokens file saved in bert-finetuned-ner/special_tokens_map.json


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

TrainOutput(global_step=2040, training_loss=0.04802936818085465, metrics={'train_runtime': 214.5296, 'train_samples_per_second': 75.976, 'train_steps_per_second': 9.509, 'total_flos': 525476611316880.0, 'train_loss': 0.04802936818085465, 'epoch': 3.0})