<a href="https://colab.research.google.com/github/aimanyounises1/NLP_WEB/blob/master/AraBERT_NER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing dependencies

In [None]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))
    !nvidia-smi

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB
Sat Jan 30 08:48:56 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    27W / 250W |     10MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------

In [None]:
# Third-party packages used by this notebook.
# NOTE(review): nothing here is version-pinned, so a fresh run installs whatever
# versions are current at that time.
!pip install optuna
!pip install seqeval
!pip install sentencepiece
# transformers is installed from a fresh clone of the GitHub repository rather than from PyPI.
!git clone https://github.com/huggingface/transformers
!cd transformers && pip install .
!pip install farasapy
!pip install pyarabic
# The arabert repository is only cloned (not pip-installed); presumably this is what makes
# the later `from arabert.preprocess import ArabertPreprocessor` importable.
!git clone https://github.com/aub-mind/arabert

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/59/b4/a1a80252cef3d8f5a0acdf6e678d6dc07e2e6964ee46d0453a2ae1af1ecb/optuna-2.4.0-py3-none-any.whl (282kB)
[K     |████████████████████████████████| 286kB 8.0MB/s 
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/0f/8f/3c74fa4b6c3db1051b495385f5302fc5d5aa0f180d40ce3e9a13c82f8c82/cliff-3.6.0-py3-none-any.whl (79kB)
[K     |████████████████████████████████| 81kB 9.0MB/s 
Collecting colorlog
  Downloading https://files.pythonhosted.org/packages/5e/39/0230290df0519d528d8d0ffdfd900150ed24e0076d13b1f19e279444aab1/colorlog-4.7.2-py2.py3-none-any.whl
Collecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/cd/d2/1c6e91299280ef1a6dadbbd5e762a8b091d02e2340a9ff001b58ca80f536/alembic-1.5.3.tar.gz (1.1MB)
[K     |████████████████████████████████| 1.1MB 10.2MB/s 
[?25hCollecting cmaes>=0.6.0
  Downloading https://files.pythonhosted.org/packages/8d/3c/06c76ec8b54b9b1fad7f3

# Creating training datasets

In [None]:
import pandas as pd
import numpy as np

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
# Registry of loaded datasets; it is re-created (empty) again in the next cell, after the Dataset class.
all_datasets= []

In [None]:
class Dataset:
    """Plain container bundling a named corpus with its splits and label set.

    The constructor stores its arguments unchanged as attributes:
      name: identifier of the dataset.
      train: training split.
      test: test split.
      label_list: labels of the dataset.
    """

    def __init__(self, name, train, test, label_list):
        self.name, self.train = name, train
        self.test, self.label_list = test, label_list


# Registry that later cells append Dataset instances to.
all_datasets = list()

# ANERCorp

We are using the ANERCorp with the Camel Lab splits from https://camel.abudhabi.nyu.edu/anercorp/

In [None]:
# Extract the ANERCorp CAMeL Lab split archive.
# NOTE(review): the archive lives under /content/drive/MyDrive, so presumably Google Drive
# has to be mounted at /content/drive first — no mount step is visible in this part of the notebook.
!unzip "/content/drive/MyDrive/ANERcorp-CamelLabSplits.zip"

Archive:  /content/drive/MyDrive/ANERcorp-CamelLabSplits.zip


In [None]:
from collections import Counter
import matplotlib.pyplot as plt

`read_ANERcorp` returns a list of examples.

Each example is a `(tokens, labels)` tuple: a list of tokens and a list of the corresponding labels.

In [None]:
def read_ANERcorp(path):
  """Read a token-per-line NER file into a list of sentences.

  Each non-blank line is split on whitespace; the first field is taken as the
  token and the second as its label. A blank line (empty, or containing only
  whitespace) ends the current sentence.

  Args:
    path: Path of the UTF-8 encoded text file to read.

  Returns:
    A list with one `(tokens, labels)` tuple per sentence, where `tokens` and
    `labels` are equally long lists of strings.

  Raises:
    IndexError: If a non-blank line has fewer than two whitespace-separated fields.
  """
  data = []
  sentence = []
  label = []
  with open(path,'r',encoding='utf-8') as f:
    for line in f:
      fields = line.split()
      if not fields:
        # Sentence separator. Testing the split result (rather than `line == '\n'`)
        # also catches separator lines that carry stray spaces or tabs, which would
        # otherwise crash on `fields[0]` below.
        if sentence:
          data.append((sentence,label))
          sentence = []
          label = []
        continue
      sentence.append(fields[0])
      label.append(fields[1])
  # The last sentence is not necessarily followed by a blank line.
  if sentence:
    data.append((sentence,label))
  return data

In [None]:
# Load the CAMeL Lab train/test splits of ANERCorp.
ANERCorp_path = './ANERcorp-CamelLabSplits/'
data_train = read_ANERcorp(ANERCorp_path+'ANERCorp_CamelLab_train.txt')
data_test = read_ANERcorp(ANERCorp_path+'ANERCorp_CamelLab_test.txt')

# Label frequencies per split (test first, then train), computed once and reused below.
test_label_counts = Counter(label for sentence in data_test for label in sentence[1])
train_label_counts = Counter(label for sentence in data_train for label in sentence[1])
print(test_label_counts)
print(train_label_counts)

# Build the label inventory from BOTH splits: a label that only occurs in the training
# data would otherwise be missing from the label map and raise a KeyError when the
# training set is encoded. Test labels come first, in first-seen order, so the ordering
# is unchanged whenever the test split already covers every label.
label_list = list(test_label_counts) + [
    label for label in train_label_counts if label not in test_label_counts
]
print(label_list)

In [None]:
# Word-count threshold applied to both splits. It equals the `max_length=256` passed to
# `NERDataset` further down; the training split was previously checked against 512 while
# the test split was checked against 256.
# Note: lengths here are counted in words, not in sub-word tokens.
LENGTH_THRESHOLD = 256

for split_name, split_data in (("Training", data_train), ("Testing", data_test)):
  sentence_lengths = [len(sentence[0]) for sentence in split_data]

  print("{} Sentence Lengths: ".format(split_name))
  plt.hist(sentence_lengths, bins=range(0, LENGTH_THRESHOLD, 2))
  plt.show()

  # Number of sentences in this split that have more words than the threshold.
  print(sum(length > LENGTH_THRESHOLD for length in sentence_lengths))

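256 seems to be a good choice for `max_length`. Note that the histograms above count words per sentence, whereas `max_length` is applied to the sub-word tokens produced by the tokenizer, so a tokenized sentence can be longer than its word count.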

In [None]:
# Wrap the ANERCorp splits in a Dataset container and register it in `all_datasets`.
# NOTE(review): the variable is called `data_AJGT` although it holds ANERCorp — looks like a leftover name.
# NOTE(review): `Dataset` must be the container class defined earlier in this notebook; the later
# `from torch.utils.data import Dataset` rebinds that name, so re-running this cell after that
# import no longer uses the container class.
# Re-running this cell appends another entry to `all_datasets`.
data_AJGT = Dataset("ANERCorp", data_train, data_test, label_list)
all_datasets.append(data_AJGT)

# Trainer

In [None]:
from arabert.preprocess import ArabertPreprocessor
import numpy as np
from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

from transformers import AutoConfig, AutoModelForTokenClassification, AutoTokenizer
from transformers import Trainer , TrainingArguments
from transformers.trainer_utils import EvaluationStrategy
from transformers.data.processors.utils import InputFeatures
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.utils import resample
import logging
import torch
import optuna 

In [None]:
# Configure the root logger at WARNING level (basicConfig does nothing if the root logger
# already has handlers configured).
logging.basicConfig(level=logging.WARNING)
# Logger named after the current module.
logger = logging.getLogger(__name__)

In [None]:
# Show the names of all datasets registered so far.
for x in all_datasets:
  print(x.name)

In [None]:
# Experiment selection.
dataset_name = 'ANERCorp'  # compared against the `name` of the entries in `all_datasets`
# NOTE(review): this is the *base* checkpoint, but the study name used further down
# ("NER-arabert-large-v02") and the saved cell outputs refer to bert-large-arabertv02 —
# confirm which model is intended.
model_name = 'aubmindlab/bert-base-arabertv02'
task_name = 'tokenclassification'  # not referenced anywhere else in the visible part of the notebook

In [None]:
# Pick the registered dataset whose name equals `dataset_name`.
for d in all_datasets:
  if d.name==dataset_name:
    selected_dataset = d
    print('Dataset found')
    break
else:
  # No match: fail loudly instead of leaving `selected_dataset` undefined, or silently
  # reusing a stale value left over from an earlier run of this cell.
  raise ValueError(
      "Dataset {!r} not found; available datasets: {}".format(
          dataset_name, [candidate.name for candidate in all_datasets]
      )
  )

Dataset found


Create, preprocess, and tokenize ANERCorp

In [None]:
class NERDataset:
  """Map-style dataset that turns (words, tags) sentences into fixed-length model inputs.

  Items are encoded on access: every word is preprocessed and split into
  sub-word tokens, the sequence is wrapped in the tokenizer's CLS/SEP tokens
  and padded to `max_length`. Only the first sub-token of a word carries the
  word's label id; all other positions (remaining sub-tokens, special tokens,
  padding) carry the ignore index of `torch.nn.CrossEntropyLoss`.
  """

  def __init__(self, texts, tags, label_list, model_name, max_length):
    """Store the data and load the preprocessor and tokenizer.

    Args:
      texts: sentences, each one a list of words.
      tags: label lists, parallel to `texts`.
      label_list: labels; the position of a label becomes its id.
      model_name: passed to `AutoTokenizer.from_pretrained`; the part after the
        last "/" is passed to `ArabertPreprocessor`.
      max_length: length of every encoded sequence, special tokens included.
    """
    self.texts = texts
    self.tags = tags
    self.label_map = dict((label, i) for i, label in enumerate(label_list))
    self.preprocessor = ArabertPreprocessor(model_name.rsplit("/", 1)[-1])
    # Positions labelled with the cross-entropy ignore index do not contribute to the loss.
    self.pad_token_label_id = torch.nn.CrossEntropyLoss().ignore_index
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    self.max_length = max_length

  def __len__(self):
    """Number of sentences."""
    return len(self.texts)

  def __getitem__(self, item):
    """Encode sentence `item` as a dict of `max_length`-long `torch.long` tensors.

    Returns:
      Dict with the keys `input_ids`, `attention_mask`, `token_type_ids` and `labels`.
    """
    ignore_id = self.pad_token_label_id

    # Sub-word tokens of the sentence and, aligned with them, their label ids.
    pieces = []
    piece_labels = []
    for word, label in zip(self.texts[item], self.tags[item]):
      word_pieces = self.tokenizer.tokenize(self.preprocessor.preprocess(word))
      if len(word_pieces) == 0:
        # Words that produce no tokens are dropped together with their label.
        continue
      pieces += word_pieces
      # Real label on the first sub-token, ignore index on the rest.
      piece_labels.append(self.label_map[label])
      piece_labels += [ignore_id] * (len(word_pieces) - 1)

    # Keep room for the special tokens the tokenizer adds around the sequence.
    budget = self.max_length - self.tokenizer.num_special_tokens_to_add()
    if len(pieces) > budget:
      pieces = pieces[:budget]
      piece_labels = piece_labels[:budget]

    # Wrap in the CLS and SEP tokens; neither carries a label.
    tokens = [self.tokenizer.cls_token] + pieces + [self.tokenizer.sep_token]
    label_ids = [ignore_id] + piece_labels + [ignore_id]
    input_ids = self.tokenizer.convert_tokens_to_ids(tokens)

    # Pad every sequence up to max_length; the mask is 1 on real tokens and 0 on padding.
    real_length = len(input_ids)
    padding_length = self.max_length - real_length
    input_ids = input_ids + [self.tokenizer.pad_token_id] * padding_length
    attention_mask = [1] * real_length + [0] * padding_length
    token_type_ids = [0] * len(tokens) + [0] * padding_length
    label_ids = label_ids + [ignore_id] * padding_length

    for sequence in (input_ids, attention_mask, token_type_ids, label_ids):
      assert len(sequence) == self.max_length

    encoded = {
        'input_ids' : input_ids,
        'attention_mask' : attention_mask,
        'token_type_ids' : token_type_ids,
        'labels' : label_ids,
    }
    return {key: torch.tensor(values, dtype=torch.long) for key, values in encoded.items()}

In [None]:
# Label -> id lookup, following the order of the selected dataset's label list.
label_map = dict((v, index) for index, v in enumerate(selected_dataset.label_list))
print(label_map)

# Settings shared by the train and test datasets.
ner_dataset_kwargs = dict(
    label_list=selected_dataset.label_list,
    model_name=model_name,
    max_length=256,
)

# Every example is a (tokens, labels) pair: index 0 holds the words, index 1 the tags.
train_dataset = NERDataset(
    texts=[example[0] for example in selected_dataset.train],
    tags=[example[1] for example in selected_dataset.train],
    **ner_dataset_kwargs
)

test_dataset = NERDataset(
    texts=[example[0] for example in selected_dataset.test],
    tags=[example[1] for example in selected_dataset.test],
    **ner_dataset_kwargs
)

{'B-LOC': 0, 'O': 1, 'B-PERS': 2, 'I-PERS': 3, 'B-ORG': 4, 'I-LOC': 5, 'I-ORG': 6, 'B-MISC': 7, 'I-MISC': 8}


Get NER Model

In [None]:
def model_init():
    """Load a token-classification model for `model_name` with one output per label.

    Reads the module-level `model_name` and `label_map`.
    """
    num_labels = len(label_map)
    return AutoModelForTokenClassification.from_pretrained(
        model_name,
        return_dict=True,
        num_labels=num_labels,
    )

In [None]:
# id -> label lookup. Built from the selected dataset's label list — the same source
# `label_map` is built from — rather than from the global `label_list`, so the two maps
# stay inverse to each other even if `label_list` is reassigned later.
inv_label_map = {i: label for i, label in enumerate(selected_dataset.label_list)}

def align_predictions(predictions, label_ids, id_to_label=None):
    """Convert model outputs and gold label ids into per-sentence label-name lists.

    Positions whose gold label id equals the ignore index of
    `torch.nn.CrossEntropyLoss` are skipped in both outputs.

    Args:
        predictions: array of scores; the arg-max over axis 2 is taken as the
            predicted label id, and the result must be two-dimensional.
        label_ids: gold label ids, indexable as `label_ids[i, j]`.
        id_to_label: optional mapping from label id to label name. Defaults to
            the module-level `inv_label_map`.

    Returns:
        `(preds_list, out_label_list)`: two lists with one list of label names
        per sentence (predicted and gold, respectively).
    """
    if id_to_label is None:
        id_to_label = inv_label_map

    # Loop-invariant: look the ignore index up once instead of instantiating a
    # CrossEntropyLoss module for every single token.
    ignore_index = torch.nn.CrossEntropyLoss().ignore_index

    preds = np.argmax(predictions, axis=2)

    batch_size, seq_len = preds.shape

    out_label_list = [[] for _ in range(batch_size)]
    preds_list = [[] for _ in range(batch_size)]

    for i in range(batch_size):
        for j in range(seq_len):
            if label_ids[i, j] != ignore_index:
                out_label_list[i].append(id_to_label[label_ids[i][j]])
                preds_list[i].append(id_to_label[preds[i][j]])

    return preds_list, out_label_list

def compute_metrics(p):
    """Compute accuracy, precision, recall and F1 for one evaluation pass.

    Args:
        p: object exposing `predictions` and `label_ids`.

    Returns:
        Dict with the keys `accuracy_score`, `precision`, `recall` and `f1`.
    """
    preds_list, out_label_list = align_predictions(p.predictions, p.label_ids)
    scorers = (
        ("accuracy_score", accuracy_score),
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    # Every scorer is called as scorer(gold, predicted).
    return {metric: scorer(out_label_list, preds_list) for metric, scorer in scorers}

# Hyperparameter search with Optuna's GridSampler, i.e. Optuna stops once all combinations of values in the search space have been tested

In [None]:
# Start from the default arguments (output directory "./train") and override
# individual fields afterwards, in this order.
training_args = TrainingArguments("./train")
_training_overrides = (
    ("evaluate_during_training", True),
    ("adam_epsilon", 1e-8),
    ("fp16", True),
    ("per_device_train_batch_size", 4),
    ("per_device_eval_batch_size", 16),
    ("gradient_accumulation_steps", 8),
    ("num_train_epochs", 6),
    ("evaluation_strategy", EvaluationStrategy.EPOCH),
    ("save_steps", 100000),
    ("disable_tqdm", True),
    ("lr_scheduler_type", 'cosine'),
)
for _option, _value in _training_overrides:
    setattr(training_args, _option, _value)

In [None]:
# Training examples consumed per step: per-device batch size times gradient accumulation steps.
examples_per_step = (
    training_args.per_device_train_batch_size
    * training_args.gradient_accumulation_steps
)
steps_per_epoch = len(selected_dataset.train) // examples_per_step
total_steps = training_args.num_train_epochs * steps_per_epoch
print(steps_per_epoch, total_steps, sep="\n")

124
744


In [None]:
# Build the Trainer on which `hyperparameter_search` is called further down.
# A `model_init` callable is passed instead of a model instance.
# NOTE(review): the test split doubles as the evaluation set here, so the hyperparameter
# search below optimises directly on test data — confirm this is intended.
trainer = Trainer(
    args=training_args,
    train_dataset=train_dataset, 
    eval_dataset=test_dataset, 
    model_init=model_init,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

In [None]:
# Warm-up grid: from 0 up to 10 % of the total steps, in increments of 5 % of the total
# steps. The values are computed as whole steps once and shared by the grid and the
# hp space, so `suggest_int` receives integers instead of float bounds. The increment
# is clamped to 1 so a very small `total_steps` does not produce a zero range step.
warmup_increment = max(1, int(total_steps*0.1*0.5))
warmup_grid = list(range(0, int(total_steps*0.1)+1, warmup_increment))

def my_hp_space(trial):
    """Declare the hyperparameters of one Optuna trial.

    Args:
        trial: the Optuna trial to draw the suggestions from.

    Returns:
        Dict with the keys `learning_rate`, `seed` and `warmup_steps`.
    """
    return {
        # NOTE(review): the upper bound is 7e-5, but the grid below stops at 6e-5
        # (`np.arange` excludes its end point).
        "learning_rate": trial.suggest_float("learning_rate", 2e-5, 7e-5, step=1e-5),
        "seed": trial.suggest_categorical("seed", [0, 1, 42, 666, 123, 12345]),
        # Bounds and step come from the grid so that every grid value is a valid suggestion.
        "warmup_steps": trial.suggest_int("warmup_steps", warmup_grid[0], warmup_grid[-1], step=warmup_increment)
    }

# Values enumerated by the grid sampler; one trial is run per combination.
search_space = {
    "learning_rate":  list(np.arange(2e-5, 7e-5, 1e-5)),
    "seed":  [0, 1, 42, 666, 123, 12345],
    "warmup_steps": warmup_grid
}
search_space

{'learning_rate': [2e-05,
  3.0000000000000004e-05,
  4.000000000000001e-05,
  5.000000000000001e-05,
  6.000000000000001e-05],
 'seed': [0, 1, 42, 666, 123, 12345],
 'warmup_steps': [0, 37, 74]}

In [None]:
def my_objective(metrics):
    """Return the value the hyperparameter search optimises: the `eval_f1` entry.

    Args:
        metrics: mapping of evaluation metrics; must contain the key `eval_f1`.
    """
    objective_key = 'eval_f1'
    return metrics[objective_key]

In [None]:
# Identifier of the hyperparameter study; the search cell below uses it for the file name
# of the SQLite storage.
name = "NER-arabert-large-v02"

In [None]:
# Run the grid search, maximising the value returned by `my_objective`.
# `n_trials=None` leaves the stopping decision to the grid sampler, and the NopPruner
# never stops a trial early.
# The study name and the SQLite file name both come from `name`, so they cannot drift apart.
# NOTE(review): with `load_if_exists=False`, re-running against an existing database that
# already holds this study is expected to fail rather than resume — confirm this is wanted.
best_run = trainer.hyperparameter_search(direction="maximize",
                                         hp_space=my_hp_space,
                                         compute_objective=my_objective,
                                         n_trials=None,
                                         pruner=optuna.pruners.NopPruner(),
                                         sampler=optuna.samplers.GridSampler(search_space),
                                         study_name=name,
                                         storage="sqlite:////content/drive/MyDrive/optuna_runs/{}.db".format(name),
                                         load_if_exists=False
                                         )

[32m[I 2021-01-06 19:52:08,426][0m A new study created in RDB with name: NER-arabert-large-v02[0m
Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model f

{'eval_loss': 0.14968341588974, 'eval_accuracy_score': 0.9664003853719241, 'eval_precision': 0.802754331408263, 'eval_recall': 0.7915024091108191, 'eval_f1': 0.7970886634318484, 'eval_runtime': 12.5556, 'eval_samples_per_second': 73.673, 'epoch': 0.9979879275653923}
{'eval_loss': 0.14608950912952423, 'eval_accuracy_score': 0.9681666733571515, 'eval_precision': 0.8205709107385591, 'eval_recall': 0.7932544897065265, 'eval_f1': 0.8066815144766147, 'eval_runtime': 12.4532, 'eval_samples_per_second': 74.278, 'epoch': 1.9979879275653922}
{'eval_loss': 0.1497354358434677, 'eval_accuracy_score': 0.9700533900686443, 'eval_precision': 0.8355173971983733, 'eval_recall': 0.8098992553657468, 'eval_f1': 0.822508896797153, 'eval_runtime': 12.4806, 'eval_samples_per_second': 74.115, 'epoch': 2.9979879275653922}
{'eval_loss': 0.15973614156246185, 'eval_accuracy_score': 0.970213961703665, 'eval_precision': 0.835820895522388, 'eval_recall': 0.80946123521682, 'eval_f1': 0.822429906542056, 'eval_runtime': 

[32m[I 2021-01-06 20:05:49,234][0m Trial 0 finished with value: 0.82018226272505 and parameters: {'learning_rate': 6.000000000000001e-05, 'seed': 123, 'warmup_steps': 0}. Best is trial 0 with value: 0.82018226272505.[0m


{'eval_loss': 0.16720832884311676, 'eval_accuracy_score': 0.9703343904299305, 'eval_precision': 0.8325812274368231, 'eval_recall': 0.8081471747700394, 'eval_f1': 0.82018226272505, 'eval_runtime': 12.3937, 'eval_samples_per_second': 74.635, 'epoch': 5.997987927565393}
{'train_runtime': 795.8278, 'train_samples_per_second': 0.935, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.16222655773162842, 'eval_accuracy_score': 0.9620649512263658, 'eval_precision': 0.8064965197215778, 'eval_recall': 0.7612790188348664, 'eval_f1': 0.783235691753042, 'eval_runtime': 12.5336, 'eval_samples_per_second': 73.802, 'epoch': 0.9979879275653923}
{'eval_loss': 0.14534486830234528, 'eval_accuracy_score': 0.9666412428244551, 'eval_precision': 0.8104371097234612, 'eval_recall': 0.7958826106000876, 'eval_f1': 0.8030939226519337, 'eval_runtime': 12.461, 'eval_samples_per_second': 74.232, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14148828387260437, 'eval_accuracy_score': 0.9684075308096824, 'eval_precision': 0.8211564320932317, 'eval_recall': 0.8024529128339903, 'eval_f1': 0.8116969428444839, 'eval_runtime': 12.4885, 'eval_samples_per_second': 74.068, 'epoch': 2.9979879275653922}
{'eval_loss': 0.1439645141363144, 'eval_accuracy_score': 0.96900967444101, 'eval_precision': 0.8254822790489008, 'eval_recall': 0.8059570740254052, 'eval_f1': 0.8156028368794326, 'eval_runt

[32m[I 2021-01-06 20:19:24,231][0m Trial 1 finished with value: 0.8176600441501104 and parameters: {'learning_rate': 2e-05, 'seed': 42, 'warmup_steps': 0}. Best is trial 0 with value: 0.82018226272505.[0m


{'eval_loss': 0.1451595425605774, 'eval_accuracy_score': 0.9696118180723375, 'eval_precision': 0.8242100578549176, 'eval_recall': 0.8112133158125274, 'eval_f1': 0.8176600441501104, 'eval_runtime': 12.4537, 'eval_samples_per_second': 74.275, 'epoch': 5.997987927565393}
{'train_runtime': 800.0686, 'train_samples_per_second': 0.93, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.16658826172351837, 'eval_accuracy_score': 0.9608205210549556, 'eval_precision': 0.7775218793182865, 'eval_recall': 0.7393780113885239, 'eval_f1': 0.7579703637180062, 'eval_runtime': 12.3929, 'eval_samples_per_second': 74.639, 'epoch': 0.9979879275653923}
{'eval_loss': 0.15009905397891998, 'eval_accuracy_score': 0.9678455300871102, 'eval_precision': 0.8320074871314928, 'eval_recall': 0.7787998247919404, 'eval_f1': 0.804524886877828, 'eval_runtime': 12.3873, 'eval_samples_per_second': 74.674, 'epoch': 1.9979879275653922}
{'eval_loss': 0.13599847257137299, 'eval_accuracy_score': 0.9703343904299305, 'eval_precision': 0.8415300546448088, 'eval_recall': 0.80946123521682, 'eval_f1': 0.8251841929002011, 'eval_runtime': 12.5291, 'eval_samples_per_second': 73.828, 'epoch': 2.9979879275653922}
{'eval_loss': 0.14517879486083984, 'eval_accuracy_score': 0.9704949620649512, 'eval_precision': 0.8349864743011722, 'eval_recall': 0.8112133158125274, 'eval_f1': 0.8229282381692958, 'eval_ru

[32m[I 2021-01-06 20:32:53,286][0m Trial 2 finished with value: 0.8212689901697944 and parameters: {'learning_rate': 6.000000000000001e-05, 'seed': 123, 'warmup_steps': 74}. Best is trial 2 with value: 0.8212689901697944.[0m


{'eval_loss': 0.159962460398674, 'eval_accuracy_score': 0.9699329613423788, 'eval_precision': 0.8381212950296397, 'eval_recall': 0.8050810337275515, 'eval_f1': 0.8212689901697944, 'eval_runtime': 12.2315, 'eval_samples_per_second': 75.624, 'epoch': 5.997987927565393}
{'train_runtime': 794.7682, 'train_samples_per_second': 0.936, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.1652948558330536, 'eval_accuracy_score': 0.9615029505037935, 'eval_precision': 0.7961520842876775, 'eval_recall': 0.7612790188348664, 'eval_f1': 0.7783251231527093, 'eval_runtime': 12.2261, 'eval_samples_per_second': 75.658, 'epoch': 0.9979879275653923}
{'eval_loss': 0.14061038196086884, 'eval_accuracy_score': 0.9675243868170688, 'eval_precision': 0.8112788632326821, 'eval_recall': 0.8002628120893561, 'eval_f1': 0.8057331863285557, 'eval_runtime': 12.2606, 'eval_samples_per_second': 75.445, 'epoch': 1.9979879275653922}
{'eval_loss': 0.13811887800693512, 'eval_accuracy_score': 0.9689293886234996, 'eval_precision': 0.8247882300490414, 'eval_recall': 0.8103372755146737, 'eval_f1': 0.8174988952717631, 'eval_runtime': 12.3675, 'eval_samples_per_second': 74.793, 'epoch': 2.9979879275653922}
{'eval_loss': 0.14436091482639313, 'eval_accuracy_score': 0.9691702460760306, 'eval_precision': 0.8278798744957419, 'eval_recall': 0.8090232150678931, 'eval_f1': 0.8183429330970314, 'eval_

[32m[I 2021-01-06 20:46:21,043][0m Trial 3 finished with value: 0.8230088495575222 and parameters: {'learning_rate': 2e-05, 'seed': 42, 'warmup_steps': 37}. Best is trial 3 with value: 0.8230088495575222.[0m


{'eval_loss': 0.14580576121807098, 'eval_accuracy_score': 0.9700533900686443, 'eval_precision': 0.8314707197139025, 'eval_recall': 0.8147174770039421, 'eval_f1': 0.8230088495575222, 'eval_runtime': 12.3025, 'eval_samples_per_second': 75.188, 'epoch': 5.997987927565393}
{'train_runtime': 793.6147, 'train_samples_per_second': 0.937, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.14605310559272766, 'eval_accuracy_score': 0.9674039580908033, 'eval_precision': 0.8357109986194201, 'eval_recall': 0.7954445904511608, 'eval_f1': 0.8150807899461401, 'eval_runtime': 12.5349, 'eval_samples_per_second': 73.794, 'epoch': 0.9979879275653923}
{'eval_loss': 0.13993820548057556, 'eval_accuracy_score': 0.9701336758861547, 'eval_precision': 0.822400713966979, 'eval_recall': 0.8072711344721857, 'eval_f1': 0.8147656940760388, 'eval_runtime': 12.3697, 'eval_samples_per_second': 74.78, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14386527240276337, 'eval_accuracy_score': 0.9703745333386857, 'eval_precision': 0.8413699864803966, 'eval_recall': 0.8177836180464302, 'eval_f1': 0.8294091514882276, 'eval_runtime': 12.5571, 'eval_samples_per_second': 73.664, 'epoch': 2.9979879275653922}
{'eval_loss': 0.15302197635173798, 'eval_accuracy_score': 0.9710569627875236, 'eval_precision': 0.8432700993676604, 'eval_recall': 0.8177836180464302, 'eval_f1': 0.8303313319991106, 'eval_r

[32m[I 2021-01-06 20:59:55,314][0m Trial 4 finished with value: 0.8322981366459629 and parameters: {'learning_rate': 6.000000000000001e-05, 'seed': 666, 'warmup_steps': 37}. Best is trial 4 with value: 0.8322981366459629.[0m


{'eval_loss': 0.1557389199733734, 'eval_accuracy_score': 0.9714182489663201, 'eval_precision': 0.8431460674157303, 'eval_recall': 0.8217257993867718, 'eval_f1': 0.8322981366459629, 'eval_runtime': 12.4299, 'eval_samples_per_second': 74.417, 'epoch': 5.997987927565393}
{'train_runtime': 799.1988, 'train_samples_per_second': 0.931, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.14869055151939392, 'eval_accuracy_score': 0.9663200995544137, 'eval_precision': 0.8041282389108476, 'eval_recall': 0.8020148926850635, 'eval_f1': 0.8030701754385966, 'eval_runtime': 12.3688, 'eval_samples_per_second': 74.785, 'epoch': 0.9979879275653923}
{'eval_loss': 0.13985051214694977, 'eval_accuracy_score': 0.96900967444101, 'eval_precision': 0.8352835283528353, 'eval_recall': 0.8129653964082347, 'eval_f1': 0.8239733629300776, 'eval_runtime': 12.5626, 'eval_samples_per_second': 73.631, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14176931977272034, 'eval_accuracy_score': 0.9709766769700132, 'eval_precision': 0.8369905956112853, 'eval_recall': 0.8186596583442839, 'eval_f1': 0.8277236492471214, 'eval_runtime': 12.3058, 'eval_samples_per_second': 75.168, 'epoch': 2.9979879275653922}
{'eval_loss': 0.1505139321088791, 'eval_accuracy_score': 0.9708161053349925, 'eval_precision': 0.8388838883888389, 'eval_recall': 0.8164695575996496, 'eval_f1': 0.8275249722530521, 'eval_ru

[32m[I 2021-01-06 21:13:24,958][0m Trial 5 finished with value: 0.8326693227091634 and parameters: {'learning_rate': 5.000000000000001e-05, 'seed': 42, 'warmup_steps': 0}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.15310879051685333, 'eval_accuracy_score': 0.9712576773312994, 'eval_precision': 0.8416107382550335, 'eval_recall': 0.823915900131406, 'eval_f1': 0.8326693227091634, 'eval_runtime': 12.4731, 'eval_samples_per_second': 74.16, 'epoch': 5.997987927565393}
{'train_runtime': 795.4501, 'train_samples_per_second': 0.935, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.15217162668704987, 'eval_accuracy_score': 0.9663200995544137, 'eval_precision': 0.794351279788173, 'eval_recall': 0.7884362680683311, 'eval_f1': 0.7913827214772478, 'eval_runtime': 12.4719, 'eval_samples_per_second': 74.167, 'epoch': 0.9979879275653923}
{'eval_loss': 0.14081041514873505, 'eval_accuracy_score': 0.9701336758861547, 'eval_precision': 0.822594880847308, 'eval_recall': 0.8164695575996496, 'eval_f1': 0.8195207737964387, 'eval_runtime': 12.4517, 'eval_samples_per_second': 74.287, 'epoch': 1.9979879275653922}
{'eval_loss': 0.15071506798267365, 'eval_accuracy_score': 0.9696519609810927, 'eval_precision': 0.8277877697841727, 'eval_recall': 0.806395094174332, 'eval_f1': 0.8169514089194586, 'eval_runtime': 12.4466, 'eval_samples_per_second': 74.318, 'epoch': 2.9979879275653922}
{'eval_loss': 0.15828166902065277, 'eval_accuracy_score': 0.969732246798603, 'eval_precision': 0.823943661971831, 'eval_recall': 0.8199737187910644, 'eval_f1': 0.821953896816685, 'eval_runti

[32m[I 2021-01-06 21:27:14,968][0m Trial 6 finished with value: 0.8251101321585902 and parameters: {'learning_rate': 6.000000000000001e-05, 'seed': 12345, 'warmup_steps': 0}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.1653466671705246, 'eval_accuracy_score': 0.9703745333386857, 'eval_precision': 0.8298626495347807, 'eval_recall': 0.8204117389399912, 'eval_f1': 0.8251101321585902, 'eval_runtime': 13.4926, 'eval_samples_per_second': 68.556, 'epoch': 5.997987927565393}
{'train_runtime': 801.1079, 'train_samples_per_second': 0.929, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.16469930112361908, 'eval_accuracy_score': 0.9651158122917587, 'eval_precision': 0.8257611241217798, 'eval_recall': 0.7722295225580377, 'eval_f1': 0.7980986871887732, 'eval_runtime': 12.4247, 'eval_samples_per_second': 74.449, 'epoch': 0.9979879275653923}
{'eval_loss': 0.13692522048950195, 'eval_accuracy_score': 0.9698928184336237, 'eval_precision': 0.8266309204647007, 'eval_recall': 0.8103372755146737, 'eval_f1': 0.8184030081840301, 'eval_runtime': 12.4685, 'eval_samples_per_second': 74.187, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14905139803886414, 'eval_accuracy_score': 0.970695676608727, 'eval_precision': 0.8394226432115471, 'eval_recall': 0.8151554971528691, 'eval_f1': 0.8271111111111111, 'eval_runtime': 12.5429, 'eval_samples_per_second': 73.747, 'epoch': 2.9979879275653922}
{'eval_loss': 0.1504199504852295, 'eval_accuracy_score': 0.9715788206013408, 'eval_precision': 0.8405211141060198, 'eval_recall': 0.8195356986421375, 'eval_f1': 0.8298957640275005, 'eval_r

[32m[I 2021-01-06 21:41:04,163][0m Trial 7 finished with value: 0.832110295752724 and parameters: {'learning_rate': 5.000000000000001e-05, 'seed': 0, 'warmup_steps': 37}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.16083067655563354, 'eval_accuracy_score': 0.9712576773312994, 'eval_precision': 0.8450767841011744, 'eval_recall': 0.8195356986421375, 'eval_f1': 0.832110295752724, 'eval_runtime': 12.4706, 'eval_samples_per_second': 74.174, 'epoch': 5.997987927565393}
{'train_runtime': 800.6381, 'train_samples_per_second': 0.929, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.14369803667068481, 'eval_accuracy_score': 0.9675243868170688, 'eval_precision': 0.8102222222222222, 'eval_recall': 0.7985107314936487, 'eval_f1': 0.8043238473417162, 'eval_runtime': 12.5132, 'eval_samples_per_second': 73.922, 'epoch': 0.9979879275653923}
{'eval_loss': 0.14006587862968445, 'eval_accuracy_score': 0.9694512464373168, 'eval_precision': 0.8135593220338984, 'eval_recall': 0.8199737187910644, 'eval_f1': 0.8167539267015707, 'eval_runtime': 12.5392, 'eval_samples_per_second': 73.769, 'epoch': 1.9979879275653922}
{'eval_loss': 0.13987639546394348, 'eval_accuracy_score': 0.970213961703665, 'eval_precision': 0.8281389136242209, 'eval_recall': 0.8147174770039421, 'eval_f1': 0.821373371605211, 'eval_runtime': 12.5195, 'eval_samples_per_second': 73.885, 'epoch': 2.9979879275653922}
{'eval_loss': 0.15173552930355072, 'eval_accuracy_score': 0.9706555336999719, 'eval_precision': 0.8300884955752212, 'eval_recall': 0.8217257993867718, 'eval_f1': 0.8258859784283514, 'eval_r

[32m[I 2021-01-06 21:54:36,897][0m Trial 8 finished with value: 0.8214522180534098 and parameters: {'learning_rate': 4.000000000000001e-05, 'seed': 12345, 'warmup_steps': 0}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.15830431878566742, 'eval_accuracy_score': 0.9705752478824615, 'eval_precision': 0.827846975088968, 'eval_recall': 0.8151554971528691, 'eval_f1': 0.8214522180534098, 'eval_runtime': 12.3759, 'eval_samples_per_second': 74.742, 'epoch': 5.997987927565393}
{'train_runtime': 798.0757, 'train_samples_per_second': 0.932, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.13126732409000397, 'eval_accuracy_score': 0.9705752478824615, 'eval_precision': 0.8194993412384717, 'eval_recall': 0.8173455978975033, 'eval_f1': 0.8184210526315789, 'eval_runtime': 12.5629, 'eval_samples_per_second': 73.63, 'epoch': 0.9979879275653923}
{'eval_loss': 0.19756591320037842, 'eval_accuracy_score': 0.9571273734494802, 'eval_precision': 0.7854406130268199, 'eval_recall': 0.718353044240035, 'eval_f1': 0.750400366048959, 'eval_runtime': 12.5069, 'eval_samples_per_second': 73.959, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14620041847229004, 'eval_accuracy_score': 0.9686885311709686, 'eval_precision': 0.8307830783078308, 'eval_recall': 0.8085851949189663, 'eval_f1': 0.8195338512763597, 'eval_runtime': 12.4833, 'eval_samples_per_second': 74.099, 'epoch': 2.9979879275653922}
{'eval_loss': 0.14146599173545837, 'eval_accuracy_score': 0.9710168198787684, 'eval_precision': 0.833185053380783, 'eval_recall': 0.8204117389399912, 'eval_f1': 0.8267490620172148, 'eval_run

[32m[I 2021-01-06 22:08:05,414][0m Trial 9 finished with value: 0.8285777482857773 and parameters: {'learning_rate': 5.000000000000001e-05, 'seed': 1, 'warmup_steps': 0}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.14718475937843323, 'eval_accuracy_score': 0.9711372486050339, 'eval_precision': 0.8369079535299374, 'eval_recall': 0.8204117389399912, 'eval_f1': 0.8285777482857773, 'eval_runtime': 12.5179, 'eval_samples_per_second': 73.894, 'epoch': 5.997987927565393}
{'train_runtime': 793.9273, 'train_samples_per_second': 0.937, 'epoch': 5.997987927565393}


Some weights of the model checkpoint at aubmindlab/bert-large-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

{'eval_loss': 0.1570618599653244, 'eval_accuracy_score': 0.9633896672152864, 'eval_precision': 0.8187762727697337, 'eval_recall': 0.7678493210687691, 'eval_f1': 0.7924954792043399, 'eval_runtime': 12.4615, 'eval_samples_per_second': 74.229, 'epoch': 0.9979879275653923}
{'eval_loss': 0.1403229534626007, 'eval_accuracy_score': 0.9693709606198065, 'eval_precision': 0.8144375553587245, 'eval_recall': 0.8055190538764783, 'eval_f1': 0.8099537546795859, 'eval_runtime': 12.5189, 'eval_samples_per_second': 73.889, 'epoch': 1.9979879275653922}
{'eval_loss': 0.14377336204051971, 'eval_accuracy_score': 0.9696118180723375, 'eval_precision': 0.8271549799017418, 'eval_recall': 0.8112133158125274, 'eval_f1': 0.8191065900044228, 'eval_runtime': 12.4886, 'eval_samples_per_second': 74.067, 'epoch': 2.9979879275653922}
{'eval_loss': 0.14475049078464508, 'eval_accuracy_score': 0.9709766769700132, 'eval_precision': 0.8302895322939866, 'eval_recall': 0.8164695575996496, 'eval_f1': 0.823321554770318, 'eval_ru

[32m[I 2021-01-06 22:21:39,242][0m Trial 10 finished with value: 0.8267140004437542 and parameters: {'learning_rate': 3.0000000000000004e-05, 'seed': 0, 'warmup_steps': 37}. Best is trial 5 with value: 0.8326693227091634.[0m


{'eval_loss': 0.15130630135536194, 'eval_accuracy_score': 0.9703343904299305, 'eval_precision': 0.8376798561151079, 'eval_recall': 0.8160315374507228, 'eval_f1': 0.8267140004437542, 'eval_runtime': 12.4236, 'eval_samples_per_second': 74.455, 'epoch': 5.997987927565393}
{'train_runtime': 795.1332, 'train_samples_per_second': 0.936, 'epoch': 5.997987927565393}


In [None]:
# Display the best hyperparameter-search result. `best_run` is defined in an
# earlier cell; presumably it is the outcome of the search whose trial logs
# appear in the output above (verify against that cell).
best_run

# Regular Training

In [None]:
# Training configuration for the regular (non-search) fine-tuning run.
# Checkpoints and other outputs go to ./train.
training_args = TrainingArguments("./train")
training_args.adam_epsilon = 1e-8
training_args.learning_rate = 5e-5
training_args.fp16 = True
training_args.per_device_train_batch_size = 16
training_args.per_device_eval_batch_size = 16
training_args.gradient_accumulation_steps = 2
training_args.num_train_epochs = 8

# Optimizer steps per epoch: number of training samples divided by the number
# of samples consumed per optimizer step (per-device batch * accumulation).
# NOTE(review): this assumes a single device and that selected_dataset.train
# has the same length as the train_dataset handed to the Trainer -- confirm.
samples_per_step = (
    training_args.per_device_train_batch_size
    * training_args.gradient_accumulation_steps
)
steps_per_epoch = len(selected_dataset.train) // samples_per_step
total_steps = steps_per_epoch * training_args.num_train_epochs
print(steps_per_epoch)
print(total_steps)

# Warm up the learning rate over the first 10% of the optimizer steps.
# warmup_steps is a step count, so truncate the product to an integer instead
# of storing a float such as 99.2.
warmup_ratio = 0.1
training_args.warmup_steps = int(total_steps * warmup_ratio)

# Evaluate once per epoch. This setting alone controls the evaluation
# schedule; the legacy `evaluate_during_training` flag is not needed.
training_args.evaluation_strategy = EvaluationStrategy.EPOCH
# training_args.logging_steps = 200
# Set far above total_steps because no intermediate checkpoint is wanted.
training_args.save_steps = 100000
training_args.seed = 42
training_args.disable_tqdm = False
training_args.lr_scheduler_type = 'cosine'

In [None]:
# Assemble the Trainer for the regular fine-tuning run from the model returned
# by model_init(), the training_args configured above, the train/eval datasets
# and the compute_metrics callback.
# NOTE(review): eval_dataset looks like the test split, so the per-epoch
# metrics would be measured on test data -- confirm this is intended.
regular_trainer_kwargs = dict(
    model=model_init(),
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**regular_trainer_kwargs)

Some weights of the model checkpoint at aubmindlab/bert-base-arabertv02 were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initia

In [None]:
# Run fine-tuning with the Trainer built in the previous cell. The call is the
# last expression of the cell, so its return value is shown as the cell output.
trainer.train()


Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate



Epoch,Training Loss,Validation Loss,Accuracy Score,Precision,Recall,F1,Runtime,Samples Per Second
0,No log,0.16144,0.962306,0.820141,0.766973,0.792666,8.5845,107.752
1,No log,0.145242,0.966962,0.830258,0.788436,0.808807,8.4375,109.629
2,No log,0.148088,0.969732,0.834677,0.816032,0.825249,8.4704,109.204
3,No log,0.144629,0.971499,0.840444,0.828296,0.834326,8.4261,109.779
4,0.111123,0.15306,0.970776,0.845632,0.818222,0.831701,8.7326,105.925
5,0.111123,0.159308,0.970896,0.844485,0.818222,0.831146,8.5441,108.262
6,0.111123,0.160349,0.971097,0.842318,0.821288,0.83167,8.6061,107.483
7,0.111123,0.160672,0.971057,0.842484,0.819974,0.831077,8.4303,109.723


TrainOutput(global_step=992, training_loss=0.06601395337812362, metrics={'train_runtime': 376.3862, 'train_samples_per_second': 2.636, 'total_flos': 6570640586138112, 'epoch': 7.995983935742972})

In [None]:
# Persist the fine-tuned model. "SOME_PATH" is a placeholder: replace it with
# the directory the model should be written to.
model_save_dir = "SOME_PATH"
trainer.save_model(model_save_dir)