# Dependencies

In [2]:
import os
import re
import warnings
import numpy as np
import pandas as pd
import torch
import transformers

from torch.utils.data import Dataset, random_split
from datasets import load_metric
from transformers import AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from transformers.utils import logging
from sklearn.model_selection import train_test_split

In [3]:
pd.set_option('display.max_colwidth', None)
warnings.filterwarnings("ignore")
# logging.set_verbosity_error()

np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f1a7bfb7910>

# Load Data

In [4]:
data = pd.read_json("advanced/nlp.jsonl", lines=True)
data

Unnamed: 0,key,transcript,tool,heading,target
0,0,"Turret, prepare to deploy electromagnetic pulse. Heading zero six five, target is grey and white fighter jet. Engage when ready.",electromagnetic pulse,65,grey and white fighter jet
1,1,"Engage yellow drone with surface-to-air missiles, heading two three five.",surface-to-air missiles,235,yellow drone
2,2,"Control to turrets, deploy electromagnetic pulse, heading one one zero. Engage blue and red fighter plane. Repeat, deploy EMP, heading one one zero, engage blue and red fighter aircraft. Over.",electromagnetic pulse,110,blue and red fighter plane
3,3,"Alfa, Echo, Mike Papa, deploy EMP tool heading zero eight five, engage purple, red, and silver fighter jet.",EMP,85,"purple, red, and silver fighter jet"
4,4,"Engage the grey, black, and green fighter plane with the machine gun, heading zero niner five.",machine gun,95,"grey, black, and green fighter plane"
...,...,...,...,...,...
3495,3495,Deploy electromagnetic pulse on brown commercial aircraft at heading three five zero.,electromagnetic pulse,350,brown commercial aircraft
3496,3496,"Deploy surface-to-air missiles, heading two one five, engage silver, orange, and brown helicopter.",surface-to-air missiles,215,"silver, orange, and brown helicopter"
3497,3497,"Engage target, grey, orange, and silver missile, with surface-to-air missiles. Heading zero eight zero. Target locked, prepare to fire.",surface-to-air missiles,80,"grey, orange, and silver missile"
3498,3498,Engage the white drone at heading zero five five with the machine gun.,machine gun,55,white drone


#  Feature Extraction

In [5]:
number_to_word = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "niner"]

def convert_number_to_word(heading):
    words = []
    
    for num in str(heading):
        words.append(number_to_word[int(num)])
    
    if len(words) < 3:
        words.insert(0, number_to_word[0])
        
    return " ".join(words).strip()

data["heading_words"] = data.apply(lambda row: convert_number_to_word(row["heading"]), axis=1)
data

Unnamed: 0,key,transcript,tool,heading,target,heading_words
0,0,"Turret, prepare to deploy electromagnetic pulse. Heading zero six five, target is grey and white fighter jet. Engage when ready.",electromagnetic pulse,65,grey and white fighter jet,zero six five
1,1,"Engage yellow drone with surface-to-air missiles, heading two three five.",surface-to-air missiles,235,yellow drone,two three five
2,2,"Control to turrets, deploy electromagnetic pulse, heading one one zero. Engage blue and red fighter plane. Repeat, deploy EMP, heading one one zero, engage blue and red fighter aircraft. Over.",electromagnetic pulse,110,blue and red fighter plane,one one zero
3,3,"Alfa, Echo, Mike Papa, deploy EMP tool heading zero eight five, engage purple, red, and silver fighter jet.",EMP,85,"purple, red, and silver fighter jet",zero eight five
4,4,"Engage the grey, black, and green fighter plane with the machine gun, heading zero niner five.",machine gun,95,"grey, black, and green fighter plane",zero niner five
...,...,...,...,...,...,...
3495,3495,Deploy electromagnetic pulse on brown commercial aircraft at heading three five zero.,electromagnetic pulse,350,brown commercial aircraft,three five zero
3496,3496,"Deploy surface-to-air missiles, heading two one five, engage silver, orange, and brown helicopter.",surface-to-air missiles,215,"silver, orange, and brown helicopter",two one five
3497,3497,"Engage target, grey, orange, and silver missile, with surface-to-air missiles. Heading zero eight zero. Target locked, prepare to fire.",surface-to-air missiles,80,"grey, orange, and silver missile",zero eight zero
3498,3498,Engage the white drone at heading zero five five with the machine gun.,machine gun,55,white drone,zero five five


In [6]:
def generate_tokens_labels(row, label_to_idx):
    labels = []
    words = re.findall(r"[\w-]+|[.,!?;]", row["transcript"])
    tool = re.findall(r"[\w-]+|[.,!?;]", row["tool"])
    target = re.findall(r"[\w-]+|[.,!?;]", row["target"])
    heading_words = re.findall(r"[\w-]+|[.,!?;]", row["heading_words"])
    
    # generate ner label indices for each sample
    i = 0
    while i < len(words):
        word = words[i]
        
        if word == tool[0]:
            labels.append(label_to_idx["B-TOOL"])
            labels.extend([label_to_idx["I-TOOL"]] * (len(tool) - 1))
            i += len(tool)
        elif word == target[0]:
            labels.append(label_to_idx["B-TAR"])
            labels.extend([label_to_idx["I-TAR"]] * (len(target) - 1))
            i += len(target)
        elif word == heading_words[0]:
            labels.append(label_to_idx["B-HEAD"])
            labels.extend([label_to_idx["I-HEAD"]] * (len(heading_words) - 1))
            i += len(heading_words)
        else:
            labels.append(label_to_idx["O"])
            i += 1
            
    row["words"] = [word.lower() for word in words]
    row["labels"] = labels
    
    # check number of words matches number of labels
    return row

idx_to_label = {0: "O", 1: "B-TOOL", 2: "I-TOOL", 3: "B-TAR", 4: "I-TAR", 5: "B-HEAD", 6: "I-HEAD"}
label_to_idx = {}
for idx, label in idx_to_label.items():
    label_to_idx[label] = idx

data = data.apply(lambda row: generate_tokens_labels(row, label_to_idx), axis=1)
data.head(9)

Unnamed: 0,key,transcript,tool,heading,target,heading_words,words,labels
0,0,"Turret, prepare to deploy electromagnetic pulse. Heading zero six five, target is grey and white fighter jet. Engage when ready.",electromagnetic pulse,65,grey and white fighter jet,zero six five,"[turret, ,, prepare, to, deploy, electromagnetic, pulse, ., heading, zero, six, five, ,, target, is, grey, and, white, fighter, jet, ., engage, when, ready, .]","[0, 0, 0, 0, 0, 1, 2, 0, 0, 5, 6, 6, 0, 0, 0, 3, 4, 4, 4, 4, 0, 0, 0, 0, 0]"
1,1,"Engage yellow drone with surface-to-air missiles, heading two three five.",surface-to-air missiles,235,yellow drone,two three five,"[engage, yellow, drone, with, surface-to-air, missiles, ,, heading, two, three, five, .]","[0, 3, 4, 0, 1, 2, 0, 0, 5, 6, 6, 0]"
2,2,"Control to turrets, deploy electromagnetic pulse, heading one one zero. Engage blue and red fighter plane. Repeat, deploy EMP, heading one one zero, engage blue and red fighter aircraft. Over.",electromagnetic pulse,110,blue and red fighter plane,one one zero,"[control, to, turrets, ,, deploy, electromagnetic, pulse, ,, heading, one, one, zero, ., engage, blue, and, red, fighter, plane, ., repeat, ,, deploy, emp, ,, heading, one, one, zero, ,, engage, blue, and, red, fighter, aircraft, ., over, .]","[0, 0, 0, 0, 0, 1, 2, 0, 0, 5, 6, 6, 0, 0, 3, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 0, 0, 3, 4, 4, 4, 4, 0, 0, 0]"
3,3,"Alfa, Echo, Mike Papa, deploy EMP tool heading zero eight five, engage purple, red, and silver fighter jet.",EMP,85,"purple, red, and silver fighter jet",zero eight five,"[alfa, ,, echo, ,, mike, papa, ,, deploy, emp, tool, heading, zero, eight, five, ,, engage, purple, ,, red, ,, and, silver, fighter, jet, .]","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 5, 6, 6, 0, 0, 3, 4, 4, 4, 4, 4, 4, 4, 0]"
4,4,"Engage the grey, black, and green fighter plane with the machine gun, heading zero niner five.",machine gun,95,"grey, black, and green fighter plane",zero niner five,"[engage, the, grey, ,, black, ,, and, green, fighter, plane, with, the, machine, gun, ,, heading, zero, niner, five, .]","[0, 0, 3, 4, 4, 4, 4, 4, 4, 4, 0, 0, 1, 2, 0, 0, 5, 6, 6, 0]"
5,5,"Air defense turret, lock onto target black and yellow missile at heading zero one five. Deploy EMP.",EMP,15,black and yellow missile,zero one five,"[air, defense, turret, ,, lock, onto, target, black, and, yellow, missile, at, heading, zero, one, five, ., deploy, emp, .]","[0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 5, 6, 6, 0, 0, 1, 0]"
6,6,"Control, heading zero eight five, deploy electromagnetic pulse on the green, grey, and brown cargo aircraft target. Over.",electromagnetic pulse,85,"green, grey, and brown cargo aircraft",zero eight five,"[control, ,, heading, zero, eight, five, ,, deploy, electromagnetic, pulse, on, the, green, ,, grey, ,, and, brown, cargo, aircraft, target, ., over, .]","[0, 0, 0, 5, 6, 6, 0, 0, 1, 2, 0, 0, 3, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0]"
7,7,"Control to air defense turrets, prepare to engage target at heading zero four five. Deploy anti-air artillery against white light aircraft. Execute with extreme precision and caution.",anti-air artillery,45,white light aircraft,zero four five,"[control, to, air, defense, turrets, ,, prepare, to, engage, target, at, heading, zero, four, five, ., deploy, anti-air, artillery, against, white, light, aircraft, ., execute, with, extreme, precision, and, caution, .]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 0, 0, 1, 2, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0]"
8,8,"Control here, this is a priority alert. Deploy anti-air artillery to intercept the orange and blue light aircraft heading two one five. Execute immediately. Over.",anti-air artillery,215,orange and blue light aircraft,two one five,"[control, here, ,, this, is, a, priority, alert, ., deploy, anti-air, artillery, to, intercept, the, orange, and, blue, light, aircraft, heading, two, one, five, ., execute, immediately, ., over, .]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 3, 4, 4, 4, 4, 0, 5, 6, 6, 0, 0, 0, 0, 0, 0]"


In [7]:
data['target'].nunique()

1884

# Split Data

In [6]:
data_splits = {}
data_splits["train"], data_splits["val"], data_splits["test"] = random_split(data.to_dict(orient="index"), [0.8, 0.1, 0.1])
# data_splits["train"][0]

In [7]:
class NERDataset(Dataset):
    def __init__(self, data, tokenizer, idx_to_label, label_to_idx, max_len):
        self.tokens = []
        self.labels = []
        self.tokenizer = tokenizer
        self.max_len = max_len
        
        # tokenize words to subwords and adjust labels accordingly
        for row in data:
            row_tokens = []
            row_labels = []
            
            for i in range(len(row["words"])):
                word = row["words"][i]
                label = row["labels"][i]
                
                tokenized_word = tokenizer.tokenize(word)
                n_subwords = len(tokenized_word)
                
                row_tokens.extend(tokenized_word)
                
                if label == 0:
                    row_labels.extend([label] * n_subwords)
                elif idx_to_label[label][0] == "B":
                    row_labels.append(label)
                    row_labels.extend([label + 1] * (n_subwords - 1))
                else:
                    row_labels.extend([label] * n_subwords)
                    
            self.tokens.append(row_tokens)
            self.labels.append(row_labels)
            
    def __len__(self):
        return len(self.tokens) if len(self.tokens) == len(self.labels) else -1
    
    def __getitem__(self, idx):
        input_tokens = self.tokens[idx]
        input_labels = self.labels[idx]
        attention_mask = [1] * len(input_tokens)

        input_ids = self.tokenizer.convert_tokens_to_ids(input_tokens)
        
        # pad and truncate all inputs to fit model input size (bert = 512)
        input_ids = self.pad_and_truncate(input_ids, self.tokenizer.pad_token_id)
        input_labels = self.pad_and_truncate(input_labels, 0)
        attention_mask = self.pad_and_truncate(attention_mask, 0)
        
        return {
            "input_ids": torch.as_tensor(input_ids),
            "labels": torch.as_tensor(input_labels),
            "attention_mask": torch.as_tensor(attention_mask)
        }
                
    def pad_and_truncate(self, inputs, pad_id: int):
        if len(inputs) < self.max_len:
            padded_inputs = inputs + [pad_id] * (self.max_len - len(inputs))
        else:
            padded_inputs = inputs[: self.max_len]

        return padded_inputs

# Preprocess Dataset & Load Model

In [8]:
# load tokenizer and model from huggingface
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [9]:
datasets = {
    "train": NERDataset(
        data=data_splits["train"], 
        tokenizer=tokenizer,
        idx_to_label=idx_to_label,
        label_to_idx=label_to_idx,
        max_len=512
    ),
    "val": NERDataset(
        data=data_splits["val"], 
        tokenizer=tokenizer,
        idx_to_label=idx_to_label,
        label_to_idx=label_to_idx,
        max_len=512
    ),
    "test": NERDataset(
        data=data_splits["test"], 
        tokenizer=tokenizer,
        idx_to_label=idx_to_label,
        label_to_idx=label_to_idx,
        max_len=512
    )
}
# datasets["train"][0]

# Train & Hyperparameter Tune Model

In [10]:
def compute_metrics(p):
    metric = load_metric("seqeval", trust_remote_code=True)
    
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens)
    true_predictions = [
        [idx_to_label[p] for (p, l) in zip(prediction, label) if l != 0]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [idx_to_label[l] for (p, l) in zip(prediction, label) if l != 0]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    agg_results = {
        "overall_precision": results["overall_precision"],
        "overall_recall": results["overall_recall"],
        "overall_f1": results["overall_f1"],
        "overall_accuracy": results["overall_accuracy"],
    }

    # for k in results.keys():
    #   if(k not in flattened_results.keys()):
    #     flattened_results[k+"_f1"]=results[k]["f1"]

    return agg_results

In [11]:
out_dir = "models/nlp"
batch_size = 16
epochs = 10
lrates = [5e-5, 3e-5]
wdecays = [0.01, 0.1]

torch.cuda.empty_cache() if torch.cuda.is_available() else None

best_model = {
    "save_trainer": None,
    "test_metrics": None,
    "lr": None,
    "wd": None
}

# hyperparameter tuning
for lr in lrates:
    for wd in wdecays:
        model = AutoModelForTokenClassification.from_pretrained(
            pretrained_model_name_or_path="google-bert/bert-base-uncased",
            label2id=label_to_idx,
            id2label=idx_to_label,
            ignore_mismatched_sizes=True,
            num_labels=len(idx_to_label)
        )
        training_args = TrainingArguments(
            output_dir=out_dir,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=epochs,
            learning_rate=lr,
            weight_decay=wd,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True
        )
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=datasets["train"],
            eval_dataset=datasets["val"],
            data_collator=data_collator,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
        )
        
        trainer.train()
        predictions, _, metrics = trainer.predict(datasets["test"])
        
        # select best model
        if best_model["test_metrics"] is None or best_model["test_metrics"]["test_loss"] > metrics["test_loss"]:
            test_loss = best_model['test_metrics']['test_loss'] if best_model['test_metrics'] else None
            print(f"Updating best loss: {test_loss} -> {metrics['test_loss']}")

            best_model["save_trainer"] = trainer
            best_model["test_metrics"] = metrics
            best_model["lr"] = lr
            best_model["wd"] = wd
        
        del model, trainer

best_model["save_trainer"].save_model(os.path.join(out_dir, "best_model"))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,No log,0.000879,0.998138,0.999068,0.998603,0.999748
2,No log,0.000743,1.0,1.0,1.0,1.0
3,0.014600,0.000547,1.0,1.0,1.0,1.0
4,0.014600,0.000492,1.0,1.0,1.0,1.0
5,0.014600,0.000438,0.999068,0.999068,0.999068,0.999748
6,0.000300,0.000363,1.0,1.0,1.0,1.0
7,0.000300,0.000413,1.0,1.0,1.0,1.0
8,0.000300,0.000289,1.0,1.0,1.0,1.0
9,0.000200,0.000339,1.0,1.0,1.0,1.0
10,0.000200,0.000328,1.0,1.0,1.0,1.0


Checkpoint destination directory models/nlp/checkpoint-175 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-350 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-525 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-700 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-875 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1050 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1225 already exists and is non-empty.Saving will proceed but saved results

Updating best loss: None -> 0.0001310683583142236


Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,No log,0.000942,1.0,1.0,1.0,1.0
2,No log,0.000609,1.0,1.0,1.0,1.0
3,0.015300,0.000425,0.999068,0.999068,0.999068,0.999748
4,0.015300,0.000397,1.0,0.999068,0.999534,0.999495
5,0.015300,0.000319,1.0,1.0,1.0,1.0
6,0.000300,0.000334,1.0,1.0,1.0,1.0
7,0.000300,0.000319,1.0,1.0,1.0,1.0
8,0.000300,0.0003,1.0,1.0,1.0,1.0
9,0.000200,0.000301,1.0,1.0,1.0,1.0
10,0.000200,0.000319,1.0,1.0,1.0,1.0


Checkpoint destination directory models/nlp/checkpoint-175 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-350 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-525 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-700 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-875 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1050 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1225 already exists and is non-empty.Saving will proceed but saved results

Updating best loss: 0.0001310683583142236 -> 4.841189365833998e-05


Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,No log,0.001111,1.0,1.0,1.0,1.0
2,No log,0.000878,1.0,1.0,1.0,1.0
3,0.019300,0.000714,1.0,1.0,1.0,1.0
4,0.019300,0.000643,1.0,1.0,1.0,1.0
5,0.019300,0.00059,1.0,1.0,1.0,1.0
6,0.000300,0.00073,1.0,1.0,1.0,1.0
7,0.000300,0.000404,1.0,1.0,1.0,1.0
8,0.000300,0.000349,1.0,1.0,1.0,1.0
9,0.000200,0.000373,1.0,1.0,1.0,1.0
10,0.000200,0.000358,1.0,1.0,1.0,1.0


Checkpoint destination directory models/nlp/checkpoint-175 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-350 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-525 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-700 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-875 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1050 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1225 already exists and is non-empty.Saving will proceed but saved results

Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,No log,0.001112,1.0,1.0,1.0,1.0
2,No log,0.000958,1.0,1.0,1.0,1.0
3,0.019300,0.000719,1.0,1.0,1.0,1.0
4,0.019300,0.000479,1.0,1.0,1.0,1.0
5,0.019300,0.000566,1.0,1.0,1.0,1.0
6,0.000300,0.000566,1.0,1.0,1.0,1.0
7,0.000300,0.000447,1.0,1.0,1.0,1.0
8,0.000300,0.000354,1.0,1.0,1.0,1.0
9,0.000200,0.00038,1.0,1.0,1.0,1.0
10,0.000200,0.000386,1.0,1.0,1.0,1.0


Checkpoint destination directory models/nlp/checkpoint-175 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-350 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-525 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-700 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-875 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1050 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory models/nlp/checkpoint-1225 already exists and is non-empty.Saving will proceed but saved results

In [12]:
best_model

{'save_trainer': <transformers.trainer.Trainer at 0x7fe0b74ff9d0>,
 'test_metrics': {'test_loss': 4.841189365833998e-05,
  'test_overall_precision': 0.9981549815498155,
  'test_overall_recall': 0.9981549815498155,
  'test_overall_f1': 0.9981549815498155,
  'test_overall_accuracy': 0.9994922569179995,
  'test_runtime': 12.7462,
  'test_samples_per_second': 27.459,
  'test_steps_per_second': 1.726},
 'lr': 5e-05,
 'wd': 0.1}

In [None]:
out_dir = "models/nlp"
batch_size = 16
epochs = 10
lr = 5e-5
wd= 0.1

torch.cuda.empty_cache() if torch.cuda.is_available() else None

# train with selected hyperparameter
model = AutoModelForTokenClassification.from_pretrained(
    pretrained_model_name_or_path="google-bert/bert-base-uncased",
    label2id=label_to_idx,
    id2label=idx_to_label,
    ignore_mismatched_sizes=True,
    num_labels=len(idx_to_label)
)
training_args = TrainingArguments(
    output_dir=out_dir,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    learning_rate=lr,
    weight_decay=wd,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["val"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

trainer.train()
trainer.save_model(os.path.join(out_dir, "best_model"))