In [None]:
# Maximum sequence length (in sub-word tokens) used when padding/truncating inputs.
MAX_LEN = 256
# Mini-batch size passed to the DataLoaders.
bs = 32

In [None]:
import pandas as pd

In [None]:
!pip install transformers
!pip install tokenizers
import pandas as pd



In [None]:
# Paths of the CoNLL-formatted NER splits (absolute paths under /content).
train = '/content/NER_Irish_trainplusbacktranslation.conll'
val = '/content/NER_Irish_validation.conll'
test = '/content/NER_Irish_test.conll'

In [None]:
def read_conll_file(file_path):
    """Parse a CoNLL-style file into sentences of (word, label) pairs.

    Every non-empty line is split on whitespace; the first column is the word
    and the last column is the NER label. Blank lines end the current
    sentence, and lines starting with '-DOCSTART-' are ignored.

    Args:
        file_path: path of the UTF-8 encoded CoNLL file.

    Returns:
        A list of sentences, each a list of (word, ner_label) tuples. Empty
        sentences are never emitted.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        stripped_lines = [raw.strip() for raw in handle]

    sentences = []
    pending = []
    # The trailing "" sentinel flushes a final sentence that is not followed
    # by a blank line in the file.
    for text in stripped_lines + [""]:
        if text.startswith('-DOCSTART-'):
            continue
        if not text:
            if pending:
                sentences.append(pending)
                pending = []
            continue
        columns = text.split()
        pending.append((columns[0], columns[-1]))
    return sentences

# Parse each split into a list of sentences, each a list of (word, label) tuples.
train_data = read_conll_file(train)
val_data = read_conll_file(val)
test_data = read_conll_file(test)

In [None]:
# BIO tag inventory: 'O' first, then B-/I- pairs for PER, LOC and ORG.
tag_values = ['O'] + [
    f'{prefix}-{entity}'
    for entity in ('PER', 'LOC', 'ORG')
    for prefix in ('B', 'I')
]

# "PAD" is the last entry; its index is used to fill padded label positions.
tag_values += ["PAD"]
# Lookup from tag string to its position in tag_values.
tag2idx = dict(zip(tag_values, range(len(tag_values))))

In [None]:
# Number of sentences per split (printed in the order: train, test, validation).
print(len(train_data))
print(len(test_data))
print(len(val_data))

1229
140
100


# Training

In [None]:
import torch
from transformers import AutoTokenizer

# Encode the labels
def encode_labels(sentence, tokenizer, label_map):
    """Tokenise a sentence word by word and expand its labels to sub-tokens.

    Args:
        sentence: iterable of (word, label) pairs.
        tokenizer: object exposing ``tokenize(word)`` returning the word's
            sub-tokens.
        label_map: dict mapping a label string to its integer id.

    Returns:
        A tuple ``(tokens, encoded_labels, word_indices)``:
        ``tokens`` is the flat list of sub-tokens, ``encoded_labels`` holds one
        label id per sub-token (every sub-token inherits its word's label) and
        ``word_indices[i]`` is the position in ``tokens`` where word ``i``
        starts.

    Raises:
        KeyError: if a label of a word that produced sub-tokens is missing
            from ``label_map``.
    """
    tokens = []
    encoded_labels = []
    word_indices = []

    for word, label in sentence:
        word_tokens = tokenizer.tokenize(word)
        word_indices.append(len(tokens))  # Track the index of the original word
        tokens.extend(word_tokens)
        # Assign label to each sub-token
        encoded_labels.extend(label_map[label] for _ in word_tokens)

    return tokens, encoded_labels, word_indices

def prepare_bert_inputs(data, max_seq_length=256, tokenizer=None, ignore_index=-100):
    """Turn (word, label) sentences into fixed-length BERT training features.

    Each sentence becomes ``[CLS] sub-tokens [SEP]`` padded (or truncated) to
    exactly ``max_seq_length`` positions. The label vector is aligned with
    ``input_ids`` position by position: the first sub-token of every word
    carries the word's label id, while ``[CLS]``, ``[SEP]``, continuation
    sub-tokens and padding carry ``ignore_index`` so they do not contribute to
    a cross-entropy loss that uses that ignore index. Code that computes
    metrics from these labels must skip ``ignore_index`` positions as well.

    Args:
        data: list of sentences, each a list of (word, label) pairs.
        max_seq_length: total length of every returned sequence, including the
            two special tokens. Must be at least 2.
        tokenizer: optional tokenizer exposing ``tokenize``,
            ``convert_tokens_to_ids`` and ``pad_token``. When omitted, the
            'DCU-NLP/bert-base-irish-cased-v1' tokenizer is loaded once.
        ignore_index: label id written to positions that must not be scored.

    Returns:
        A list with one dict per sentence holding ``'input_ids'`` (list of
        int), ``'attention_mask'`` (list of 0/1) and ``'labels'``
        (``torch.long`` tensor), all of length ``max_seq_length``.

    Raises:
        ValueError: if ``max_seq_length`` is smaller than 2.
        KeyError: if a sentence uses a label outside the seven BIO tags.
    """
    if max_seq_length < 2:
        raise ValueError("max_seq_length must leave room for [CLS] and [SEP]")
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained('DCU-NLP/bert-base-irish-cased-v1')

    # Dictionary of labels mapped to integer representations
    label_map = {
        'O': 0,
        'B-PER': 1,
        'I-PER': 2,
        'B-LOC': 3,
        'I-LOC': 4,
        'B-ORG': 5,
        'I-ORG': 6
    }

    bert_inputs = []
    # Sub-token budget once [CLS] and [SEP] are accounted for.
    body_budget = max_seq_length - 2

    for sentence in data:
        tokens, encoded_labels, word_indices = encode_labels(sentence, tokenizer, label_map)

        # Truncate over-long sentences so every feature has the same length.
        tokens = tokens[:body_budget]
        encoded_labels = encoded_labels[:body_budget]

        # word_indices are offsets into the un-prefixed token list, so the
        # labels are placed before [CLS] is prepended; otherwise every word
        # would pick up the label of the token in front of it.
        body_labels = [ignore_index] * len(tokens)
        for idx in word_indices:
            if idx < len(tokens):  # skips truncated words and empty tokenisations
                body_labels[idx] = encoded_labels[idx]

        tokens = ['[CLS]'] + tokens + ['[SEP]']
        aligned_labels = [ignore_index] + body_labels + [ignore_index]

        # The mask is built before padding so that pad positions get 0.
        attention_mask = [1] * len(tokens)

        padding_length = max_seq_length - len(tokens)
        tokens += [tokenizer.pad_token] * padding_length
        attention_mask += [0] * padding_length
        aligned_labels += [ignore_index] * padding_length

        # Converting the tokens to input IDs
        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # Appending the data to bert_inputs in a dict format
        bert_inputs.append({
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(aligned_labels, dtype=torch.long)
        })

    return bert_inputs

In [None]:
# Build the per-sentence feature dicts (input_ids / attention_mask / labels) for each split.
train_inputs = prepare_bert_inputs(train_data)
val_inputs = prepare_bert_inputs(val_data)
test_inputs = prepare_bert_inputs(test_data)

In [None]:
!pip install accelerate -U
!pip install transformers[torch]



In [None]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, Trainer, TrainingArguments
import torch

# Checkpoint identifier of the pretrained Irish BERT model.
model_name = 'DCU-NLP/bert-base-irish-cased-v1'
# Token-classification model with 7 output labels.
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=7)

# Load the tokeniser
tokenizer = AutoTokenizer.from_pretrained(model_name)
torch.cuda.empty_cache()
# NOTE(review): this optimizer is not handed to the Trainer constructed later, and
# the name `optimizer` is rebound before the custom training loop — it looks unused; confirm.
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
# Checking for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Moving the model to device to use GPU
model.to(device)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at DCU-NLP/bert-base-irish-cased-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30101, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Defining compute_metrics function for the evaluation metrics I want to see
def compute_metrics(pred, ignore_index=-100):
    """Compute token-level accuracy and a classification report.

    Positions whose gold label equals ``ignore_index`` (the Hugging Face
    convention for padding, special tokens and unlabelled continuation
    sub-tokens) are excluded, so they can neither inflate nor deflate the
    scores. When no label equals ``ignore_index`` every position is scored.

    Args:
        pred: evaluation output exposing ``label_ids`` (array of gold label
            ids) and ``predictions`` (array of per-label scores whose last
            axis is the label axis).
        ignore_index: gold label id marking positions to skip.

    Returns:
        dict with ``'accuracy'`` (float) and ``'classification_report'``
        (the text report produced by scikit-learn).
    """
    labels = pred.label_ids.flatten()
    preds = pred.predictions.argmax(-1).flatten()

    # Keep only the positions that carry a real gold label.
    scored = labels != ignore_index
    labels = labels[scored]
    preds = preds[scored]

    acc = accuracy_score(labels, preds)
    report = classification_report(labels, preds, zero_division=0)
    return {
        'accuracy': acc,
        'classification_report': report
    }

 # Defining the training arguments to be used for the Trainer
from transformers import TrainingArguments

# Outputs go to the "test_trainer" directory; evaluation runs once per epoch.
training_args = TrainingArguments("test_trainer",evaluation_strategy="epoch")

from transformers import Trainer

# Passing the model to be trained and the data for training and evaluation along with the metrics defined before

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_inputs,
    eval_dataset=val_inputs,
    compute_metrics=compute_metrics,
)

# No standalone DataLoaderConfiguration object is built here: that name is not
# imported anywhere in this notebook (so referencing it raises NameError) and
# the Trainer above does not take such an object.


In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertConfig

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of CUDA devices PyTorch reports.
n_gpu = torch.cuda.device_count()

In [None]:
# Show the GPU model when CUDA is present; on CPU-only runtimes show a label
# instead of raising, matching the CPU fallback used for `device`.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu (no CUDA device)"

'Tesla T4'

In [None]:
# Word sequences per split: keep only the word (element 0) of every
# (word, label) pair, preserving sentence boundaries.
train_sent = [[pair[0] for pair in sentence] for sentence in train_data]

val_sent = [[pair[0] for pair in sentence] for sentence in val_data]

test_sent = [[pair[0] for pair in sentence] for sentence in test_data]

In [None]:
# Word-level label sequences per split: keep only the label (element 1) of
# every (word, label) pair, preserving sentence boundaries.
train_labels = [[pair[1] for pair in sentence] for sentence in train_data]

val_labels = [[pair[1] for pair in sentence] for sentence in val_data]

test_labels = [[pair[1] for pair in sentence] for sentence in test_data]

In [None]:
def tokenize_and_preserve_labels(sentence, text_labels):
    """Sub-word tokenise a sentence and repeat each word's label per piece.

    Uses the notebook-level ``tokenizer``.

    Args:
        sentence: sequence of words.
        text_labels: sequence of labels, paired with ``sentence`` by position.

    Returns:
        A tuple ``(tokens, labels)`` of equal-length lists: the flat list of
        sub-word pieces and, for every piece, the label of the word it came
        from.
    """
    pieces_out, labels_out = [], []

    for word, label in zip(sentence, text_labels):
        # A word may break into several pieces; each one inherits the label.
        pieces = tokenizer.tokenize(word)
        pieces_out += pieces
        labels_out += [label for _ in pieces]

    return pieces_out, labels_out

In [None]:
# Sub-word tokenise every sentence of each split; each list element is the
# (tokens, labels) tuple returned by tokenize_and_preserve_labels.
tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(train_sent, train_labels)
]

val_tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(val_sent, val_labels)
]

test_tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(test_sent, test_labels)
]

In [None]:
# Split the (tokens, labels) tuples into parallel lists per split.
tokenized_texts = [token_label_pair[0] for token_label_pair in tokenized_texts_and_labels]
labels = [token_label_pair[1] for token_label_pair in tokenized_texts_and_labels]

# NOTE: `val_labels` and `test_labels` are rebound here from word-level to
# sub-token-level label lists. Re-running the tokenisation cell after this one
# would therefore zip the sentences against already-expanded labels.
val_tokenized_texts = [token_label_pair[0] for token_label_pair in val_tokenized_texts_and_labels]
val_labels = [token_label_pair[1] for token_label_pair in val_tokenized_texts_and_labels]

test_tokenized_texts = [token_label_pair[0] for token_label_pair in test_tokenized_texts_and_labels]
test_labels = [token_label_pair[1] for token_label_pair in test_tokenized_texts_and_labels]

In [None]:
# Convert sub-tokens to vocabulary ids, then pad (with 0) or truncate every
# sequence at the end so that it has exactly MAX_LEN entries.
# NOTE(review): no [CLS]/[SEP] tokens are added in this pipeline — confirm this is intended.
input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

val_input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in val_tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

test_input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in test_tokenized_texts],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

In [None]:
# Map label strings to ids and pad/truncate at the end to MAX_LEN, filling
# padded positions with the "PAD" id.
# NOTE: tag2idx.get returns None for a label that is not in tag2idx, so an
# unexpected tag in the data is not reported as a KeyError here.
tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

val_tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in val_labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

test_tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in test_labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

In [None]:
# Attention masks as floats: 1.0 for every non-zero token id, 0.0 where the id
# is 0 (the value the id sequences were padded with).
attention_masks = [[float(i != 0.0) for i in ii] for ii in input_ids]

val_attention_masks = [[float(i != 0.0) for i in ii] for ii in val_input_ids]

test_attention_masks = [[float(i != 0.0) for i in ii] for ii in test_input_ids]

In [None]:
# Convert the padded ids, tag ids and masks of each split to torch tensors.
tr_inputs = torch.tensor(input_ids)
tr_tags = torch.tensor(tags)
tr_masks = torch.tensor(attention_masks)

vl_inputs = torch.tensor(val_input_ids)
vl_tags = torch.tensor(val_tags)
vl_masks = torch.tensor(val_attention_masks)

tst_inputs = torch.tensor(test_input_ids)
tst_tags = torch.tensor(test_tags)
tst_masks = torch.tensor(test_attention_masks)

In [None]:
# Each dataset item is an (input_ids, attention_mask, tags) triple.
# Training batches are drawn in random order.
tr_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
train_sampler = RandomSampler(tr_data)
train_dataloader = DataLoader(tr_data, sampler=train_sampler, batch_size=bs)

# Validation and test batches keep the original sentence order.
valid_data = TensorDataset(vl_inputs, vl_masks, vl_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=bs)

tst_data = TensorDataset(tst_inputs, tst_masks, tst_tags)
tst_sampler = SequentialSampler(tst_data)
tst_dataloader = DataLoader(tst_data, sampler=tst_sampler, batch_size=bs)

In [None]:
from transformers import BertForTokenClassification, AdamW, BertModel, BertConfig

In [None]:
# Encoder configuration with output_hidden_states=True, which BERT_CRF relies
# on to read the hidden states of the last four layers.
config = BertConfig.from_pretrained('DCU-NLP/bert-base-irish-cased-v1', output_hidden_states=True)
config.max_position_embeddings = 512

# Bare encoder built without the pooling layer.
bert_model = BertModel.from_pretrained(
                        'DCU-NLP/bert-base-irish-cased-v1',
                        config=config,
                        add_pooling_layer=False
)

In [None]:
import torch.nn as nn

In [None]:
!pip install pytorch-crf
from torchcrf import CRF

Collecting pytorch-crf
  Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)
Installing collected packages: pytorch-crf
Successfully installed pytorch-crf-0.7.2


In [None]:
class BERT_CRF(nn.Module):
    """BERT encoder followed by a linear emission layer and a CRF tagger.

    The emission layer reads the concatenation of the encoder's last four
    hidden layers, so ``bert_model`` must be configured to return hidden
    states (``output_hidden_states=True``).
    """

    def __init__(self, bert_model, num_labels):
        """
        Args:
            bert_model: encoder module exposing ``config.hidden_size``.
            num_labels: number of tags scored by the emission layer and CRF.
        """
        super(BERT_CRF, self).__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(0.25)
        # Input width is the concatenation of the last four hidden layers.
        hidden_size = bert_model.config.hidden_size
        self.classifier = nn.Linear(4 * hidden_size, num_labels)
        self.crf = CRF(num_labels, batch_first = True)

    def forward_custom(self, b_input_ids, b_input_mask,  b_labels=None, token_type_ids=None):
        """Run the tagger on one batch.

        Args:
            b_input_ids: token-id tensor of the batch.
            b_input_mask: attention mask; non-zero marks real tokens. It is
                also used (as a boolean mask) to tell the CRF which positions
                to score and decode.
            b_labels: optional gold tag ids; when given, the CRF loss is
                computed as well.
            token_type_ids: optional segment ids forwarded to the encoder.

        Returns:
            ``[loss, prediction]`` when ``b_labels`` is given, otherwise
            ``prediction``. ``loss`` is the negated mean CRF log-likelihood and
            ``prediction`` is the list of decoded tag-id sequences returned by
            the CRF, one per batch element.

        Raises:
            ValueError: if the encoder did not return hidden states.
        """
        outputs = self.bert(b_input_ids, attention_mask=b_input_mask, token_type_ids=token_type_ids)

        # Read the hidden states by name: position 1 of the output is only the
        # hidden states when the encoder produces no pooled output.
        if isinstance(outputs, tuple):
            hidden_states = outputs[1]
        else:
            hidden_states = outputs.hidden_states
        if hidden_states is None:
            raise ValueError("bert_model must be configured with output_hidden_states=True")

        sequence_output = torch.cat(
            (hidden_states[-1], hidden_states[-2], hidden_states[-3], hidden_states[-4]), -1)
        sequence_output = self.dropout(sequence_output)

        emission = self.classifier(sequence_output)  # (batch, seq_len, num_labels)

        # A boolean mask avoids the deprecated uint8 condition in torch.where
        # inside the CRF implementation.
        crf_mask = b_input_mask.bool()
        prediction = self.crf.decode(emission, mask=crf_mask)

        if b_labels is None:
            return prediction

        # The CRF returns a log-likelihood; negate it to obtain a loss.
        loss = -self.crf(torch.log_softmax(emission, dim=2), b_labels, mask=crf_mask, reduction='mean')
        return [loss, prediction]

In [None]:
# The label count is len(tag2idx), which includes the "PAD" entry.
model = BERT_CRF(bert_model, num_labels=len(tag2idx))
model.to(device)

BERT_CRF(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30101, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=T

In [None]:
# Number of encoder parameters, i.e. the index in model.named_parameters() of
# the first parameter that does not belong to `model.bert` (197 for the
# checkpoint used in this notebook). The next two entries are the classifier
# weight and bias; everything after that belongs to the CRF.
num_layer = sum(1 for _ in model.bert.named_parameters())

# Every parameter is trained (nothing is frozen); print the inventory so the
# index-based grouping below can be checked by eye.
for cnt, (param_name, param) in enumerate(model.named_parameters()):
    param.requires_grad = True
    print(cnt, param_name, '\t', param.requires_grad)


def _param_groups(named_params, no_decay, weight_decay, lr=None):
    """Split named parameters into a decayed and a decay-exempt optimizer group."""
    decayed = {
        'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
        'weight_decay': weight_decay,
    }
    exempt = {
        'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    }
    if lr is not None:
        decayed['lr'] = lr
        exempt['lr'] = lr
    return [decayed, exempt]


# NOTE: with FINETUNING = False no parameter groups are defined, so the AdamW
# call below raises NameError.
FINETUNING = True
if FINETUNING:
    named_params = list(model.named_parameters())
    param_optimizer1 = named_params[:num_layer]               # BERT encoder
    param_optimizer2 = named_params[num_layer:num_layer+2]    # linear classifier
    param_optimizer3 = named_params[num_layer+2:]             # CRF
    # Biases and LayerNorm weights are exempt from weight decay. 'gamma'/'beta'
    # are kept for checkpoints using the older LayerNorm parameter names.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
    # The per-group key must be 'weight_decay': that is the option AdamW reads,
    # any other key is carried along without effect.
    optimizer_grouped_parameters = (
        # Encoder: uses the optimizer's default learning rate.
        _param_groups(param_optimizer1, no_decay, weight_decay=1e-5)
        # Classifier and CRF are trained from scratch with larger learning rates.
        + _param_groups(param_optimizer2, no_decay, weight_decay=1e-3, lr=1e-3)
        + _param_groups(param_optimizer3, no_decay, weight_decay=1e-3, lr=4e-3)
    )

optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=3e-5,
    eps=1e-8
)

0 bert.embeddings.word_embeddings.weight 	 True
1 bert.embeddings.position_embeddings.weight 	 True
2 bert.embeddings.token_type_embeddings.weight 	 True
3 bert.embeddings.LayerNorm.weight 	 True
4 bert.embeddings.LayerNorm.bias 	 True
5 bert.encoder.layer.0.attention.self.query.weight 	 True
6 bert.encoder.layer.0.attention.self.query.bias 	 True
7 bert.encoder.layer.0.attention.self.key.weight 	 True
8 bert.encoder.layer.0.attention.self.key.bias 	 True
9 bert.encoder.layer.0.attention.self.value.weight 	 True
10 bert.encoder.layer.0.attention.self.value.bias 	 True
11 bert.encoder.layer.0.attention.output.dense.weight 	 True
12 bert.encoder.layer.0.attention.output.dense.bias 	 True
13 bert.encoder.layer.0.attention.output.LayerNorm.weight 	 True
14 bert.encoder.layer.0.attention.output.LayerNorm.bias 	 True
15 bert.encoder.layer.0.intermediate.dense.weight 	 True
16 bert.encoder.layer.0.intermediate.dense.bias 	 True
17 bert.encoder.layer.0.output.dense.weight 	 True
18 bert.encode



In [None]:
from transformers import get_linear_schedule_with_warmup, get_constant_schedule_with_warmup, get_cosine_with_hard_restarts_schedule_with_warmup

In [None]:
# Upper bound on the number of training epochs.
epochs = 10
# Gradient-norm clipping threshold used in the training loop.
max_grad_norm = 1.0

# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
# The first tenth of the steps is used for warm-up.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(total_steps/10),
    num_training_steps=total_steps
)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
from tqdm import tqdm, trange

In [None]:
import torch.nn.functional as F
# Short alias for torch.nn.functional.log_softmax.
log_soft = F.log_softmax

In [None]:
%%time
## Store the average loss after each epoch so we can plot them.
train_loss_values, valid_loss_values = [], []
PATH = './gabert_crf_ner.pt'
f1_max = 0
f1_list = []
hist = {}

# Early-stopping state is initialised before the loop so that an epoch which
# does not improve on f1_max (including the very first one) can use it.
patience = 5
epochs_no_improve = 0
best_epoch = None

for epoch in trange(epochs, desc="Epoch"):
    # ========================================
    #               Training
    # ========================================
    # Perform one full pass over the training set.

    # Put the model into training mode.
    model.train()
    # Reset the total loss for this epoch.
    total_loss = 0

    # Training loop
    for step, batch in enumerate(train_dataloader):
        # add batch to gpu
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        # Always clear any previously calculated gradients before performing a backward pass.
        model.zero_grad()

        # forward pass
        # Labels are provided, so the first element of the result is the loss.
        outputs = model.forward_custom(b_input_ids, b_input_mask, b_labels, token_type_ids=None)
        loss = outputs[0]
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        # track train loss
        total_loss += loss.item()
        # Clip the norm of the gradient
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        # update parameters
        optimizer.step()
        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_dataloader)
    print("\nAverage train loss: {}".format(avg_train_loss))

    # Store the loss value for plotting the learning curve.
    train_loss_values.append(avg_train_loss)


    # ========================================
    #               Validation
    # ========================================
    model.eval()
    predictions_f1 , true_labels_f1 = [], []
    eval_loss = 0
    for batch in valid_dataloader:

        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        with torch.no_grad():
            outputs = model.forward_custom(b_input_ids, b_input_mask, b_labels, token_type_ids=None)

        eval_loss += outputs[0].mean().item()
        batch_preds = outputs[1]
        batch_label_ids = b_labels.to('cpu').numpy().tolist()

        # Loop variables are deliberately named so that they do not overwrite
        # the notebook-level `labels`, `preds` and `tokens`.
        for seq_input_ids, seq_preds, seq_label_ids in zip(b_input_ids, batch_preds, batch_label_ids):
            seq_tokens = tokenizer.convert_ids_to_tokens(seq_input_ids.to('cpu').numpy())

            # Score one position per word: continuation pieces ("##...") are
            # skipped. The decoded sequence only covers unmasked positions, so
            # zip() stops before the padding.
            for token, label_idx, pred in zip(seq_tokens, seq_label_ids, seq_preds):
                if token.startswith("##"):
                    continue
                true_labels_f1.append(label_idx)
                predictions_f1.append(pred)
    eval_loss = eval_loss / len(valid_dataloader)
    print("Validation loss:", eval_loss)
    # Index the full tag list so a (rare) predicted "PAD" id cannot raise;
    # "PAD" is still excluded from the scores through `labels=` below.
    true_labels_f1_convert = [tag_values[i] for i in true_labels_f1]
    predictions_f1_convert = [tag_values[i] for i in predictions_f1]
    f1 = f1_score(true_labels_f1_convert, predictions_f1_convert,labels= tag_values[:-1] ,average='macro')
    print(classification_report(true_labels_f1_convert, predictions_f1_convert, labels= tag_values[:-1], digits=4))


    valid_loss_values.append(eval_loss)
    f1_list.append(f1)
    hist['train_loss_values'] = train_loss_values
    hist['valid_loss_values'] = valid_loss_values
    hist['f1_list'] = f1_list
    if f1 > f1_max:
        # Persist the weights of the best epoch so far; the message below
        # reports exactly this checkpoint.
        torch.save(model.state_dict(), PATH)
        print(f'f1_score improved from: {f1_max:.4f} to {f1:.4f}')
        print(f'Best model saved to {PATH}')
        f1_max = f1
        epochs_no_improve = 0
        best_epoch = epoch
    else:
        print(f'f1_score didnt improve from: {f1_max:.4f} to {f1:.4f}')
        epochs_no_improve += 1
        if epochs_no_improve < patience:
            print(f'EarlyStopping count: {epochs_no_improve}/{patience}')
        else:
            print(f'\nEarly Stopping! Total epochs: {epoch + 1}. Best epoch: {best_epoch} with f1_score: {f1_max:.4f}')
            break

# Leave `model` holding the best-scoring weights rather than the last epoch's,
# so that anything done with `model` afterwards (evaluation, saving) uses them.
if best_epoch is not None:
    model.load_state_dict(torch.load(PATH, map_location=device))

  score = torch.where(mask[i].unsqueeze(1), next_score, score)



Average train loss: 30.937777360280354


Epoch:  10%|█         | 1/10 [01:10<10:37, 70.83s/it]

Validation loss: 6.782674789428711
              precision    recall  f1-score   support

           O     0.9775    0.9836    0.9806      2567
       B-PER     0.8571    0.5373    0.6606        67
       I-PER     0.7333    0.7416    0.7374        89
       B-LOC     0.8276    0.7742    0.8000        62
       I-LOC     0.8095    0.8947    0.8500        38
       B-ORG     0.6986    0.6296    0.6623        81
       I-ORG     0.6780    0.7843    0.7273       102

    accuracy                         0.9448      3006
   macro avg     0.7974    0.7636    0.7740      3006
weighted avg     0.9447    0.9448    0.9437      3006

f1_score improved from: 0.0000 to 0.7740
Best model saved to ./gabert_crf_ner.pt

Average train loss: 6.0567414577190695


Epoch:  20%|██        | 2/10 [02:12<08:42, 65.31s/it]

Validation loss: 4.840796113014221
              precision    recall  f1-score   support

           O     0.9806    0.9844    0.9825      2567
       B-PER     0.8333    0.8209    0.8271        67
       I-PER     0.9130    0.7079    0.7975        89
       B-LOC     0.9194    0.9194    0.9194        62
       I-LOC     0.8571    0.9474    0.9000        38
       B-ORG     0.6941    0.7284    0.7108        81
       I-ORG     0.7238    0.7451    0.7343       102

    accuracy                         0.9558      3006
   macro avg     0.8459    0.8362    0.8388      3006
weighted avg     0.9561    0.9558    0.9555      3006

f1_score improved from: 0.7740 to 0.8388
Best model saved to ./gabert_crf_ner.pt

Average train loss: 3.2494165561138053


Epoch:  30%|███       | 3/10 [03:13<07:24, 63.51s/it]

Validation loss: 5.544920027256012
              precision    recall  f1-score   support

           O     0.9929    0.9778    0.9853      2567
       B-PER     0.8689    0.7910    0.8281        67
       I-PER     0.7927    0.7303    0.7602        89
       B-LOC     0.8730    0.8871    0.8800        62
       I-LOC     0.8372    0.9474    0.8889        38
       B-ORG     0.6939    0.8395    0.7598        81
       I-ORG     0.6641    0.8529    0.7468       102

    accuracy                         0.9561      3006
   macro avg     0.8175    0.8609    0.8356      3006
weighted avg     0.9605    0.9561    0.9576      3006

f1_score didnt improve from: 0.8388 to 0.8356
EarlyStopping count: 1/5

Average train loss: 1.8347103733282824


Epoch:  40%|████      | 4/10 [04:15<06:16, 62.72s/it]

Validation loss: 5.048903048038483
              precision    recall  f1-score   support

           O     0.9890    0.9829    0.9859      2567
       B-PER     0.8814    0.7761    0.8254        67
       I-PER     0.9028    0.7303    0.8075        89
       B-LOC     0.8730    0.8871    0.8800        62
       I-LOC     0.7826    0.9474    0.8571        38
       B-ORG     0.7176    0.7531    0.7349        81
       I-ORG     0.7154    0.9118    0.8017       102

    accuracy                         0.9597      3006
   macro avg     0.8374    0.8555    0.8418      3006
weighted avg     0.9625    0.9597    0.9602      3006

f1_score improved from: 0.8388 to 0.8418
Best model saved to ./gabert_crf_ner.pt

Average train loss: 1.0828904547752478


Epoch:  50%|█████     | 5/10 [05:16<05:11, 62.22s/it]

Validation loss: 6.017362117767334
              precision    recall  f1-score   support

           O     0.9944    0.9766    0.9855      2567
       B-PER     0.8667    0.7761    0.8189        67
       I-PER     0.8933    0.7528    0.8171        89
       B-LOC     0.8833    0.8548    0.8689        62
       I-LOC     0.8140    0.9211    0.8642        38
       B-ORG     0.6422    0.8642    0.7368        81
       I-ORG     0.6957    0.9412    0.8000       102

    accuracy                         0.9581      3006
   macro avg     0.8271    0.8695    0.8416      3006
weighted avg     0.9644    0.9581    0.9598      3006

f1_score didnt improve from: 0.8418 to 0.8416
EarlyStopping count: 1/5

Average train loss: 0.7086210338733135


Epoch:  60%|██████    | 6/10 [06:17<04:07, 61.97s/it]

Validation loss: 5.774811685085297
              precision    recall  f1-score   support

           O     0.9906    0.9809    0.9857      2567
       B-PER     0.8235    0.8358    0.8296        67
       I-PER     0.9041    0.7416    0.8148        89
       B-LOC     0.8358    0.9032    0.8682        62
       I-LOC     0.7347    0.9474    0.8276        38
       B-ORG     0.7500    0.7778    0.7636        81
       I-ORG     0.7154    0.8627    0.7822       102

    accuracy                         0.9591      3006
   macro avg     0.8220    0.8642    0.8388      3006
weighted avg     0.9620    0.9591    0.9599      3006

f1_score didnt improve from: 0.8418 to 0.8388
EarlyStopping count: 2/5

Average train loss: 0.5189431535127835


Epoch:  70%|███████   | 7/10 [07:19<03:05, 61.88s/it]

Validation loss: 5.958145499229431
              precision    recall  f1-score   support

           O     0.9882    0.9829    0.9855      2567
       B-PER     0.8833    0.7910    0.8346        67
       I-PER     0.9155    0.7303    0.8125        89
       B-LOC     0.8730    0.8871    0.8800        62
       I-LOC     0.8182    0.9474    0.8780        38
       B-ORG     0.7262    0.7531    0.7394        81
       I-ORG     0.7023    0.9020    0.7897       102

    accuracy                         0.9597      3006
   macro avg     0.8438    0.8563    0.8457      3006
weighted avg     0.9625    0.9597    0.9602      3006

f1_score improved from: 0.8418 to 0.8457
Best model saved to ./gabert_crf_ner.pt

Average train loss: 0.39984605728815764


Epoch:  80%|████████  | 8/10 [08:21<02:04, 62.02s/it]

Validation loss: 6.55455219745636
              precision    recall  f1-score   support

           O     0.9906    0.9805    0.9855      2567
       B-PER     0.8571    0.8060    0.8308        67
       I-PER     0.9028    0.7303    0.8075        89
       B-LOC     0.8594    0.8871    0.8730        62
       I-LOC     0.6792    0.9474    0.7912        38
       B-ORG     0.7191    0.7901    0.7529        81
       I-ORG     0.6855    0.8333    0.7522       102

    accuracy                         0.9568      3006
   macro avg     0.8134    0.8535    0.8276      3006
weighted avg     0.9607    0.9568    0.9578      3006

f1_score didnt improve from: 0.8457 to 0.8276
EarlyStopping count: 1/5

Average train loss: 0.28448369468633944


Epoch:  90%|█████████ | 9/10 [09:23<01:02, 62.01s/it]

Validation loss: 6.37119197845459
              precision    recall  f1-score   support

           O     0.9921    0.9797    0.9859      2567
       B-PER     0.8438    0.8060    0.8244        67
       I-PER     0.9067    0.7640    0.8293        89
       B-LOC     0.8710    0.8710    0.8710        62
       I-LOC     0.8000    0.9474    0.8675        38
       B-ORG     0.7283    0.8272    0.7746        81
       I-ORG     0.6992    0.9118    0.7915       102

    accuracy                         0.9604      3006
   macro avg     0.8344    0.8724    0.8492      3006
weighted avg     0.9643    0.9604    0.9615      3006

f1_score improved from: 0.8457 to 0.8492
Best model saved to ./gabert_crf_ner.pt

Average train loss: 0.24455416756562698


Epoch: 100%|██████████| 10/10 [10:25<00:00, 62.59s/it]

Validation loss: 6.2138543128967285
              precision    recall  f1-score   support

           O     0.9909    0.9809    0.9859      2567
       B-PER     0.8438    0.8060    0.8244        67
       I-PER     0.9067    0.7640    0.8293        89
       B-LOC     0.8750    0.9032    0.8889        62
       I-LOC     0.8000    0.9474    0.8675        38
       B-ORG     0.7273    0.7901    0.7574        81
       I-ORG     0.7209    0.9118    0.8052       102

    accuracy                         0.9611      3006
   macro avg     0.8378    0.8719    0.8512      3006
weighted avg     0.9641    0.9611    0.9619      3006

f1_score improved from: 0.8492 to 0.8512
Best model saved to ./gabert_crf_ner.pt
CPU times: user 10min 7s, sys: 3.14 s, total: 10min 10s
Wall time: 10min 25s





In [None]:
# Write the model's current weights (its state_dict) to PATH.
PATH = './gabert_crf_ner.pt'
torch.save(model.state_dict(), PATH)

In [None]:
# Restore the weights stored in the checkpoint file into `model`.
model.load_state_dict(torch.load('./gabert_crf_ner.pt'))

<All keys matched successfully>

In [None]:
model.eval()

# One decoded tag-id sequence per test sentence (unpadded, as returned by the
# CRF) and the matching gold tag-id rows (padded to MAX_LEN).
predicted_labels = []
true_labels = []

# Iterate through the test_dataloader to get the predictions
for batch in tst_dataloader:
    # Move the (input_ids, attention_mask, tags) triple to the active device.
    batch = tuple(t.to(device) for t in batch)

    # Disabling gradient calculation for evaluation. No labels are passed, so
    # only the decoded sequences are returned and no loss is computed.
    with torch.no_grad():
        batch_predictions = model.forward_custom(batch[0], batch[1], token_type_ids=None)

    # Collect predictions and gold labels without printing every sequence,
    # which would flood the cell output.
    predicted_labels.extend(batch_predictions)
    true_labels.extend(batch[2].tolist())

[0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 2, 2, 2, 2, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 5, 5, 5, 5, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 5, 4, 4, 4, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0]
[0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 3, 4, 4, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 5, 5, 5, 6, 6, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 

In [None]:
# Dump the raw predicted and gold tag-id sequences (very long output).
print(predicted_labels)
print(true_labels)

[[0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 2, 2, 2, 2, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 5, 5, 5, 5, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 5, 4, 4, 4, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0], [0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 3, 4, 4, 0, 3, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 5, 5, 5, 6, 6, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
# Gold tag ids per sentence with the padding removed. The padding id is looked
# up from tag2idx instead of being hard-coded, so it stays correct if the tag
# inventory changes.
pad_id = tag2idx["PAD"]
trues = [
    [tag_id for tag_id in sentence_tags if tag_id != pad_id]
    for sentence_tags in true_labels
]

In [None]:
# Alias (same list object) for the decoded test predictions.
preds=predicted_labels

In [None]:
# Number of predicted sequences (one per test sentence).
print(len(preds))

140


In [None]:
# Dump the test sub-tokens, their gold labels and the predicted tag ids (very long output).
print(test_tokenized_texts)
print(test_labels)
print(preds)

[['Mar', 'a', 't', '##ch', '##ítear', 'do', 'Sheos', '##amh', 'Mac', 'Grianna', 'é', 'caithfidh', 'an', 't', '-', 'ealaíontóir', 'an', 'solas', 'a', 'thabhairt', 'don', 'saol', 'agus', 'diúltú', 'do', 'chath', '##ú', 'sin', 'na', 'truaill', '##íochta', 'a', 'chuireann', 'an', 'saol', 'ina', 'chos', '##án', '.'], ['Dhiúltaigh', 'John', '##atha', '##n', 'Cape', 'agus', 'cúpla', 'foilsitheoir', 'eile', 'an', 'leabhar', 'a', 'fhoilsiú', 'agus', 'bhí', 'Nao', '##mi', 'le', 'ceangal', 'go', 'raibh', 'srian', 'a', 'chur', 'ar', 'a', 'cuid', 'tuairimíocht', '##a', '.'], ['Grianghraif', 'le', 'Maidhc', 'Ó', 'Seach', '##nas', '##aí', '.'], ['Tagann', 'a', 'ráiteas', 'tar', 'éis', 'don', 'chomhlacht', 'a', 'rá', 'le', 'hoibrithe', 'an', 'iarnróid', 'mí', 'ó', 'shin', 'go', 'raibh', 'sí', 'le', 'dúnadh', ':', "'", 'Chu', '##amar', 'ag', 'cruinniú', 'i', 'mBaile', 'Átha', 'an', 'Rí', 'agus', 'dúirt', 'an', 'bhainistíocht', 'linn', 'ansin', 'go', 'raibh', 'deireadh', 'leis', 'an', 'tseirbhís', 'last

In [None]:
# Report every test sentence whose number of gold labels differs from the
# number of predicted tags. Every index is visited exactly once, so two
# consecutive mismatching sentences are both reported.
for i in range(len(preds)):
  if len(test_labels[i]) != len(preds[i]):
    print(f'issue: {i} {len(test_labels[i])}, {len(preds[i])}')
    print(f'{test_tokenized_texts[i]}\n{test_labels[i]}\n{preds[i]}')

In [None]:
import pandas as pd

# Integer class id -> NER tag string, used to decode the model predictions.
label_map = dict(enumerate(
    ['O', 'B-PER', 'I-PER', 'B-LOC', 'I-LOC', 'B-ORG', 'I-ORG']
))

# One DataFrame per sentence, each followed by a one-row blank DataFrame that
# acts as the sentence separator in the final table.
dfs = []

for words, gold_tags, pred_ids in zip(test_tokenized_texts, test_labels, preds):
    # 'POS' is a constant placeholder column; predictions are decoded to tags.
    decoded_preds = [label_map[pred_id] for pred_id in pred_ids]
    sentence_df = pd.DataFrame({
        'Word': words,
        'POS': 'X',
        'True': gold_tags,
        'Predicted': decoded_preds,
    })
    separator_df = pd.DataFrame({'Word': [''], 'POS': [''], 'True': [''], 'Predicted': ['']})
    dfs.extend([sentence_df, separator_df])

# Stack all sentence and separator frames into one table with a fresh index.
df = pd.concat(dfs, ignore_index=True)

print(df)

           Word POS   True Predicted
0           Mar   X      O         O
1             a   X      O         O
2             t   X      O         O
3          ##ch   X      O         O
4       ##ítear   X      O         O
...         ...  ..    ...       ...
4839          a   X      O         O
4840     bhfuil   X      O         O
4841  Gaeltacht   X  B-LOC     B-LOC
4842        ann   X      O         O
4843                                

[4844 rows x 4 columns]


In [None]:
# Preview the first 80 rows of the token-level results table.
df_preview = df.head(80)
print(df_preview)

       Word POS   True Predicted
0       Mar   X      O         O
1         a   X      O         O
2         t   X      O         O
3      ##ch   X      O         O
4   ##ítear   X      O         O
..      ...  ..    ...       ...
75    Seach   X  I-PER     I-PER
76    ##nas   X  I-PER     I-PER
77     ##aí   X  I-PER     I-PER
78        .   X      O         O
79                              

[80 rows x 4 columns]


In [None]:
# Serialise the results table as tab-separated text, one row per line:
#   word <TAB> pos <TAB> gold_label <TAB> predicted_label
# The blank separator rows of `df` are written like any other row, so each
# sentence boundary appears as a line containing only the three tabs.
#
# The lines are joined in one pass instead of growing a string with `+=`
# inside an iterrows() loop.
conll_format = "".join(
    f"{text}\t{pos}\t{tag}\t{mapped_tag}\n"
    for text, pos, tag, mapped_tag in zip(
        df['Word'], df['POS'], df['True'], df['Predicted']
    )
)

# Write the CoNLL format string to a text file.
# The encoding is set explicitly: the tokens contain accented Irish characters
# and the input files are read as UTF-8, so the output must not depend on the
# platform's default encoding.
with open('/content/gaBERT_V1_5_Backtranslation.conll', 'w', encoding='utf-8') as f:
    f.write(conll_format)