In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer
from collections import Counter
import numpy as np

# Load the dataset
ner_dataset = load_dataset("tner/bc5cdr")

# Initialize the tokenizer with add_prefix_space=True
tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)

# Tokenization and alignment of labels
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], padding='max_length', truncation=True, max_length=128, is_split_into_words=True)
    labels = []
    for i, label in enumerate(examples["tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Tokenize the datasets
tokenized_datasets = ner_dataset.map(tokenize_and_align_labels, batched=True)

# Select small subsets for training and evaluation
small_train_dataset = tokenized_datasets["train"].select(range(1000))
small_eval_dataset = tokenized_datasets["validation"].select(range(200))

# Define FocalLoss
import torch
import torch.nn.functional as F

class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=0.25, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        elif self.reduction == 'sum':
            return torch.sum(F_loss)
        else:
            return F_loss

# Initialize focal loss
focal_loss = FocalLoss(alpha=0.25, gamma=2)

# Load the model
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

model = AutoModelForTokenClassification.from_pretrained("roberta-base", num_labels=5)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    gradient_accumulation_steps=1,
    logging_dir='./logs',
    logging_steps=10,
)

# Custom Trainer to use Focal Loss
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        loss = focal_loss(logits.view(-1, model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Initialize the trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Evaluate the model
from sklearn.metrics import classification_report

def flatten_list(nested_list):
    return [item for sublist in nested_list for item in sublist]

small_test_dataset = tokenized_datasets["test"].select(range(200))
predictions, labels, _ = trainer.predict(small_test_dataset)
predictions = np.argmax(predictions, axis=2)

true_labels = [[label for label in label if label != -100] for label in labels]
true_predictions = [
    [p for (p, l) in zip(prediction, label) if l != -100]
    for prediction, label in zip(predictions, labels)
]

flat_true_labels = flatten_list(true_labels)
flat_true_predictions = flatten_list(true_predictions)

print(classification_report(flat_true_labels, flat_true_predictions))

Map:   0%|          | 0/5228 [00:00<?, ? examples/s]

Map:   0%|          | 0/5330 [00:00<?, ? examples/s]

Map:   0%|          | 0/5865 [00:00<?, ? examples/s]

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.0026,0.001471
2,0.0012,0.001164
3,0.0008,0.001184


              precision    recall  f1-score   support

           0       0.99      0.98      0.98      3238
           1       0.84      0.91      0.88       150
           2       0.73      0.81      0.77       121
           3       0.57      0.85      0.68        54
           4       0.73      0.80      0.76        10

    accuracy                           0.96      3573
   macro avg       0.77      0.87      0.81      3573
weighted avg       0.97      0.96      0.97      3573

