In [1]:
!pip install transformers datasets seqeval



In [2]:
import torch
from datasets import load_dataset
import evaluate
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer
)
import numpy as np

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [31]:
!rm -rf ~/.cache/huggingface/datasets

In [4]:
from datasets import load_dataset

dataset = load_dataset("conll2003")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

conll2003.py: 0.00B [00:00, ?B/s]

Using the latest cached version of the dataset since conll2003 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'conll2003' at /root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98 (last modified on Thu Jul 24 07:01:16 2025).


In [5]:
label_list = dataset["train"].features["ner_tags"].feature.names

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


In [6]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            else:
                label_ids.append(label[word_idx])
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)


Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

In [7]:
model = AutoModelForTokenClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(label_list)
).to(device)


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
metric = evaluate.load("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

Downloading builder script: 0.00B [00:00, ?B/s]

In [9]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

training_args = TrainingArguments(
    output_dir="./ner-bert",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=10,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0724,0.074036,0.910392,0.918805,0.914579,0.97947
2,0.023,0.067135,0.931183,0.92993,0.930556,0.983119
3,0.0183,0.061371,0.938371,0.94258,0.940471,0.985106


TrainOutput(global_step=2634, training_loss=0.08157306129569662, metrics={'train_runtime': 515.4293, 'train_samples_per_second': 81.724, 'train_steps_per_second': 5.11, 'total_flos': 1050534559887048.0, 'train_loss': 0.08157306129569662, 'epoch': 3.0})

In [10]:
eval_results = trainer.evaluate()
print(eval_results)


{'eval_loss': 0.06137104704976082, 'eval_precision': 0.9383708467309754, 'eval_recall': 0.9425802978647049, 'eval_f1': 0.9404708620535315, 'eval_accuracy': 0.9851062577264967, 'eval_runtime': 9.9099, 'eval_samples_per_second': 327.955, 'eval_steps_per_second': 20.585, 'epoch': 3.0}


In [11]:
label_list = dataset["train"].features["ner_tags"].feature.names

In [12]:
from transformers import pipeline

ner_pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=0)

example = "Cristiano Ronaldo was born in Portugal and plays for Al-Nassr FC."
ner_results = ner_pipe(example)

for entity in ner_results:
    print(f"{entity['word']} -> {entity['entity_group']} ({entity['score']:.2f})")


Device set to use cuda:0


Cristiano -> LABEL_1 (1.00)
Ronaldo -> LABEL_2 (1.00)
was born in -> LABEL_0 (1.00)
Portugal -> LABEL_5 (1.00)
and plays for -> LABEL_0 (1.00)
Al - Nassr -> LABEL_3 (1.00)
FC -> LABEL_4 (1.00)
. -> LABEL_0 (1.00)


In [13]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from torch.nn.functional import softmax

# Load model and tokenizer
model_path = "./ner-bert/checkpoint-878"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)


In [14]:
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [15]:
# Prepare input
sentence = "Cristiano Ronaldo was born in Portugal and plays for Al-Nassr FC."
tokens = tokenizer(sentence, return_tensors="pt", truncation=True, is_split_into_words=False)
tokens = {k: v.to(device) for k, v in tokens.items()}

In [16]:
# Forward pass
with torch.no_grad():
    outputs = model(**tokens)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)

In [17]:

# Convert predictions to labels
label_list = dataset["train"].features["ner_tags"].feature.names
predicted_labels = [label_list[p.item()] for p in predictions[0]]


In [18]:
# Get token strings
token_strings = tokenizer.convert_ids_to_tokens(tokens["input_ids"][0])

# Display results
for token, label in zip(token_strings, predicted_labels):
    print(f"{token:15} -> {label}")


[CLS]           -> O
C               -> B-PER
##rist          -> B-PER
##iano          -> B-PER
Ronald          -> I-PER
##o             -> I-PER
was             -> O
born            -> O
in              -> O
Portugal        -> B-LOC
and             -> O
plays           -> O
for             -> O
Al              -> B-ORG
-               -> B-ORG
Na              -> B-ORG
##ss            -> B-ORG
##r             -> B-ORG
FC              -> I-ORG
.               -> O
[SEP]           -> O
