In [1]:
!pip install transformers datasets seqeval torch

Collecting datasets
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


In [11]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [2]:
# loading the dataset
from datasets import load_dataset

ds = load_dataset("conll2003")

print(ds)

README.md:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

conll2003.py:   0%|          | 0.00/9.57k [00:00<?, ?B/s]

The repository for conll2003 contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/conll2003.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14041 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3250 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3453 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})


In [3]:
# load the tokenizer
from transformers import AutoTokenizer

model_checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [6]:
# preprocess the dataset
from transformers import DataCollatorForTokenClassification

label_list = ds["train"].features["ner_tags"].feature.names

def tokenize_and_align_labels(examples):
  tokenized_inputs = tokenizer(examples["tokens"], truncation = True, is_split_into_words=True)
  labels = []

  for i, label in enumerate(examples["ner_tags"]):
    word_ids = tokenized_inputs.word_ids(batch_index = i)
    previous_word_idx = None
    label_ids = []

    for word_idx in word_ids:
      if word_idx is None:
        label_ids.append(-100) # ignore special tokens
      elif word_idx != previous_word_idx:
        label_ids.append(label[word_idx])  # Assign correct label
      else:
        label_ids.append(-100)   # ignore subwords
      previous_word_idx = word_idx

    labels.append(label_ids)

  tokenized_inputs["labels"] = labels
  return tokenized_inputs

tokenized_dataset = ds.map(tokenize_and_align_labels, batched = True)

# data collector for padding batched
data_collector = DataCollatorForTokenClassification(tokenizer)

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

In [9]:
# load the pre trained model
from transformers import AutoModelForTokenClassification

model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_list))

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
# define evaluation metrics
import evaluate
import numpy as np

metric = evaluate.load("seqeval")

def compute_metrics(eval_pred):
  logits, labels = eval_pred
  predictions = np.argmax(logits, axis = -1)

  true_labels = [[label_list[l] for l in label if l != -100] for label in labels]
  pred_labels = [[label_list[p] for (p, l) in zip(pred, label) if l != -100] for pred, label in zip(predictions, labels)]

  return metric.compute(predictions = pred_labels, references = true_labels)

In [18]:
# train the model using trainer method
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="bert-ner",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collector,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [19]:
trainer.train()

Epoch,Training Loss,Validation Loss,Loc,Misc,Org,Per,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,0.0322,0.050984,"{'precision': 0.9359041167274622, 'recall': 0.9776810016330975, 'f1': 0.9563365282215123, 'number': 1837}","{'precision': 0.858250276854928, 'recall': 0.8405639913232104, 'f1': 0.8493150684931507, 'number': 922}","{'precision': 0.8896057347670251, 'recall': 0.9254287844891872, 'f1': 0.9071637426900585, 'number': 1341}","{'precision': 0.9824079164376031, 'recall': 0.9701411509229099, 'f1': 0.9762360010925976, 'number': 1842}",0.927601,0.942275,0.934881,0.987403
2,0.0209,0.049239,"{'precision': 0.9526091586794462, 'recall': 0.9738704409363091, 'f1': 0.9631224764468371, 'number': 1837}","{'precision': 0.8694736842105263, 'recall': 0.89587852494577, 'f1': 0.8824786324786325, 'number': 922}","{'precision': 0.9116766467065869, 'recall': 0.9082774049217002, 'f1': 0.9099738513261113, 'number': 1341}","{'precision': 0.9799891833423472, 'recall': 0.9837133550488599, 'f1': 0.9818477377404496, 'number': 1842}",0.938799,0.950017,0.944375,0.988649
3,0.0112,0.05015,"{'precision': 0.9660010793308149, 'recall': 0.9744148067501361, 'f1': 0.970189701897019, 'number': 1837}","{'precision': 0.8686974789915967, 'recall': 0.8969631236442517, 'f1': 0.8826040554962647, 'number': 922}","{'precision': 0.9144542772861357, 'recall': 0.9246830723340791, 'f1': 0.9195402298850575, 'number': 1341}","{'precision': 0.9789075175770687, 'recall': 0.9826275787187839, 'f1': 0.980764020590626, 'number': 1842}",0.942928,0.953719,0.948293,0.989058


Trainer is attempting to log a value of "{'precision': 0.9359041167274622, 'recall': 0.9776810016330975, 'f1': 0.9563365282215123, 'number': 1837}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.858250276854928, 'recall': 0.8405639913232104, 'f1': 0.8493150684931507, 'number': 922}" of type <class 'dict'> for key "eval/MISC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8896057347670251, 'recall': 0.9254287844891872, 'f1': 0.9071637426900585, 'number': 1341}" of type <class 'dict'> for key "eval/ORG" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9824079164376031, 'recall': 0.9701411509229099,

TrainOutput(global_step=2634, training_loss=0.019653365525134876, metrics={'train_runtime': 475.7772, 'train_samples_per_second': 88.535, 'train_steps_per_second': 5.536, 'total_flos': 1020143109346326.0, 'train_loss': 0.019653365525134876, 'epoch': 3.0})

In [20]:
model.save_pretrained("finetuned_model_ner")
tokenizer.save_pretrained("finetuned_model_ner")

('finetuned_model_ner/tokenizer_config.json',
 'finetuned_model_ner/special_tokens_map.json',
 'finetuned_model_ner/vocab.txt',
 'finetuned_model_ner/added_tokens.json',
 'finetuned_model_ner/tokenizer.json')

In [31]:
from transformers import pipeline

# Create a pipeline for NER
nlp_ner = pipeline("ner", model=model, tokenizer=tokenizer)

# Example input sentence
text = "John lives in New York and works at Google."

# Run inference
ner_results = nlp_ner(text)

# Print results
for entity in ner_results:
    print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}, Start: {entity['start']}, End: {entity['end']}")


Device set to use cuda:0


Entity: john, Label: LABEL_1, Score: 0.9955, Start: 0, End: 4
Entity: lives, Label: LABEL_0, Score: 0.9990, Start: 5, End: 10
Entity: in, Label: LABEL_0, Score: 0.9972, Start: 11, End: 13
Entity: new, Label: LABEL_5, Score: 0.9986, Start: 14, End: 17
Entity: york, Label: LABEL_6, Score: 0.9989, Start: 18, End: 22
Entity: and, Label: LABEL_0, Score: 0.9995, Start: 23, End: 26
Entity: works, Label: LABEL_0, Score: 0.9992, Start: 27, End: 32
Entity: at, Label: LABEL_0, Score: 0.9974, Start: 33, End: 35
Entity: google, Label: LABEL_3, Score: 0.9972, Start: 36, End: 42
Entity: ., Label: LABEL_0, Score: 0.9994, Start: 42, End: 43
