In [5]:
import pandas as pd
from collections import defaultdict

# Load the file
df = pd.read_csv("NER-test.tsv", sep="\t", error_bad_lines=False, engine="python")


# Reconstruct sentences and gold tags
sentences = defaultdict(list)
gold_tags = defaultdict(list)

for _, row in df.iterrows():
    sent_id = row['sentence_id']
    sentences[sent_id].append(row['token'])
    gold_tags[sent_id].append(row['BIO_NER_tag'])

tokens_list = list(sentences.values())
sent_list = [" ".join(tokens) for tokens in tokens_list]
gold_list = list(gold_tags.values())


Skipping line 33: Expected 4 fields in line 33, saw 5


In [6]:
import spacy

nlp_spacy = spacy.load("en_core_web_sm")
spacy_preds = []

for tokens in tokens_list:
    sentence = " ".join(tokens)
    doc = nlp_spacy(sentence)
    ents = ["O"] * len(tokens)

    for ent in doc.ents:
        ent_tokens = ent.text.split()
        for i, token in enumerate(tokens):
            if token in ent_tokens:
                prefix = "B-" if i == 0 or ents[i-1] == "O" else "I-"
                ents[i] = prefix + ent.label_

    spacy_preds.append(ents)


In [7]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
hf_ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

hf_preds = []
for tokens in tokens_list:
    sentence = " ".join(tokens)
    prediction = hf_ner(sentence)
    tags = ["O"] * len(tokens)

    for ent in prediction:
        ent_text = ent["word"].split()
        for i, token in enumerate(tokens):
            if token in ent_text:
                prefix = "B-" if i == 0 or tags[i-1] == "O" else "I-"
                tags[i] = prefix + ent["entity_group"]

    hf_preds.append(tags)




tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [8]:
from seqeval.metrics import classification_report

print("=== spaCy NER Performance ===")
print(classification_report(gold_list, spacy_preds))

print("\n=== HuggingFace NER Performance ===")
print(classification_report(gold_list, hf_preds))


=== spaCy NER Performance ===
             precision    recall  f1-score   support

     PERSON       0.40      0.36      0.38        11
WORK_OF_ART       1.00      0.22      0.36         9
        LOC       0.00      0.00      0.00         7
        ORG       0.00      0.00      0.00         3

  micro avg       0.18      0.20      0.19        30
  macro avg       0.45      0.20      0.25        30


=== HuggingFace NER Performance ===
             precision    recall  f1-score   support

     PERSON       0.00      0.00      0.00        11
WORK_OF_ART       0.00      0.00      0.00         9
        LOC       0.83      0.71      0.77         7
        ORG       0.60      1.00      0.75         3

  micro avg       0.25      0.27      0.26        30
  macro avg       0.25      0.27      0.25        30



In [10]:
# Save to CSV
error_df = pd.DataFrame({
    "Sentence": sent_list,
    "Gold": gold_list,
    "spaCy": spacy_preds,
    "HF_BERT": hf_preds
})
error_df.to_csv("ner_comparison_results.csv", index=False)
