In [1]:
from transformers import BertForTokenClassification, BertTokenizer
import torch

# Path to the fine-tuned token-classification checkpoint (model + tokenizer files).
model_path = "c:/Users/Mark/OneDrive/Desktop/thesis-webpage/ner/ner_biobert/checkpoint-420"


def group_wordpieces(tokens, labels, special_tokens=()):
    """Rebuild whole words from WordPiece tokens, keeping one label per word.

    Pieces prefixed with ``##`` are appended to the word in progress; the
    word's label is the label predicted for its first piece.

    Args:
        tokens: Sequence of token strings (one per model position).
        labels: Sequence of label strings aligned with ``tokens``.
        special_tokens: Token strings to drop entirely (e.g. ``[CLS]``,
            ``[SEP]``, ``[PAD]``). A special token also terminates the word
            in progress, so pieces never merge across it.

    Returns:
        A list of ``(word, label)`` tuples in input order.
    """
    skip = frozenset(special_tokens)
    words = []
    pieces = []
    word_label = "O"

    for token, label in zip(tokens, labels):
        is_special = token in skip
        is_continuation = not is_special and token.startswith("##")

        if is_continuation and pieces:
            pieces.append(token[2:])  # Strip the "##" marker.
            continue

        # Any other token closes the word that was being assembled.
        if pieces:
            words.append(("".join(pieces), word_label))
            pieces = []
        if is_special:
            continue

        # A continuation piece with no open word starts a word of its own.
        piece = token[2:] if is_continuation else token
        if piece:
            pieces = [piece]
            word_label = label

    if pieces:
        words.append(("".join(pieces), word_label))
    return words


def _join_entity_words(words):
    """Join the words of one entity into a display string.

    Words are separated by a single space, except next to punctuation-only
    pieces (the tokenizer splits "co-trimoxazole" into "co", "-",
    "trimoxazole"), which are glued back without spaces. This is a heuristic:
    the original spacing of the text is not available at this point.
    """
    out = []
    prev_is_punct = False
    for word in words:
        is_punct = not any(ch.isalnum() for ch in word)
        if out and not is_punct and not prev_is_punct:
            out.append(" ")
        out.append(word)
        prev_is_punct = is_punct
    return "".join(out)


def merge_drug_names(word_labels, entity="DRUG"):
    """Collapse BIO-tagged words into a list of entity strings.

    Args:
        word_labels: Iterable of ``(word, label)`` tuples.
        entity: Entity type; the labels matched are ``B-<entity>`` and
            ``I-<entity>``.

    Returns:
        A list of entity names in order of appearance. A ``B-`` label always
        starts a new name. An ``I-`` label extends the current name, or
        starts one if none is open (lenient handling of a missing ``B-``).
        Any other label closes the current name.
    """
    begin = f"B-{entity}"
    inside = f"I-{entity}"
    names = []
    parts = []

    for word, label in word_labels:
        if label == inside:
            parts.append(word)
            continue
        # Both "B-" and non-entity labels end the name in progress.
        if parts:
            names.append(_join_entity_words(parts))
            parts = []
        if label == begin:
            parts.append(word)

    if parts:
        names.append(_join_entity_words(parts))
    return names


# Run the demo only when executed as a script / notebook cell, so the helpers
# above can be imported without loading the model.
if __name__ == "__main__":
    # Load the fine-tuned model and tokenizer
    model = BertForTokenClassification.from_pretrained(model_path)
    tokenizer = BertTokenizer.from_pretrained(model_path)

    # Example text for NER
    text = "Aspirin and Paracetamol is a commonly used drug for pain relief.Ibuprofen is another common pain reliever."

    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Pick the highest-scoring label id for every token position
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=2)

    # Decode ids back to token strings and label names (first sequence only)
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [model.config.id2label[p.item()] for p in predictions[0]]

    # Reconstruct words and their labels, dropping [CLS]/[SEP]/[PAD] so a
    # stray prediction on a special token cannot end up in the results
    word_labels = group_wordpieces(
        tokens, labels, special_tokens=tokenizer.all_special_tokens
    )

    # Merge consecutive B-DRUG and I-DRUG words
    merged_drug_names = merge_drug_names(word_labels)

    # Print the merged drug names
    print("Merged Drug Names:", merged_drug_names)

  from .autonotebook import tqdm as notebook_tqdm


Merged Drug Names: ['aspirin', 'paracetamol', 'ibuprofen']
