In [1]:
!pip install transformers torch pytorch-crf

Collecting pytorch-crf
  Downloading pytorch_crf-0.7.2-py3-none-any.whl.metadata (2.4 kB)
Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)
Installing collected packages: pytorch-crf
Successfully installed pytorch-crf-0.7.2


In [2]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
from torchcrf import CRF
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import joblib

In [3]:
# Define the model class with CRF
class NERBertModel(nn.Module):
    """Token tagger: BERT encoder -> dropout -> linear emission layer -> CRF.

    Args:
        num_tag: Number of distinct tags; sets the width of the emission layer
            and the number of CRF states.
        class_weights: Optional tensor multiplied element-wise (with
            broadcasting) into the emissions before they reach the CRF.
            ``None`` disables the scaling.
            NOTE(review): presumably one weight per tag, shape (num_tag,) — confirm.
    """

    def __init__(self, num_tag, class_weights=None):
        super().__init__()
        self.num_tag = num_tag
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.bert_drop = nn.Dropout(0.3)
        # Take the input width from the encoder config instead of hard-coding 768,
        # so the head stays consistent with whatever encoder is loaded above.
        self.out_tag = nn.Linear(self.bert.config.hidden_size, self.num_tag)
        self.crf = CRF(num_tag, batch_first=True)
        self.class_weights = class_weights

    def forward(self, ids, mask, token_type_ids, target_tags=None):
        """Run the encoder and either score gold tags or decode the best path.

        Args:
            ids: Input token ids passed to BERT.
            mask: Attention mask passed to BERT; also used (as booleans) as the
                CRF mask.
            token_type_ids: Segment ids passed to BERT.
            target_tags: Optional gold tag ids. When given, the CRF negative
                log-likelihood is computed instead of decoding.

        Returns:
            ``(emissions, loss)`` when ``target_tags`` is given, where ``loss``
            is the negated mean CRF log-likelihood; otherwise
            ``(pred_tags, None)`` where ``pred_tags`` is the result of
            ``CRF.decode``.
        """
        # return_dict=False yields a tuple; element 0 is the per-token hidden states.
        hidden = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)[0]
        emissions = self.out_tag(self.bert_drop(hidden))
        if self.class_weights is not None:
            emissions = emissions * self.class_weights.to(emissions.device)

        # The CRF feeds the mask to torch.where as the condition; uint8 conditions
        # (mask.byte()) are deprecated and emit a warning, so pass a bool mask.
        crf_mask = mask.bool()
        if target_tags is not None:
            log_likelihood = self.crf(emissions, target_tags, mask=crf_mask, reduction='mean')
            return emissions, -log_likelihood
        pred_tags = self.crf.decode(emissions, mask=crf_mask)
        return pred_tags, None

In [5]:
# Load the tokenizer and label encoder
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load encoder files that come from a trusted source.
tag_encoder = joblib.load("ner_label_encoder_v1.pkl")
num_tag = len(tag_encoder.classes_)

# Load the pre-trained model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = NERBertModel(num_tag=num_tag).to(device)
# map_location remaps tensors that were saved from a GPU so the checkpoint also
# loads on a CPU-only machine. weights_only=True restricts unpickling to plain
# tensors/containers, which is all a state dict needs, and avoids the
# torch.load FutureWarning about the unsafe default.
model.load_state_dict(
    torch.load("ner_bert_model_v1.pth", map_location=device, weights_only=True)
)
model.eval()

  model.load_state_dict(torch.load("ner_bert_model_v1.pth"))


NERBertModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_

In [6]:
def predict_sentence(sentence, model, tokenizer, tag_encoder, device):
    """Predict one tag per tokenizer token for a single sentence.

    Args:
        sentence: Raw text. It is split on whitespace and handed to the
            tokenizer as pre-split words.
        model: Object with ``eval()`` that is callable as
            ``model(ids=..., mask=..., token_type_ids=...)`` and returns
            ``(pred_tags, _)``; ``pred_tags[0]`` must be the sequence of
            integer tag ids for the first (only) sequence.
        tokenizer: Callable tokenizer that returns a mapping with
            ``input_ids`` and ``attention_mask`` (and optionally
            ``token_type_ids``) and provides ``convert_ids_to_tokens``.
        tag_encoder: Object whose ``inverse_transform`` maps integer tag ids
            back to tag labels.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        ``(tokens, pred_tags)``: the tokenizer tokens of the sentence
        (whatever the tokenizer emitted, including any special tokens) and the
        list of decoded tag labels.
    """
    model.eval()
    encoded = tokenizer(
        sentence.split(),
        return_tensors="pt",
        truncation=True,
        padding=True,
        is_split_into_words=True,
    )
    input_ids = encoded['input_ids'].to(device)
    mask = encoded['attention_mask'].to(device)
    token_type_ids = encoded.get('token_type_ids', None)
    if token_type_ids is not None:
        token_type_ids = token_type_ids.to(device)

    with torch.no_grad():
        pred_tags, _ = model(ids=input_ids, mask=mask, token_type_ids=token_type_ids)

    # tolist() works for tensors on any device and yields plain Python ints.
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
    # Decode the whole tag sequence with one inverse_transform call instead of
    # one call per token; list() keeps the return type a list as before.
    pred_tags = list(tag_encoder.inverse_transform(pred_tags[0]))

    return tokens, pred_tags

In [7]:
# Demo inputs: one mountain-related sentence per entry
sentences = [
    "Climbing Mount Everest is one of the greatest achievements for any mountaineer.",
    "Kilimanjaro is the highest mountain in Africa and attracts climbers from all over the world.",
    "Mount Fuji is a symbol of Japan and is famous for its beautiful shape.",
    "Aconcagua, located in the Andes, is the tallest mountain in the Americas.",
    "Denali is the highest peak in North America, located in Alaska.",
]

# Tag each sentence and print its tokens next to the predicted labels,
# followed by a separator line
for sentence in sentences:
    tokens, pred_tags = predict_sentence(sentence, model, tokenizer, tag_encoder, device)
    print(
        f"Sentence: {sentence}",
        f"Tokens: {tokens}",
        f"Predicted Tags: {pred_tags}",
        "=" * 50,
        sep="\n",
    )

Sentence: Climbing Mount Everest is one of the greatest achievements for any mountaineer.
Tokens: ['[CLS]', 'climbing', 'mount', 'everest', 'is', 'one', 'of', 'the', 'greatest', 'achievements', 'for', 'any', 'mountain', '##eer', '.', '[SEP]']
Predicted Tags: ['B-Mountain', 'O', 'B-Mountain', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Mountain']
Sentence: Kilimanjaro is the highest mountain in Africa and attracts climbers from all over the world.
Tokens: ['[CLS]', 'ki', '##lim', '##an', '##jar', '##o', 'is', 'the', 'highest', 'mountain', 'in', 'africa', 'and', 'attracts', 'climb', '##ers', 'from', 'all', 'over', 'the', 'world', '.', '[SEP]']
Predicted Tags: ['B-Mountain', 'B-Mountain', 'B-Mountain', 'B-Mountain', 'B-Mountain', 'B-Mountain', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Mountain']
Sentence: Mount Fuji is a symbol of Japan and is famous for its beautiful shape.
Tokens: ['[CLS]', 'mount', 'fuji', 'is', 'a', 'symbol',

  score = torch.where(mask[i].unsqueeze(1), next_score, score)
