In [1]:
import torch
import os
from transformers import RobertaTokenizerFast
import peft
from colorama import Fore, Back, Style, init
import argparse
import warnings

# Ignore any warnings to reduce console clutter.
warnings.filterwarnings("ignore")

In [2]:
# Define the maximum length for the tokenized sentence.
MAX_LENGTH = 32

In [3]:
def inference_ner(path_to_model, sentence):

    # Initialize the tokenizer for RoBERTa using the pre-trained 'roberta-base' version.
    tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', add_prefix_space=True)
    
    # Tokenize the user's input sentence.
    # - Truncate the sentence if it exceeds the maximum length.
    # - Pad the sentence to the maximum length.
    # - Return tensors for use with PyTorch ('pt').
    # - Include attention masks to differentiate padding from actual data.
    tokenized_sentence = tokenizer(sentence, 
                                   truncation=True,
                                   padding="max_length",
                                   max_length=MAX_LENGTH,
                                   return_tensors="pt",
                                   return_attention_mask=True)
    
    # Load the fine-tuned model from the specified path.
    model = torch.load(path_to_model)
    # Set the model to evaluation mode, disabling layers like dropout.
    model.eval()
    
    # Perform inference without calculating gradients (for efficiency).
    with torch.inference_mode():
        # Pass the tokenized input to the model, including attention masks.
        outputs = model(input_ids=tokenized_sentence["input_ids"],
                        attention_mask=tokenized_sentence["attention_mask"])

    # Apply a softmax to get probabilities and find the most likely class (argmax) for each token.
    outputs = torch.softmax(outputs.logits, dim=2).argmax(dim=2)

    # Convert outputs to a list of labels and tokenized sentence to numpy arrays for processing.
    outputs = list(outputs.squeeze().cpu().numpy())
    tokenized_sentence = list(tokenized_sentence["input_ids"].squeeze().cpu().numpy())
    
    # Print a message indicating that the results are about to be displayed.
    print(Fore.GREEN + "\nHERE IS THE RESULT:\n")
    
    # Iterate through each token and corresponding label.
    for i in range(len(outputs)):
        label = outputs[i]  # The predicted label for the current token.
        token = tokenized_sentence[i]  # The token ID from the tokenized input.
        
        # Skip special tokens like [CLS], [SEP], or padding.
        if token in [0, 1, 2]:
            continue
            
        # Decode the token ID back to a readable word.
        decoded_word = tokenizer.decode(token)
        
        # If the label indicates a mountain entity, print the word in a different color.
        if label:
            print(Fore.LIGHTYELLOW_EX + decoded_word, end=" ")
        else:
            # Otherwise, print the word in the default style.
            print(Style.RESET_ALL + decoded_word, end=" ")
    
    # Print a newline for better formatting after the output.
    print()

In [4]:
sentence_examples = ["So how it was on Kilimanjaro?",
                     "White Glacier is a broad westward flowing tributary glacier which joins the Land Glacier on the north side of Mount McCoy in Marie Byrd Land.",
                     "Other notable sections of the cemetery are the cemetery of the Finnish Guard, the Artist's Hill and the Statesmen's Grove.",
                     "Why don't we hang out together? Let's go on a trip. What about Alpas?"]

In [5]:
for se in sentence_examples:
    inference_ner("data/models/roberta_fine_tuned.pt", se)

[32m
HERE IS THE RESULT:



2024-10-24 20:00:29.589525: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-24 20:00:29.667411: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-24 20:00:30.071785: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment va

[0m So [0m how [0m it [0m was [0m on [93m Kil [93miman [93mjar [93mo [0m? 
[32m
HERE IS THE RESULT:

[0m White [0m Glacier [0m is [0m a [0m broad [0m west [0mward [0m flowing [0m t [0mribut [0mary [0m glacier [0m which [0m joins [0m the [0m Land [0m Glacier [0m on [0m the [0m north [0m side [0m of [93m Mount [93m McCoy [0m in [0m Marie [0m Byrd [0m Land [0m. 
[32m
HERE IS THE RESULT:

[0m Other [0m notable [0m sections [0m of [0m the [0m cemetery [0m are [0m the [0m cemetery [0m of [0m the [0m Finnish [0m Guard [0m, [0m the [93m Artist [93m's [93m Hill [0m and [0m the [0m States [0mmen [0m's [0m Grove [0m. 
[32m
HERE IS THE RESULT:

[0m Why [0m don [0m't [0m we [0m hang [0m out [0m together [0m? [0m Let [0m's [0m go [0m on [0m a [0m trip [0m. [0m What [0m about [93m Al [93mpas [0m? 
