In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

class HuggingFaceLLMEngine:
    def __init__(self, model_name: str):
        """
        Initializes the model and tokenizer based on the specified model name.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Ensure that the tokenizer has a pad token
        if self.tokenizer.pad_token is None:
            if self.tokenizer.eos_token is not None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            else:
                # Default to the most common pad token if eos_token is also not set
                self.tokenizer.pad_token = '<pad>'
                
        self.model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=self.tokenizer.pad_token_id)
    
    def generate_text(self, prompt: str, **generation_kwargs):
        """
        Generates text based on a prompt.
        """
        # Encode input and generate attention_mask
        encoding = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=generation_kwargs.get('max_length', 512))
        # Generate text using both input_ids and attention_mask
        output = self.model.generate(**encoding, **generation_kwargs)
        return self.tokenizer.decode(output[0], skip_special_tokens=True)

In [2]:
# Example usage
model_name = "Rocketknight1/falcon-rw-1b"
engine = HuggingFaceLLMEngine(model_name=model_name)

In [3]:
prompt = "rewrite this text: i think eating ice cream is the best thing ever"
generated_text = engine.generate_text(prompt, max_length=50)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [4]:
generated_text

'rewrite this text: i think eating ice cream is the best thing ever.\n- I think eating ice cream is the best thing ever.\n- I think eating ice cream is the best thing ever.\n- I think eating ice cream is'

In [8]:
# Example usage
model_name_falcon = "Rocketknight1/falcon-rw-1b"
engine = HuggingFaceLLMEngine(model_name=model_name_falcon)

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [9]:
prompt = "rewrite this text: i think eating ice cream is the best thing ever"
generated_text = engine.generate_text(prompt, max_length=50)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [10]:
generated_text

'rewrite this text: i think eating ice cream is the best thing ever.\n- I think eating ice cream is the best thing ever.\n- I think eating ice cream is the best thing ever.\n- I think eating ice cream is'