<a href="https://colab.research.google.com/github/b05902062/TinyLlama/blob/main/sft/FunctionCallInteractive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Checkpoint to load. The commented-out alternative below is presumably the
# base chat model this checkpoint was derived from -- TODO confirm.
model_name: str = "ZihminWang/TinyLlama-1.1B-Chat-v1.0-user-intention-v0.1"
#model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Prefer the GPU when one is visible to torch; otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading tokenizer for {model_name}...")
# The tokenizer is configured to pad on the left, the side used for
# generation with decoder-only models.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
if tokenizer.pad_token is None:
    # No dedicated PAD token: reuse EOS. Because this does not add a new
    # vocabulary entry, the model embeddings do not need resizing (adding a
    # brand-new '[PAD]' token would require
    # model.resize_token_embeddings(len(tokenizer))).
    tokenizer.pad_token = tokenizer.eos_token

print(f"Loading model {model_name}...")
model = AutoModelForCausalLM.from_pretrained(model_name)

# Place the weights on the selected device before any generation call.
model.to(device)
print(f"Model moved to {device}")


def generate_text_with_model(
    prompt: str = "Once upon a time,",
    max_new_tokens: int = 512,
    do_sample: bool = True,
    temperature: float = 0.9,
    top_k: int = 50,
    num_beams: int = 1, # Set to >1 for beam search
    num_return_sequences: int = 1,
    repetition_penalty: float = 1.0,
    no_repeat_ngram_size: int = 0
):
    """
    Generate text from the module-level ``model`` and print every returned sequence.

    The prompt is tokenized with the module-level ``tokenizer``, moved to
    ``device``, passed through ``model.generate`` and the decoded result is
    printed (special tokens stripped). Nothing is returned.

    Args:
        prompt (str): The starting text for generation.
        max_new_tokens (int): The maximum number of new tokens to generate.
        do_sample (bool): Whether to use sampling (True) or greedy/beam search (False).
        temperature (float): Controls the randomness of sampling. Higher = more random.
                             Only forwarded when do_sample=True.
        top_k (int): Filters out low probability tokens.
                     Only forwarded when do_sample=True.
        num_beams (int): Number of beams for beam search. Set to > 1 for beam search;
                         combine with do_sample=False for deterministic beam search.
        num_return_sequences (int): How many sequences to generate and print.
        repetition_penalty (float): Penalizes repeated tokens. >1.0 discourages repetition.
        no_repeat_ngram_size (int): All ngrams of this size can only occur once.
                                    Set to > 0 to prevent repeating phrases.
    """

    # --- Prepare the input ---
    # Calling the tokenizer (instead of .encode) also produces the attention
    # mask. Passing it explicitly matters because PAD may be aliased to EOS, in
    # which case generate() cannot reliably infer the mask from the ids alone.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # --- Define GenerationConfig ---
    # Sampling-only knobs are forwarded only when sampling is enabled, so that
    # greedy/beam runs do not carry (and get warned about) unused settings.
    sampling_kwargs = {"temperature": temperature, "top_k": top_k} if do_sample else {}
    generation_config = GenerationConfig(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        num_beams=num_beams,
        eos_token_id=tokenizer.eos_token_id, # Stop criterion
        pad_token_id=tokenizer.pad_token_id, # Set explicitly to avoid warnings
        num_return_sequences=num_return_sequences,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        **sampling_kwargs,
    )

    # --- Generate Text ---
    generated_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        generation_config=generation_config,
    )

    # --- Decode and Print Results ---
    decoded_texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    # Each decoded sequence is printed under the same header, followed by a
    # separator line.
    for text in decoded_texts:
        print("Prompt: ")
        print(text)
        print("-" * 30)

In [None]:
# Prompt template with two placeholders: {system} receives the JSON description
# of the callable function and {user} receives the user's request. Doubled
# braces are literal braces after str.format(). The adjacent string literals
# are concatenated as-is, so the only newlines are the explicit "\n" escapes.
prompt_format = (
    "You have access to the following function. "
    "{system} "
    "Respond to the user AS USUAL if invoking the function won't help the task. "
    "If you decide to invoke the function, you MUST put it in the format of "
    '{{"function": {{"<name_of_function_to_use>": {{"intent": "true"}}}}}} '
    "You SHOULD NOT include any other text in the response if you intent to invoke the function. "
    "### Instruction:\n{user}\n\n### Response: "
)

# Demo payload: a JSON-schema-style description of an image generation
# function, plus a user request for an image.
example = {
    "system" : '''{
      "name": "generate_image",
      "description": "Generate an image from a text description (prompt).",
      "parameters": {
        "type": "object",
        "properties": {
          "prompt": {
            "type": "string",
            "description": "A detailed text description of the image to generate. Be specific about subjects, styles, colors, and lighting."
          },
          "negative_prompt": {
            "type": "string",
            "description": "A text description of things to exclude from the image. For example, 'blurry, distorted, ugly'."
          },
          "width": {
            "type": "integer",
            "description": "The desired width of the generated image in pixels. Common values are 512, 768, 1024.",
            "minimum": 64,
            "maximum": 2048
          },
          "height": {
            "type": "integer",
            "description": "The desired height of the generated image in pixels. Common values are 512, 768, 1024.",
            "minimum": 64,
            "maximum": 2048
          }
        }
      }
    } ''',
    "user" : "Give me a image with a boy and a girl playing on a slide. width and height are both 720px.",
}

# Fill the template with the demo payload and run generation with the
# function's default settings.
generate_text_with_model(
    prompt=prompt_format.format(system=example["system"], user=example["user"])
)