## Guided Generation Test
Original code is from here: https://github.com/dottxt-ai/outlines

Some parts of the original code were questionable, so I worked with Google Gemini
to clean it up and explain it a little better.

According to Gemini, the original code was also out of date.

In [None]:
# Install the core libraries.
# - outlines: The structured generation library
# - transformers: The interface for downloading models
# - accelerate: Helper for managing GPU device placement
# - torch: The tensor backend that actually runs the model
!pip install -q outlines transformers accelerate torch

In [4]:
import torch
import outlines
from transformers import AutoModelForCausalLM, AutoTokenizer

# TinyLlama is small (1.1B), standard, and widely supported.
# It doesn't require "trust_remote_code=True".
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Use the GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs (slowly) on CPU-only runtimes instead of crashing.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only used on the GPU path; the CPU fallback stays in
# float32 to avoid relying on half-precision CPU kernels.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

print(f"Loading {MODEL_NAME}...")

# Load the model weights straight onto the selected device.
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=DEVICE,
    torch_dtype=DTYPE,
    # Force the plain attention implementation
    # (prevents "Flash Attention" crashes on T4 GPUs).
    attn_implementation="eager",
)

# Load the tokenizer (converts text to token ids and back).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Wrap the model with Outlines so that generation can be constrained
# to an output type (e.g. a JSON schema derived from a pydantic model).
guided_model = outlines.from_transformers(llm, tokenizer)

print("Model loaded and wrapped successfully.")


In [5]:
from enum import Enum
from pydantic import BaseModel
from typing import List

# 1. Define the Schema
# This acts as the "stencil" for the model. It cannot generate anything outside these rules.
# Closed set of urgency levels a ticket may carry.
# The `str` mix-in makes every member a real string, so each member compares
# equal to (and serialises as) its lowercase value.
class TicketPriority(str, Enum):
    # Members are listed in ascending order of urgency.
    low = "low"
    medium = "medium"
    high = "high"
    urgent = "urgent"

# Structured representation of one support ticket.
# This model is passed to the guided model as the required output type, and
# its fields are read back when the ticket is printed.
class ServiceTicket(BaseModel):
    # Urgency level; restricted to the TicketPriority members.
    priority: TicketPriority
    # Free-form category label (presumably the topic area of the request).
    category: str
    # Whether the ticket should be escalated to a manager.
    requires_manager: bool
    # Short description of the customer's problem.
    summary: str
    # Follow-up steps; printed one per line in the output section.
    action_items: List[str]

# 2. The Input Data
# A sample customer email, assembled line by line. The empty first and last
# entries reproduce the leading and trailing newline of the message body.
customer_email = "\n".join(
    [
        "",
        "Subject: URGENT - Cannot access my account after payment",
        "",
        "I paid for the premium plan 3 hours ago and still can't access any features.",
        "I've tried logging out and back in multiple times. This is unacceptable as I",
        "have a client presentation in an hour and need the analytics dashboard.",
        "Please fix this immediately or refund my payment.",
        "",
    ]
)

# 3. The Prompt
# TinyLlama's chat format: each turn opens with a role tag on its own line and
# the system and user turns are closed with "</s>". The prompt stops right
# after the assistant tag so the model writes the assistant's reply.
# The system turn asks for the ticket details to be extracted from the email.
prompt = "\n".join(
    [
        "<|system|>",
        "You are a helpful assistant. Extract the support ticket details from the user email.",
        "</s>",
        "<|user|>",
        customer_email,
        "</s>",
        "<|assistant|>",
        "",  # keeps the final newline after the assistant tag
    ]
)

# 4. Execution
print("Analyzing email...")

# The model predicts the next tokens as usual, but Outlines filters out any
# token that would break the ServiceTicket JSON structure.
raw_ticket = guided_model(
    prompt,
    ServiceTicket,
    max_new_tokens=512,
)

# Models created with `outlines.from_transformers` return the generated JSON
# as text, not a ServiceTicket instance, so it has to be validated into the
# pydantic model before its fields can be read. The isinstance check keeps the
# cell working if the installed Outlines already returns a parsed object.
# A pydantic ValidationError is raised here if the JSON is incomplete, e.g.
# because generation stopped at the max_new_tokens limit.
if isinstance(raw_ticket, ServiceTicket):
    ticket = raw_ticket
else:
    ticket = ServiceTicket.model_validate_json(raw_ticket)

# 5. Output
print("\n--- TICKET CREATED ---")
print(f"Priority:      {ticket.priority.value.upper()}")
print(f"Category:      {ticket.category}")
print(f"Needs Manager: {ticket.requires_manager}")
print(f"Summary:       {ticket.summary}")
print("Actions:")
for item in ticket.action_items:
    print(f"- {item}")