In [None]:
import importlib.metadata
import json
import os
import re
import subprocess
import sys
import textwrap

# Install the notebook's runtime dependencies into the interpreter that runs
# this kernel (sys.executable) so the imports below resolve.
# Lower bounds are given for the two packages whose APIs this notebook uses:
#   - outlines>=1.0 : outlines.from_transformers, outlines.Template, model(prompt, type)
#   - pydantic>=2   : BaseModel.model_validate_json / model_dump_json
# Without a bound, pip keeps whatever older copy is already installed.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                       "outlines>=1.0", "transformers", "accelerate", "sentencepiece", "pydantic>=2"])

import torch
import outlines
from transformers import AutoTokenizer, AutoModelForCausalLM

from typing import Literal, List, Union, Annotated
from pydantic import BaseModel, Field
from enum import Enum

# --- Environment report ------------------------------------------------------
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
# The module-level `__version__` lookup printed "unknown" in the recorded run,
# so ask the installed distribution's metadata first and keep the old lookup
# only as a fallback.
try:
    outlines_version = importlib.metadata.version("outlines")
except importlib.metadata.PackageNotFoundError:
    outlines_version = getattr(outlines, "__version__", "unknown")
print("Outlines:", outlines_version)
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# --- Model loading -----------------------------------------------------------
MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Half precision and automatic device placement are only requested on CUDA;
# on CPU the model is loaded in float32 with no device map.
hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
)

# With device_map=None the model is moved explicitly.
if device == "cpu":
    hf_model = hf_model.to(device)

# Wrap the HF model + tokenizer in an Outlines model; later cells call it as
# model(prompt, output_type, **generation_kwargs).
model = outlines.from_transformers(hf_model, tokenizer)

def build_chat(user_text: str, system_text: str = "You are a precise assistant. Follow instructions exactly.") -> str:
    """Render a system + user message pair into a single prompt string.

    The module-level ``tokenizer``'s chat template is tried first (untokenized,
    with the generation prompt appended). If that raises for any reason, a
    plain "User:/Assistant:" text prompt is returned instead.

    Args:
        user_text: Content of the user turn.
        system_text: Content of the system turn.

    Returns:
        The prompt string to feed to the model.
    """
    try:
        conversation = [
            dict(role="system", content=system_text),
            dict(role="user", content=user_text),
        ]
        rendered = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )
    except Exception:
        # Best-effort fallback: keep the notebook running with a generic layout.
        rendered = f"{system_text}\n\nUser: {user_text}\nAssistant:"
    return rendered

def banner(title: str):
    """Print ``title`` framed by two 90-character ``=`` rules, preceded by a blank line."""
    rule = "=" * 90
    for line in ("\n" + rule, title, rule):
        print(line)

In [None]:
def extract_json_object(s: str) -> str:
    """Return the first brace-delimited object found in ``s``.

    The text is stripped, then scanned from the first ``{``. Braces inside
    double-quoted strings (including after backslash escapes) are ignored.

    Returns:
        * the stripped text unchanged when it contains no ``{``;
        * the substring from the first ``{`` through its matching ``}``;
        * everything from the first ``{`` to the end when the object is never
          closed (e.g. truncated output).
    """
    text = s.strip()
    open_at = text.find("{")
    if open_at < 0:
        return text

    nesting = 0
    inside_string = False
    escaped = False
    for pos, char in enumerate(text[open_at:], start=open_at):
        if inside_string:
            # Only an unescaped quote ends the string; everything else is payload.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                inside_string = False
            continue

        if char == '"':
            inside_string = True
        elif char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
            if nesting == 0:
                return text[open_at:pos + 1]

    # Ran off the end without closing the outermost object.
    return text[open_at:]

def json_repair_minimal(bad: str) -> str:
    """Best-effort repair of JSON text that was cut off before it finished.

    Steps:
      1. Strip whitespace and drop everything after the last ``}`` (trailing
         chatter or a half-written field).
      2. If brackets are still open after that, close a dangling string,
         remove a trailing comma, and append the missing ``]`` / ``}`` in
         nesting order.

    Text whose brackets are already balanced after step 1 is returned as-is,
    and text without any ``}`` or open bracket is returned stripped but
    otherwise untouched. The result is not guaranteed to be valid JSON
    (e.g. a dangling key with no value is not fixed); callers must still
    parse/validate it.

    Args:
        bad: Possibly truncated JSON text.

    Returns:
        The repaired candidate string.
    """
    bad = bad.strip()
    last = bad.rfind("}")
    candidate = bad[:last + 1] if last != -1 else bad

    closer_for = {"{": "}", "[": "]"}
    pending = []        # closers still owed, innermost last
    in_str = False
    esc = False
    for ch in candidate:
        if in_str:
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
        elif ch == '"':
            in_str = True
        elif ch in closer_for:
            pending.append(closer_for[ch])
        elif pending and ch == pending[-1]:
            pending.pop()

    if not pending:
        # Already balanced (or no brackets at all): nothing more to add.
        return candidate

    repaired = candidate
    if in_str:
        if esc:
            # A lone trailing backslash would escape the quote we are about to add.
            repaired = repaired[:-1]
        repaired += '"'
    else:
        # A trailing comma before a closer is invalid JSON.
        repaired = repaired.rstrip().rstrip(",").rstrip()
    return repaired + "".join(reversed(pending))

def safe_validate(model_cls, raw_text: str):
    """Validate model output against ``model_cls``, retrying once after a repair.

    The first JSON object is extracted from ``raw_text`` and passed to
    ``model_cls.model_validate_json``. If that raises a ``ValueError``
    (pydantic's ``ValidationError`` is a ``ValueError`` subclass), the text is
    run through ``json_repair_minimal`` and validated a second time.

    Args:
        model_cls: Class exposing ``model_validate_json`` (a pydantic model).
        raw_text: Raw text produced by the model.

    Returns:
        The validated ``model_cls`` instance.

    Raises:
        ValueError: If validation fails and the repair either changes nothing
            or still does not validate. Other exception types are not retried
            and propagate unchanged.
    """
    raw = extract_json_object(raw_text)
    try:
        return model_cls.model_validate_json(raw)
    except ValueError as first_error:
        raw2 = json_repair_minimal(raw)
        if raw2 == raw:
            # The repair was a no-op; re-validating the same text cannot succeed.
            raise
        try:
            return model_cls.model_validate_json(raw2)
        except ValueError as second_error:
            # Keep the original failure attached for debugging.
            raise second_error from first_error

banner("2) Typed outputs (Literal / int / bool)")

# Each call passes a Python type as the second argument to `model`, together
# with a small token budget, and prints whatever comes back.

sentiment_prompt = build_chat(
    "Analyze the sentiment: 'This product completely changed my life!'. Return one label only."
)
sentiment = model(
    sentiment_prompt,
    Literal["Positive", "Negative", "Neutral"],
    max_new_tokens=8,
)
print("Sentiment:", sentiment)

# NOTE(review): the recorded run printed 10000000 here, so the value is
# well-typed but not necessarily factually correct with this small model.
boiling_prompt = build_chat("What's the boiling point of water in Celsius? Return integer only.")
bp = model(boiling_prompt, int, max_new_tokens=8)
print("Boiling point (int):", bp)

prime_prompt = build_chat("Is 29 a prime number? Return true or false only.")
prime = model(prime_prompt, bool, max_new_tokens=6)
print("Is prime (bool):", prime)

In [None]:
banner("3) Prompt templating (outlines.Template)")

# Prompt template with a single `{{ text }}` placeholder; dedent + strip
# remove the leading/trailing blank lines introduced by the triple-quoted literal.
template_source = textwrap.dedent("""
<|system|>
You are a strict classifier. Return ONLY one label.
<|user|>
Classify sentiment of this text:
{{ text }}
Labels: Positive, Negative, Neutral
<|assistant|>
""").strip()
tmpl = outlines.Template.from_string(template_source)

# Fill the placeholder, then ask the model for one of the three labels.
classifier_prompt = tmpl(text="The food was cold but the staff were kind.")
templated = model(
    classifier_prompt,
    Literal["Positive", "Negative", "Neutral"],
    max_new_tokens=8,
)
print("Template sentiment:", templated)

In [None]:
banner("4) Pydantic structured output (advanced constraints)")

# Allowed priority levels for a ServiceTicket.
# Mixing in `str` makes every member a string equal to its value
# (e.g. TicketPriority.low == "low").
class TicketPriority(str, Enum):
    low = "low"
    medium = "medium"
    high = "high"
    urgent = "urgent"

# String constrained to dotted-quad form: four dot-separated octets, each
# matching 0-255 (the `[01]?\d\d?` branch also accepts leading zeros like "01").
IPv4 = Annotated[str, Field(pattern=r"^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$")]
# String shaped like YYYY-MM-DD. This is a shape check only: month and day
# ranges are not validated (e.g. "2024-99-99" matches).
ISODate = Annotated[str, Field(pattern=r"^\d{4}-\d{2}-\d{2}$")]

# Schema for a support ticket extracted from a customer message.
# It is passed to `model(...)` as the output type and to `safe_validate`
# further down in this cell.
class ServiceTicket(BaseModel):
    priority: TicketPriority  # one of low / medium / high / urgent
    category: Literal["billing", "login", "bug", "feature_request", "other"]
    requires_manager: bool
    summary: str = Field(min_length=10, max_length=220)  # 10 to 220 characters
    action_items: List[str] = Field(min_length=1, max_length=6)  # 1 to 6 entries

# Schema for a network incident report.
# NOTE(review): defined here but not referenced by any of the visible cells.
class NetworkIncident(BaseModel):
    affected_service: Literal["dns", "vpn", "api", "website", "database"]
    severity: Literal["sev1", "sev2", "sev3"]
    public_ip: IPv4  # must match the dotted-quad pattern of the IPv4 alias
    start_date: ISODate  # must match the YYYY-MM-DD pattern of the ISODate alias
    mitigation: List[str] = Field(min_length=2, max_length=6)  # 2 to 6 entries

# Sample customer message used as the extraction input.
email = """
Subject: URGENT - Cannot access my account after payment
I paid for the premium plan 3 hours ago and still can't access any features.
I have a client presentation in an hour and need the analytics dashboard.
Please fix this immediately or refund my payment.
""".strip()

ticket_prompt = build_chat(
    "Extract a ServiceTicket from this message.\n"
    "Return JSON ONLY matching the ServiceTicket schema.\n"
    "Action items must be distinct.\n\nMESSAGE:\n" + email
)
ticket_text = model(ticket_prompt, ServiceTicket, max_new_tokens=240)

# The call may hand back raw JSON text or an already-built object; only raw
# text needs to go through extraction + validation.
if isinstance(ticket_text, str):
    ticket = safe_validate(ServiceTicket, ticket_text)
else:
    ticket = ticket_text
print("ServiceTicket JSON:\n", ticket.model_dump_json(indent=2))

In [8]:
banner("5) Function-calling style (schema -> args -> call)")

# Argument schema for `add`: two integers, each limited to [-1000, 1000].
class AddArgs(BaseModel):
    a: int = Field(ge=-1000, le=1000)  # -1000 <= a <= 1000
    b: int = Field(ge=-1000, le=1000)  # -1000 <= b <= 1000

def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total

# Ask the model for arguments shaped like AddArgs, then call the Python function.
args_prompt = build_chat("Return JSON ONLY with two integers a and b. Make a odd and b even.")
args_text = model(args_prompt, AddArgs, max_new_tokens=80)

# Raw JSON text is validated into AddArgs; anything else is used as returned.
if isinstance(args_text, str):
    args = safe_validate(AddArgs, args_text)
else:
    args = args_text
print("Args:", args.model_dump())
print("add(a,b) =", add(args.a, args.b))

print("Tip: For best speed and fewer truncations, switch Colab Runtime → GPU.")

Torch: 2.10.0+cpu
CUDA available: False
Outlines: unknown
Using device: cpu


Loading weights:   0%|          | 0/272 [00:00<?, ?it/s]


2) Typed outputs (Literal / int / bool)
Sentiment: Positive
Boiling point (int): 10000000
Is prime (bool): True

3) Prompt templating (outlines.Template)
Template sentiment: Positive

4) Pydantic structured output (advanced constraints)
ServiceTicket JSON:
 {
  "priority": "urgent",
  "category": "login",
  "requires_manager": true,
  "summary": "urgent, urgent",
  "action_items": [
    "return",
    "action",
    "fix",
    "action",
    "action",
    "action"
  ]
}

5) Function-calling style (schema -> args -> call)
Args: {'a': 1, 'b': 2}
add(a,b) = 3
Tip: For best speed and fewer truncations, switch Colab Runtime → GPU.
