In [1]:
%%capture
%load_ext autoreload
%autoreload 2

In [3]:
import asyncio

In [None]:
from llm_utils import LM
from llm_utils.lm.alm import AsyncLM
from pydantic import BaseModel, Field
from typing import List, Optional
from transformers import AutoTokenizer
from llm_utils.chat_format.display import get_conversation_one_turn

In [None]:
# Single source of truth for the server port, so the sync and async clients
# below are guaranteed to talk to the same backend when their outputs are
# compared later in the notebook.
LM_PORT = 8180

lm = AsyncLM(port=LM_PORT)  # async client: its calls are awaited
slm = LM(port=LM_PORT)      # sync client: its calls block and return directly

In [23]:

class Output(BaseModel):
    # Structured answer requested from the model: a free-text justification
    # plus one numeric value. Plain comments are used here instead of a
    # docstring because a docstring would become part of the model's schema.
    reasoning: str = Field(
        description='The reasoning behind the answer',
        examples=[
            'To find the square root of 16, we look for a number that, when '
            'multiplied by itself, equals 16. '
            'The number 4 satisfies this condition.'
        ],
    )
    # Declared as float, so only real-valued answers can be represented.
    result: float = Field(
        description='The result of the calculation',
        examples=[4.0],
    )


In [25]:
# Blocking structured call on the sync client; the reply comes back as an
# `Output` instance.
ret = slm.parse(
    response_model=Output,
    instruction="You are a calculator.",
    prompt="What is the square root of pi?",
    think=False,
)

In [26]:
ret

Output(reasoning='The square root of pi is a mathematical operation. I will calculate the square root of the value of pi, which is approximately 3.141592653589793.', result=1.7724538509055159)

In [29]:
ret = await lm.parse(response_model=Output,
                instruction="You are a calculator.", prompt="What is the square root of i^2?",
                think=False)

In [30]:
ret

Output(reasoning='To find the square root of i^2, we first need to calculate i^2. The imaginary unit i is defined as the square root of -1, so i^2 = -1. Now, we need to find the square root of -1. The square root of -1 is defined as i, so the square root of i^2 is i.', result=-1.0)

In [None]:
# One system message plus one user message, reused by the
# inspect_word_probs cells further down.
messages = get_conversation_one_turn(
    system_msg="You are a calculator.",
    user_msg="What is the square root of i^2?",
)

In [None]:
# Tokenizer for the Qwen3-32B checkpoint, passed to inspect_word_probs below.
# NOTE(review): `tokenzier` is a misspelling of `tokenizer`; it is left as-is
# here because the later inspect_word_probs cells refer to this exact name.
tokenzier = AutoTokenizer.from_pretrained("Qwen/Qwen3-32B")

In [38]:
# Sync client: prints the prompt with every word ANSI-coloured (red for low,
# green for high probability in the recorded output) and returns a tuple whose
# first element is a list of {'word', 'probability'} dicts.
slm.inspect_word_probs(messages=messages, tokenizer=tokenzier)

[38;2;254;0;0m<|im_start|>system[0m 
[38;2;255;0;0mYou[0m [38;2;4;250;0mare[0m [38;2;225;29;0ma[0m [38;2;254;0;0mcalculator.<|im_end|>[0m 
[38;2;254;0;0m<|im_start|>user[0m 
[38;2;254;0;0mWhat[0m [38;2;19;235;0mis[0m [38;2;214;40;0mthe[0m [38;2;248;6;0msquare[0m [38;2;96;158;0mroot[0m [38;2;0;255;0mof[0m [38;2;242;12;0mi^2?<|im_end|>[0m


([{'word': '<|im_start|>system', 'probability': 0.002434095879395322},
  {'word': '\n', 'probability': 0.9437415323594567},
  {'word': 'You', 'probability': 3.0866061144135373e-06},
  {'word': 'are', 'probability': 0.9783705742288814},
  {'word': 'a', 'probability': 0.1171738146431303},
  {'word': 'calculator.<|im_end|>', 'probability': 0.00044714252214998756},
  {'word': '\n', 'probability': 0.0021175480235526233},
  {'word': '<|im_start|>user', 'probability': 0.00229015435023361},
  {'word': '\n', 'probability': 0.5589197813168233},
  {'word': 'What', 'probability': 8.307139599103397e-05},
  {'word': 'is', 'probability': 0.920382179256267},
  {'word': 'the', 'probability': 0.15756041827161377},
  {'word': 'square', 'probability': 0.027006153778085445},
  {'word': 'root', 'probability': 0.618580449639966},
  {'word': 'of', 'probability': 0.9973074175095388},
  {'word': 'i^2?<|im_end|>', 'probability': 0.049814276275966726},
  {'word': '\n', 'probability': 0.9046295374753197}],
 [{'151

In [41]:
# Async client, same messages and tokenizer as the sync inspection; in the
# recorded run the per-word probabilities match the sync client's output.
await lm.inspect_word_probs(messages=messages, tokenizer=tokenzier)

[38;2;254;0;0m<|im_start|>system[0m 
[38;2;255;0;0mYou[0m [38;2;4;250;0mare[0m [38;2;225;29;0ma[0m [38;2;254;0;0mcalculator.<|im_end|>[0m 
[38;2;254;0;0m<|im_start|>user[0m 
[38;2;254;0;0mWhat[0m [38;2;19;235;0mis[0m [38;2;214;40;0mthe[0m [38;2;248;6;0msquare[0m [38;2;96;158;0mroot[0m [38;2;0;255;0mof[0m [38;2;242;12;0mi^2?<|im_end|>[0m


([{'word': '<|im_start|>system', 'probability': 0.002434095879395322},
  {'word': '\n', 'probability': 0.9437415323594567},
  {'word': 'You', 'probability': 3.0866061144135373e-06},
  {'word': 'are', 'probability': 0.9783705742288814},
  {'word': 'a', 'probability': 0.1171738146431303},
  {'word': 'calculator.<|im_end|>', 'probability': 0.00044714252214998756},
  {'word': '\n', 'probability': 0.0021175480235526233},
  {'word': '<|im_start|>user', 'probability': 0.00229015435023361},
  {'word': '\n', 'probability': 0.5589197813168233},
  {'word': 'What', 'probability': 8.307139599103397e-05},
  {'word': 'is', 'probability': 0.920382179256267},
  {'word': 'the', 'probability': 0.15756041827161377},
  {'word': 'square', 'probability': 0.027006153778085445},
  {'word': 'root', 'probability': 0.618580449639966},
  {'word': 'of', 'probability': 0.9973074175095388},
  {'word': 'i^2?<|im_end|>', 'probability': 0.049814276275966726},
  {'word': '\n', 'probability': 0.9046295374753197}],
 [{'151

In [42]:
# Conversation recorded by the sync client's most recent call. In the recorded
# output the system message ends with "/no_think" and the assistant reply
# starts with an empty <think> block (presumably from think=False — confirm).
slm.last_messages()

[{'role': 'system', 'content': 'You are a calculator.\n\n/no_think'},
 {'role': 'user', 'content': 'What is the square root of pi?'},
 {'role': 'assistant',
  'content': '<think>\n\n</think>\n{"reasoning": "The square root of pi is a mathematical operation. I will calculate the square root of the value of pi, which is approximately 3.141592653589793.", "result": 1.7724538509055159}'}]

In [43]:
# Conversation recorded by the async client's most recent call; note that on
# this client last_messages() is awaited. The recorded raw reply carries
# "result": -1, which appears as -1.0 in the parsed Output shown earlier.
await lm.last_messages()

[{'role': 'system', 'content': 'You are a calculator.\n\n/no_think'},
 {'role': 'user', 'content': 'What is the square root of i^2?'},
 {'role': 'assistant',
  'content': '<think>\n\n</think>\n{\n"reasoning": "To find the square root of i^2, we first need to calculate i^2. The imaginary unit i is defined as the square root of -1, so i^2 = -1. Now, we need to find the square root of -1. The square root of -1 is defined as i, so the square root of i^2 is i.",\n"result": -1\n}'}]

In [None]:
from llm_utils.lm.alm import AsyncLLMTask

class TranslateInput(BaseModel):
    # Request payload for TranslateTask (referenced by its InputModel attribute).
    source_text: str  # text to be translated
    target_lang: str  # language to translate into, e.g. "French" in the call below
    source_lang: str  # language of source_text, e.g. "English" in the call below

class TranslateOutput(BaseModel):
    # Response payload for TranslateTask (referenced by its OutputModel attribute).
    translation: str  # the translated text
    
class TranslateTask(AsyncLLMTask):
    """You are a translator. Your task is to translate the given text from one language to another"""
    # NOTE(review): the docstring above reads like a prompt; presumably
    # AsyncLLMTask uses it as the instruction — confirm before rewording it.
    # Class-level client, created once when this class body is executed.
    lm = AsyncLM(port=8180)
    # Schemas describing what the task accepts and what it returns.
    InputModel = TranslateInput
    OutputModel = TranslateOutput



In [45]:
alm_task = TranslateTask()

In [46]:
output = await alm_task(TranslateInput(source_text="Hello, how are you?",
                                            target_lang="French",
                                            source_lang="English"))

In [47]:
output

TranslateOutput(translation='Bonjour, comment vas-tu ?')

In [2]:
from llm_utils import show_string_diff
from llm_utils import get_conversation_one_turn, show_chat
from llm_utils.lm.chat_html import LMChatHtml as LM

# NOTE(review): `LM` (imported just above as an alias of LMChatHtml) and `lm`
# reuse names that are bound to different objects earlier in this notebook, so
# re-running earlier cells after this one will pick up the HTML client instead.
lm = LM(port=8140)

# Report the client's default font size.
print(f"Default font size: {lm.font_size}px")

# Stream a chat reply rendered in HTML mode inside Jupyter.
response = lm.chat_stream(
    prompt='Hello /think',
    html_mode=True,
    max_tokens=10000,
    # show_input=True
)

[32m2025-05-30 12:41:10.781[0m | [34m[1mDEBUG   [0m | [36mllm_utils.lm.lm[0m:[36mlist_models[0m:[36m482[0m - [34m[1mBase URL: http://localhost:8140/v1/[0m
[32m2025-05-30 12:41:10.902[0m | [34m[1mDEBUG   [0m | [36mllm_utils.lm.lm[0m:[36mmodel[0m:[36m107[0m - [34m[1mModel not set, using first available model: ./model_store/Qwen/Qwen3-32B-FP8[0m


Default font size: 4px


KeyboardInterrupt: 