In [1]:
# Reload imported modules before each cell executes, so edits to library
# code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import httpx
from brokit.primitives.lm import LM, ModelType, ModelResponse, Usage, Message
from typing import List, Optional

class Llama(LM):
    """Chat-model adapter for an Ollama-style HTTP server.

    Sends non-streaming requests to ``{base_url}/api/chat`` and converts the
    JSON reply into a ``ModelResponse``.

    Args:
        model_name: Model identifier forwarded in the request body.
        base_url: Root URL of the server.
        temperature: Default sampling temperature sent under ``"options"``.
        top_p: Default nucleus-sampling value sent under ``"options"``.
        seed: Default seed sent under ``"options"``.
        **kwargs: Extra default options merged into the same ``"options"`` dict.
    """

    def __init__(self, model_name: str, base_url:str = "http://localhost:11434", temperature:float=0.0, top_p:float=1.0, seed:int=55, **kwargs):
        super().__init__(model_name=model_name, model_type=ModelType.CHAT)
        self.base_url = base_url
        # One client per instance so connections are reused across requests.
        self.client = httpx.Client(timeout=60.0)
        self.model_params = {
            "temperature": temperature,
            "top_p": top_p,
            "seed": seed,
            **kwargs
        }

    def close(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        self.client.close()

    def request(self, prompt:Optional[str]=None, messages:Optional[List[Message]]=None, **kwargs) -> dict:
        """POST a chat request and return the decoded JSON body.

        Args:
            prompt: Text sent as a single ``user`` message. Used only when
                ``messages`` is ``None``.
            messages: Conversation to send. ``Message`` instances are converted
                with ``to_dict()``; any other item is passed through unchanged.
            **kwargs: Per-call options that override the instance defaults.

        Returns:
            The server's JSON response as a dict.

        Raises:
            ValueError: If neither ``prompt`` nor ``messages`` is given.
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        if messages is None and prompt is None:
            # Without this guard the request would carry `"content": null`.
            raise ValueError("Either `prompt` or `messages` must be provided.")

        # Tolerate a trailing slash in base_url so the path is never `//api/chat`.
        url = f"{self.base_url.rstrip('/')}/api/chat"
        # Per-call kwargs win over the defaults captured in __init__.
        params = {**self.model_params, **kwargs}
        if messages is not None:
            _messages = [msg.to_dict() if isinstance(msg, Message) else msg for msg in messages]
        else:
            _messages = [{"role": "user", "content": prompt}]
        response = self.client.post(
            url,
            json={
                "model": self.model_name,
                "messages": _messages,
                "stream": False,
                "options": params,
            }
        )
        # Fail here with the HTTP status instead of later with a KeyError on
        # "message" when the server returned an error payload.
        response.raise_for_status()
        return response.json()

    def parse_response(self, original_response: dict) -> ModelResponse:
        """Convert a raw chat response dict into a ``ModelResponse``.

        Reads the reply text from ``original_response["message"]["content"]``
        and token counts from ``prompt_eval_count`` / ``eval_count``; a missing
        count is reported as 0.

        Raises:
            KeyError: If the response has no ``"message"`` entry or the message
                has no ``"content"``.
        """
        message = original_response["message"]
        input_tokens = original_response.get("prompt_eval_count", 0)
        output_tokens = original_response.get("eval_count", 0)
        return ModelResponse(
            model_name=self.model_name,
            model_type=self.model_type,
            response=message["content"],
            usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens),
            metadata=None
        )

In [3]:
from brokit.primitives.prompt import Prompt, InputField, OutputField
from brokit.primitives.shot import Shot

In [4]:
# Prompt schema: one input field (`question`) and two output fields, declared
# in the order the model is asked to emit them (`reason`, then `answer`).
# NOTE: the class docstring and the field descriptions are not just
# documentation; they show up verbatim in the rendered system prompt (see the
# history dump further down), so changing them changes what the model sees.
class QA(Prompt):
    """Think and answer the question"""
    question:str = InputField(description="The question")
    reason:str = OutputField(description="Think and share the reason why you answer like this")
    answer:str = OutputField(description="Your answer based on question and reason")

# Few-shot demonstrations for QA, built from (question, answer) pairs.
# `reason` is deliberately not supplied for any of them.
shots = [
    Shot(QA, question=question, answer=answer)
    for question, answer in (
        ("1+1", "2"),
        ("Where's the capital of Thailand", "Bangkok"),
    )
]

In [5]:
# Inspect one shot: its input fields and the output fields it demonstrates.
idx = 0
shots[idx].inputs, shots[idx].outputs

({'question': '1+1'}, {'reason': 'Intentionally left blank.', 'answer': '2'})

In [6]:
from brokit.primitives.predictor import Predictor

# Model served at Llama's default base_url; change model_name to try another model.
lm = Llama(model_name="gemma3:12b")
# Bind the QA prompt, the model, and the few-shot examples into one callable.
predictor = Predictor(prompt=QA, lm=lm, shots=shots)

In [7]:
# Ask a question that also appears in the shots; the result exposes the QA
# output fields (`reason`, `answer`).
response = predictor(question="1+1")
response

Prediction(
    reason='This is a simple addition problem. The number 1 added to the number 1 equals 2.',
    answer='2'
)

In [8]:
# Dump every message of the most recent request: role, content, then a divider.
for h in lm.history[-1].request:
    print(h.role.upper(), h.content, "=" * 20, sep="\n")

SYSTEM
Your input fields are:
1. question (<class 'str'>): The question
Your output fields are:
1. reason (<class 'str'>): Think and share the reason why you answer like this
2. answer (<class 'str'>): Your answer based on question and reason

All interactions will be structured in the following way, with the appropriate values filled in.

<||question||>
{question}

<||reason||>
{reason}

<||answer||>
{answer}

<||completed||>
In adhering to this structure, your objective is: 
Think and answer the question
USER
<||question||>
1+1
ASSISTANT
<||reason||>
Intentionally left blank.

<||answer||>
2

<||completed||>
USER
<||question||>
Where's the capital of Thailand
ASSISTANT
<||reason||>
Intentionally left blank.

<||answer||>
Bangkok

<||completed||>
USER
<||question||>
1+1

Respond with the corresponding output fields, starting with the field: `<||reason||>`, `<||answer||>` and then ending with the marker for `<||completed||>`.


In [9]:
# Token usage recorded for the first call in the history.
# NOTE(review): the message dump above reads history[-1]; the two are the same
# entry only while the history holds a single call.
lm.history[0].usage

Usage(input_tokens=284, output_tokens=42)

In [10]:
# Total token usage across every call recorded in the model's history.
input_tokens = sum(h.usage.input_tokens for h in lm.history)
output_tokens = sum(h.usage.output_tokens for h in lm.history)

In [11]:
# Display the accumulated (input, output) token totals.
input_tokens, output_tokens

(284, 42)

In [12]:
# Number of calls recorded in the model's history so far.
len(lm.history)

1