In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import httpx
from brokit.primitives.lm import LM, ModelType, ModelResponse, Usage

class Llama(LM):
    """Chat LM adapter that talks to an HTTP ``/api/chat`` endpoint.

    The default ``base_url`` and the response fields read in
    ``parse_response`` (``message``, ``prompt_eval_count``, ``eval_count``)
    follow the Ollama chat API.

    Args:
        model_name: Model identifier sent in the request payload.
        base_url: Root URL of the server hosting ``/api/chat``.
    """

    def __init__(self, model_name: str, base_url: str = "http://localhost:11434"):
        super().__init__(model_name=model_name, model_type=ModelType.CHAT)
        self.base_url = base_url
        # One reusable client so connections are pooled across requests.
        self.client = httpx.Client(timeout=60.0)

    def request(self, prompt=None, messages=None, **kwargs) -> dict:
        """POST a non-streaming chat request and return the decoded JSON body.

        Args:
            prompt: Plain text sent as a single user message. Only used when
                ``messages`` is None.
            messages: Chat message dicts sent as-is; takes precedence over
                ``prompt``.
            **kwargs: Extra top-level keys merged into the JSON payload.

        Returns:
            The server's JSON response as a dict.

        Raises:
            ValueError: If neither ``prompt`` nor ``messages`` is given.
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        if messages is None:
            if prompt is None:
                # Fail early instead of sending a message with null content.
                raise ValueError("Either `prompt` or `messages` must be provided.")
            messages = [{"role": "user", "content": prompt}]

        # Tolerate a trailing slash in base_url so the path never becomes "//api/chat".
        url = f"{self.base_url.rstrip('/')}/api/chat"
        response = self.client.post(
            url,
            json={
                "model": self.model_name,
                "messages": messages,
                "stream": False,
                **kwargs,
            },
        )
        # Surface HTTP errors here; otherwise an error payload would only show
        # up later as a KeyError on "message" in parse_response.
        response.raise_for_status()
        return response.json()

    def parse_response(self, original_response: dict) -> ModelResponse:
        """Convert a raw chat response dict into a ``ModelResponse``.

        Args:
            original_response: Dict as returned by ``request``; must contain
                ``message`` with a ``content`` entry.

        Returns:
            A ``ModelResponse`` carrying the message content and token usage.
            Token counts default to 0 when the server omits them.

        Raises:
            KeyError: If ``message`` or its ``content`` is missing.
        """
        message = original_response["message"]
        input_tokens = original_response.get("prompt_eval_count", 0)
        output_tokens = original_response.get("eval_count", 0)
        return ModelResponse(
            model_name=self.model_name,
            model_type=self.model_type,
            response=message["content"],
            usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens),
            metadata=None
        )

In [3]:
from brokit.primitives.prompt import Prompt, InputField, OutputField

class QA(Prompt):
    """Answer a question based on the provided context."""
    # NOTE(review): the docstring mentions a "provided context", but no context
    # input field is declared below. Presumably the docstring is used as the
    # prompt instruction — confirm, then either add a context field or reword.

    # Input supplied by the caller as a keyword argument to the predictor.
    question:str = InputField(description="The question to be answered.")
    # Output the model is expected to produce.
    answer:str = OutputField(description="The answer to the question.")

In [4]:
from brokit.primitives.predictor import Predictor

# Build the LM adapter (default base_url, i.e. localhost) and bind it to the QA prompt.
lm = Llama(
    model_name="gemma3:12b",
)
predictor = Predictor(
    prompt=QA,
    lm=lm,
)

In [5]:
# Run the predictor once; the keyword name matches the QA input field.
prediction = predictor(question="How ya been?")
# Bare last expression so the notebook displays the result.
prediction

Prediction(
    answer="I've been doing well, thank you for asking! As an AI, I don't experience feelings or time in the same way humans do, but my systems are running smoothly and I'm ready to assist. How about you?"
)

In [6]:
# Inspect the responses recorded on the LM (raw text, token usage, latency, parsed fields).
lm.history

[ModelResponse(model_name='gemma3:12b', model_type=<ModelType.CHAT: 'chat'>, response="<||answer||>\nI've been doing well, thank you for asking! As an AI, I don't experience feelings or time in the same way humans do, but my systems are running smoothly and I'm ready to assist. How about you?\n\n<||completed||>", usage=Usage(input_tokens=158, output_tokens=62), response_ms=3195.1812999996037, cached=False, metadata=None, parsed_response={'answer': "I've been doing well, thank you for asking! As an AI, I don't experience feelings or time in the same way humans do, but my systems are running smoothly and I'm ready to assist. How about you?"})]