In [None]:
import httpx
from typing import List, Optional
import brokit as bk
class Llama(bk.LM):
    """Chat language model backed by an Ollama-style ``/api/chat`` HTTP endpoint.

    The instance owns a reusable ``httpx.Client``. Call :meth:`close` (or use
    the instance as a context manager) to release its connections when done.
    """

    def __init__(self, model_name: str, base_url:str = "http://localhost:11434", temperature:float=0.0, top_p:float=1.0, seed:int=55, timeout:float=60.0, **kwargs):
        """Configure the endpoint, HTTP client and default sampling options.

        Args:
            model_name: Model identifier sent as ``"model"`` in every request.
            base_url: Root URL of the server; a trailing slash is tolerated.
            temperature: Default sampling temperature.
            top_p: Default nucleus-sampling threshold.
            seed: Default sampling seed.
            timeout: Timeout in seconds applied to the HTTP client.
            **kwargs: Extra default entries merged into the request ``"options"``.
        """
        super().__init__(model_name=model_name, model_type=bk.ModelType.CHAT)
        # Strip trailing slashes so the URL never becomes ".../​/api/chat".
        self.base_url = base_url.rstrip("/")
        self.client = httpx.Client(timeout=timeout)  # Reusable client
        self.model_params = {
            "temperature": temperature,
            "top_p": top_p,
            "seed": seed,
            **kwargs
        }

    def close(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        self.client.close()

    def __enter__(self) -> "Llama":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def request(self, prompt:Optional[str]=None, messages:Optional[List[bk.Message]]=None, **kwargs) -> dict:
        """POST a non-streaming chat request and return the decoded JSON body.

        Args:
            prompt: Text sent as a single user message; used only when
                ``messages`` is ``None``.
            messages: Full conversation. ``bk.Message`` items are converted
                with ``to_dict()``; any other item is sent as given.
            **kwargs: Per-call option overrides, merged over the defaults
                given to ``__init__``.

        Returns:
            The server's JSON response as a ``dict``.

        Raises:
            ValueError: If both ``prompt`` and ``messages`` are ``None``.
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        if messages is not None:
            _messages = [msg.to_dict() if isinstance(msg, bk.Message) else msg for msg in messages]
        elif prompt is not None:
            _messages = [{"role": "user", "content": prompt}]
        else:
            # Fail early instead of sending a message whose content is null.
            raise ValueError("Either `prompt` or `messages` must be provided.")

        url = f"{self.base_url}/api/chat"
        params = {**self.model_params, **kwargs}
        response = self.client.post(
            url,
            json={
                "model": self.model_name,
                "messages": _messages,
                "stream": False,
                "options": params,
            }
        )
        # Surface HTTP errors here; otherwise an error body would be returned
        # as if it were a successful reply and only fail later in
        # parse_response with an opaque KeyError.
        response.raise_for_status()
        return response.json()

    def parse_response(self, original_response: dict) -> bk.ModelResponse:
        """Convert a raw response dict from :meth:`request` into a ``bk.ModelResponse``.

        Args:
            original_response: Decoded JSON body; must contain
                ``"message"`` with a ``"content"`` entry.

        Returns:
            A ``bk.ModelResponse`` holding the reply text and token usage.

        Raises:
            KeyError: If ``"message"`` or its ``"content"`` is missing.
        """
        message = original_response["message"]
        # Token counters default to 0 when the server omits them.
        input_tokens = original_response.get("prompt_eval_count", 0)
        output_tokens = original_response.get("eval_count", 0)
        return bk.ModelResponse(
            model_name=self.model_name,
            model_type=self.model_type,
            response=message["content"],
            usage=bk.Usage(input_tokens=input_tokens, output_tokens=output_tokens),
            metadata=None
        )

In [2]:
# Create the client with the default base URL (http://localhost:11434) and
# default sampling options (temperature=0.0, top_p=1.0, seed=55).
llama = Llama(model_name="gemma3:12b")

In [3]:
# Low-level call: returns the raw JSON dict from the /api/chat endpoint,
# before any conversion by parse_response().
llama.request(prompt="Hello, how are you?")

{'model': 'gemma3:12b',
 'created_at': '2026-02-05T11:26:08.1780048Z',
 'message': {'role': 'assistant',
  'content': "Hello! I'm doing well, thank you for asking! As an AI, I don't experience feelings like humans do, but I'm operating smoothly and ready to help. 😊\n\nHow are *you* doing today?"},
 'done': True,
 'done_reason': 'stop',
 'total_duration': 1240187100,
 'load_duration': 247528000,
 'prompt_eval_count': 15,
 'prompt_eval_duration': 312808600,
 'eval_count': 49,
 'eval_duration': 667595100}

In [4]:
# High-level call: invoking the instance yields a ModelResponse.
# NOTE(review): presumably bk.LM.__call__ chains request() and parse_response()
# and adds timing/caching fields — confirm against the brokit source.
llama(prompt="Hello, how are you?")

ModelResponse(model_name='gemma3:12b', model_type=<ModelType.CHAT: 'chat'>, response="Hello! I'm doing well, thank you for asking! As an AI, I don't experience feelings like humans do, but I'm operating smoothly and ready to help. 😊\n\nHow are *you* doing today?", usage=Usage(input_tokens=15, output_tokens=49), response_ms=913.6086999787949, cached=False, metadata=None, request=None, parsed_response=None)