In [1]:
#!/usr/bin/env python3
# test_turn_on_light.py

from llama_cpp import Llama
import json

In [2]:
# Configuration for the demo; main() forwards these three values to load_model().
# Repository id handed to Llama.from_pretrained as `repo_id`.
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
# Name of the GGUF weights file inside that repository (`filename` argument).
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
# Value passed as `n_threads` when the model is constructed.
NUM_THREADS = 4

In [3]:
def load_model(repo: str, filename: str, threads: int) -> Llama:
    """Build a Llama instance from a GGUF file in a model repository.

    Args:
        repo: Repository id, forwarded as ``repo_id``.
        filename: GGUF file to load from that repository.
        threads: Forwarded as ``n_threads``.

    Returns:
        The object produced by ``Llama.from_pretrained``.
    """
    loader_options = {
        "repo_id": repo,
        "filename": filename,
        "n_threads": threads,
    }
    return Llama.from_pretrained(**loader_options)

In [4]:
def build_prompt(command: str) -> str:
    """Build the instruction prompt that asks the model for one JSON function call.

    The prompt describes the expected JSON shape, lists the allowed functions
    and ends with the user's command followed by an ``Output`` marker.

    Args:
        command: The user's natural-language command.

    Returns:
        The full prompt text (no trailing newline).
    """
    # json.dumps supplies the surrounding double quotes and escapes embedded
    # quotes, backslashes and newlines, so a command can no longer break out of
    # the `User:` line or forge its own "Output (complete JSON):" marker.
    # ensure_ascii=False keeps non-ASCII text readable; plain commands come out
    # byte-identical to the previous hand-quoted form.
    quoted_command = json.dumps(command, ensure_ascii=False)
    return f"""You are an AI agent. Given an English user command, output exactly one **complete** JSON object (include all closing braces):
{{
  "name": "<function_name>",
  "arguments": {{ … }}
}}

Allowed functions:
- turn_on_light(room: string)
- transfer_money(recipient: string, amount: integer)

User: {quoted_command}
Output (complete JSON):"""

In [5]:
def _repair_json_object(text: str) -> str:
    """Return the first JSON object in *text*, closing it if it was cut off."""
    start = text.find("{")
    if start == -1:
        # Nothing that looks like an object; let json.loads report the error.
        return text

    pending = []       # closing brackets still owed, innermost last
    in_string = False
    escaped = False    # previous character was a backslash inside a string
    for pos in range(start, len(text)):
        ch = text[pos]
        if in_string:
            # Brackets inside string values are data, not structure.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]":
            if pending and pending[-1] == ch:
                pending.pop()
            if not pending:
                # Outermost object closed: drop anything that follows it
                # (closing code fence, explanation text, ...).
                return text[start:pos + 1]

    # Ran out of text before the object closed: finish it by hand.
    fragment = text[start:]
    if in_string:
        if escaped:
            # A dangling backslash would escape the quote we are about to add.
            fragment = fragment[:-1]
        fragment += '"'
    else:
        # A trailing comma before the closer is invalid JSON.
        fragment = fragment.rstrip().rstrip(",")
    return fragment + "".join(reversed(pending))


def safe_load(raw: str):
    """Parse model output as JSON, repairing common truncation damage.

    The text is first parsed as-is. If that fails, the first ``{...}`` object
    is located with a string-aware scan, any text around it is ignored, and
    missing closers (an unterminated string, ``]`` and ``}``) are appended
    before parsing again.

    Args:
        raw: Text produced by the model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the text cannot be parsed even after repair.
    """
    text = raw.strip()
    try:
        # Well-formed input (of any JSON type) is returned untouched.
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    return json.loads(_repair_json_object(text))

In [6]:
def run_command(llm: Llama, command: str, max_tokens: int = 32) -> dict:
    """Ask the model to translate a command into a function call and parse it.

    Args:
        llm: Loaded model used for the chat completion.
        command: The user's natural-language command.
        max_tokens: Completion budget passed to the model. The default of 32
            matches the previous hard-coded value; replies that need more
            tokens are cut off, so raise it for calls with longer arguments.

    Returns:
        The JSON value parsed from the model's reply by ``safe_load`` -- a dict
        with ``"name"`` and ``"arguments"`` keys when the model follows the
        prompt.

    Raises:
        json.JSONDecodeError: If the reply cannot be parsed as JSON.
    """
    prompt = build_prompt(command)
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        # 0.0 asks for the least random sampling so repeated runs agree.
        temperature=0.0,
    )
    reply_text = response["choices"][0]["message"]["content"]
    # safe_load strips surrounding whitespace itself before parsing.
    return safe_load(reply_text)

In [7]:
def main():
    """Load the configured model, run one sample command and print the result."""
    model = load_model(MODEL_REPO, MODEL_FILE, NUM_THREADS)
    print(run_command(model, "Turn on the bedroom light"))


# Run the demo when this module is the entry point.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
llama_model_loader: loaded meta data with 24 key-value pairs and 195 tensors from /home/hiro/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct-gguf/snapshots/999f761fe19e26cf1a339a5ec5f9f201301cbb83/./Phi-3-mini-4k-instruct-q4.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = phi3
llama_model_loader: - kv   1:                               general.name str              = Phi3
llama_model_loader: - kv   2:                        phi3.context_length u32              = 4096
llama_model_loader: - kv   3:                      phi3.embedding_length u32              = 3072
llama_model_loader: - kv   4:                   phi3.feed_forward_length u32              = 8192
llama_model_loader: - kv   5:                           phi3.block_count u32              = 3

{'name': 'turn_on_light', 'arguments': {'room': 'bedroom'}}
