In [1]:
from llama_index.core import Settings
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.llms import ChatMessage, MessageRole

## Model and endpoint configuration
MODEL_NAME = "qwen3"
LLM_API_BASE = "http://192.168.100.30:16001/v1"
MAX_TOKENS = 2048  # forwarded as max_tokens below
TIME_OUT = 600     # forwarded as timeout below

# Install the OpenAI-compatible endpoint as the default LLM on Settings.
Settings.llm = OpenAILike(
    model=MODEL_NAME,
    api_base=LLM_API_BASE,
    api_key="EMPTY",
    is_chat_model=True,
    temperature=0.6,
    max_tokens=MAX_TOKENS,
    timeout=TIME_OUT,
    # The extra_body payload sets enable_thinking=False under chat_template_kwargs.
    additional_kwargs={
        "extra_body": {
            "chat_template_kwargs": {"enable_thinking": False},
        },
    },
)

In [2]:
import mlflow
from mlflow.genai import scorer

# Point the MLflow client at the tracking server on 127.0.0.1:5000 and select
# the "prompt_test" experiment for everything logged by this notebook.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("prompt_test")
# NOTE(review): set_tag is called without an explicit mlflow.start_run() in the
# visible cells; presumably the fluent API starts a run implicitly here and this
# tag names it "prompt_test" — confirm, and confirm the run is meant to stay open.
mlflow.set_tag("mlflow.runName", "prompt_test")

In [None]:
def llm_function(question: str, **kwargs) -> str:
    """Answer ``question`` with the configured LLM under a registered MLflow prompt.

    The prompt ``prompts:/test_prompt_1/3`` must already exist in the MLflow
    prompt registry before this function is called.

    Args:
        question: The user's question, sent unchanged as the user turn.
        **kwargs: Extra keyword arguments forwarded to ``Settings.llm.chat``.

    Returns:
        The text of the first content block of the model's reply.
    """
    ## choose a prompt
    ## Set up in mlflow before running
    prompt = mlflow.genai.load_prompt("prompts:/test_prompt_1/3")
    messages = [
        # The registered prompt carries the instructions, so it belongs in the
        # system turn. Sending it as "assistant" would present it to the model
        # as one of its own earlier replies rather than as instructions.
        ChatMessage(role=MessageRole.SYSTEM, content=prompt.format()),
        ChatMessage(role=MessageRole.USER, content=question),
    ]
    response = Settings.llm.chat(messages, **kwargs)
    # Extract once so the logged text and the returned text cannot diverge.
    answer = response.message.blocks[0].text
    print(f"response: {answer}")
    return answer

In [4]:
@scorer
def exact_match(outputs: str, expectations: dict) -> bool:
    """Score a prediction as correct only when it equals the expected response.

    Args:
        outputs: The value being scored.
        expectations: Mapping that must contain the key ``"expected_response"``.

    Returns:
        True if ``outputs`` equals ``expectations["expected_response"]``,
        otherwise False.
    """
    # Log before the lookup so the inputs are visible even if the key is missing.
    print(f"outputs: {outputs}, expectations: {expectations}")
    expected = expectations["expected_response"]
    return outputs == expected

In [None]:
# Evaluation records: each row pairs a question (under "inputs") with the
# expected answer string (under "expectations").
dataset = [
    {
        "inputs": {"question": question},
        "expectations": {"expected_response": expected},
    }
    for question, expected in (
        ("My fasting blood glucose is 10 mmol, do I have diabetes risk?", "Yes."),
        ("I have a headache, am I going to die soon?", "No."),
        ("My blood pressure is 150/110, do I have hypertension risk?", "Yes."),
        ("My heart beats 60 times per minute, is my heartbeat abnormal?", "No."),
    )
]

In [None]:
# Run the evaluation: dataset supplies the rows, llm_function is the predict
# function, and exact_match is the only scorer.
results = mlflow.genai.evaluate(
    data=dataset,
    predict_fn=llm_function,
    scorers=[exact_match],
)
print(results)
