In [1]:
from dotenv import find_dotenv, load_dotenv

# Load environment variables from the .env file located by find_dotenv().
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

# Model alias for this notebook (other aliases noted by the author: gemma, mini).
MODEL_NAME = 'llama'

In [7]:
from llama_index.core.prompts import PromptTemplate

In [2]:
from utils import MCQ

# Display the JSON schema generated for the MCQ model
# (fields: qid, stem, A-D and an optional ans, per the output below).
mcq_schema = MCQ.model_json_schema()
mcq_schema

{'description': '單選題結構，包含題號(qid)、題幹(stem)、以及 A、B、C、D 四個選項',
 'properties': {'qid': {'description': '題號',
   'title': 'Qid',
   'type': 'integer'},
  'stem': {'description': '題幹', 'title': 'Stem', 'type': 'string'},
  'A': {'description': '本題的A選項', 'title': 'A', 'type': 'string'},
  'B': {'description': '本題的B選項', 'title': 'B', 'type': 'string'},
  'C': {'description': '本題的C選項', 'title': 'C', 'type': 'string'},
  'D': {'description': '本題的D選項', 'title': 'D', 'type': 'string'},
  'ans': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'description': '答案',
   'title': 'Ans'}},
 'required': ['qid', 'stem', 'A', 'B', 'C', 'D'],
 'title': 'MCQ',
 'type': 'object'}

In [3]:
from utils import get_mcq_tool_list

# Fetch the tool list from the project helper and keep its first entry,
# which is the tool handed to chat_with_tools later in the notebook.
mcq_tool_list = get_mcq_tool_list()
mcq_tool = mcq_tool_list[0]

# Print the tool's metadata (name and description).
print(
    f"# name: {mcq_tool.metadata.name}\n# description: {mcq_tool.metadata.description}"
)

# name: MCQ
# description: 單選題結構，包含題號(qid)、題幹(stem)、以及 A、B、C、D 四個選項


In [4]:
from utils import get_llm

# Create one handle per model alias; get_llm prints which backing model is
# used (see the output below). Evaluation order is llama first, then gemma.
llama, gemma = get_llm('llama'), get_llm('gemma')

use ollama model: llama3.1:latest
use ollama model: gemma3:12b


# data

In [5]:
import os
import json
from utils import json_load

# Read the dataset file and keep only its 'examples' entry; the cell's
# output is the number of examples.
file_path = os.path.join('data/source/structured_output_dataset.json')
dataset = json_load(file_path)
data = dataset['examples']
len(data)

80

# 1. Structured LLMs

In [None]:
# reference: https://developers.llamaindex.ai/python/framework/understanding/extraction/structured_llms/
# code: https://github.com/run-llama/llama_index/blob/8469a034226d20b70a667dc7faf013770716709f/llama-index-core/llama_index/core/llms/structured_llm.py#L32
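# notes:
## - This (the structured LLM wrapper) does not appear to add any prompt of its own: the Pydantic class is handed over and the model is simply asked to predict it
## - Under the hood it still calls structured prediction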

In [10]:
# Use the first reference context of the first example as the query text
# and display it.
first_example = data[0]
query = first_example['reference_context'][0]
query

'1.常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？\n\xa0\nA.上星、日月\nB.合谷、太衝\nC.內關、外關\nD.上關、下關'

In [24]:
# Wrap the llama handle as a structured LLM targeting the MCQ model, run it
# on the raw question text (no extra prompt), and show the result as a dict.
sllama = llama.as_structured_llm(MCQ)
query = data[0]['reference_context'][0]
response = sllama.complete(query)
response_text = response.text
json.loads(response_text)

{'qid': 1,
 'stem': '答案是 C',
 'A': '上星、日月',
 'B': '合谷、太衝',
 'C': '內關、外關',
 'D': '上關、下關',
 'ans': None}

In [26]:
# Same experiment with the gemma handle: structured LLM over MCQ, applied to
# the same `query`, result shown as a dict.
sgemma = gemma.as_structured_llm(MCQ)
response = sgemma.complete(query)
response_text = response.text
json.loads(response_text)

{'qid': 1,
 'stem': '1.常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？',
 'A': '上星、日月',
 'B': '合谷、太衝',
 'C': '內關、外關',
 'D': '上關、下關',
 'ans': 'C'}

In [14]:
# Show the full response object rather than just `.text`; in the recorded
# run its repr also carries the parsed MCQ instance under `raw`.
response

CompletionResponse(text='{"qid":1,"stem":"1.常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？","A":"上星、日月","B":"合谷、太衝","C":"內關、外關","D":"上關、下關","ans":"C"}', additional_kwargs={}, raw=MCQ(qid=1, stem='1.常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？', A='上星、日月', B='合谷、太衝', C='內關、外關', D='上關、下關', ans='C'), logprobs=None, delta=None)

# 2. Structured Prediction (English prompt)

In [27]:
# reference: https://developers.llamaindex.ai/python/framework/understanding/extraction/structured_prediction/
# code: https://github.com/run-llama/llama_index/blob/8469a034226d20b70a667dc7faf013770716709f/llama-index-core/llama_index/core/llms/llm.py#L307

In [28]:
# English extraction prompt; the {text} placeholder is filled through the
# `text=` keyword passed to structured_predict.
prompt_template_en = (
    "Extract a multiple-choice question (MCQ) from the following text. If the original text does not provide an answer, omit the answer field entirely and do not attempt to guess it: {text}"
)
prompt = PromptTemplate(prompt_template_en)

# Ask llama for an MCQ instance, then show it as a plain dict.
response = llama.structured_predict(MCQ, prompt, text=query)
response_json = response.model_dump_json()
json.loads(response_json)

{'qid': 1,
 'stem': '',
 'A': '上星、日月',
 'B': '合谷、太衝',
 'C': '內關、外關',
 'D': '上關、下關',
 'ans': None}

In [29]:
# Same structured prediction with the gemma handle, using the current `prompt`.
response = gemma.structured_predict(MCQ, prompt, text=query)
response_json = response.model_dump_json()
json.loads(response_json)

{'qid': 1,
 'stem': 'In common acupuncture point combinations, what combination of acupoints is referred to as “Si Guan (Four Passes)”?',
 'A': 'Shang Xing, Ri Yue',
 'B': 'He Gu, Tai Chong',
 'C': 'Nei Guan, Wai Guan',
 'D': 'Shang Guan, Xia Guan',
 'ans': None}

# 3. Structured Prediction (Chinese prompt)

In [30]:
# Chinese version of the extraction prompt; this rebinds `prompt`, so later
# cells that read `prompt` use this template.
prompt_template_zh = (
    "從以下文字中擷取一題選擇題 (MCQ)。如果原始文字沒有提供答案，則完全省略答案欄位，且不要嘗試推測答案：{text}"
)
prompt = PromptTemplate(prompt_template_zh)

# Ask llama for an MCQ instance, then show it as a plain dict.
response = llama.structured_predict(MCQ, prompt, text=query)
response_json = response.model_dump_json()
json.loads(response_json)

{'qid': 1,
 'stem': '',
 'A': '上星、日月',
 'B': '合谷、太衝',
 'C': '內關、外關',
 'D': '上關、下關',
 'ans': None}

In [31]:
# Same structured prediction with the gemma handle, using the current `prompt`.
response = gemma.structured_predict(MCQ, prompt, text=query)
response_json = response.model_dump_json()
json.loads(response_json)

{'qid': 1,
 'stem': '常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？',
 'A': '上星、日月',
 'B': '合谷、太衝',
 'C': '內關、外關',
 'D': '上關、下關',
 'ans': None}

# 4. chat_with_tools

In [35]:
llm.chat_with_tools?

[31mSignature:[39m
llm.chat_with_tools(
    tools: Sequence[ForwardRef([33m'BaseTool'[39m)],
    user_msg: Union[str, llama_index.core.base.llms.types.ChatMessage, NoneType] = [38;5;28;01mNone[39;00m,
    chat_history: Optional[List[llama_index.core.base.llms.types.ChatMessage]] = [38;5;28;01mNone[39;00m,
    verbose: bool = [38;5;28;01mFalse[39;00m,
    allow_parallel_tool_calls: bool = [38;5;28;01mFalse[39;00m,
    tool_required: bool = [38;5;28;01mFalse[39;00m,
    **kwargs: Any,
) -> llama_index.core.base.llms.types.ChatResponse
[31mDocstring:[39m Chat with function calling.
[31mFile:[39m      ~/miniconda3/envs/rag30/lib/python3.12/site-packages/llama_index/core/llms/function_calling.py
[31mType:[39m      method

In [45]:
# Build the user message from the most recently defined `prompt` template and
# require the model to answer through the MCQ tool. chat_history is left unset.
message = prompt.format_messages(text=query)[0]
resp = llama.chat_with_tools(
    [mcq_tool],
    user_msg=message,
    allow_parallel_tool_calls=True,
    tool_required=True,
)

# Parse the tool calls with the same handle that produced `resp`. The
# previous code used an undefined name `llm` here, which raises NameError
# when the notebook is run top to bottom on a fresh kernel.
tool_calls = llama.get_tool_calls_from_response(
    resp, error_on_no_tool_call=False
)

# Collect the keyword arguments of every tool call (one dict per call).
outputs = [tool_call.model_dump()['tool_kwargs'] for tool_call in tool_calls]
print(outputs)

[{'A': '代村机,天月', 'B': '合度,天台雨', 'C': '南通,割通', 'D': '代通,三通', 'qid': 1, 'stem': '台九気终飯騻合前。\n\nA. 代村机,天月\nB. 合度,天台雨\nC. 南通,割通\nD. 代通,三通'}]


In [43]:
# Pretty-display the keyword arguments of the first tool call.
first_call = tool_calls[0]
first_call.model_dump()['tool_kwargs']

{'A': '代村机,天月',
 'B': '合度,天台雨',
 'C': '南通,割通',
 'D': '代通,三通',
 'qid': 1,
 'stem': '台九気终飯騻合前。\n\nA. 代村机,天月\nB. 合度,天台雨\nC. 南通,割通\nD. 代通,三通'}

# 5. Direct prompting without json mode

In [8]:
schema = MCQ.model_json_schema()

# Hand-written prompt: schema first, then the instruction, then the source
# text between dashed delimiters. Only {text} is left as a placeholder.
# NOTE(review): `schema` is interpolated through the f-string as a Python
# dict repr (single quotes, `None`), not as serialized JSON; see the printed
# template below.
gemma_template = (
    "這是 MCQ 的 JSON schema:\n"
    f"{schema}\n"
    "從以下文字中擷取一題選擇題 (MCQ)。如果原始文字沒有提供答案，則完全省略答案欄位，且不要嘗試推測答案\n\n以下開始:\n"
    "-----\n"
    "{text}\n"
    "-----\n"
    "結果：\n"
)
gemma_prompt = PromptTemplate(gemma_template)

print(gemma_prompt.template)

這是 MCQ 的 JSON schema:
{'description': '單選題結構，包含題號(qid)、題幹(stem)、以及 A、B、C、D 四個選項', 'properties': {'qid': {'description': '題號', 'title': 'Qid', 'type': 'integer'}, 'stem': {'description': '題幹', 'title': 'Stem', 'type': 'string'}, 'A': {'description': '本題的A選項', 'title': 'A', 'type': 'string'}, 'B': {'description': '本題的B選項', 'title': 'B', 'type': 'string'}, 'C': {'description': '本題的C選項', 'title': 'C', 'type': 'string'}, 'D': {'description': '本題的D選項', 'title': 'D', 'type': 'string'}, 'ans': {'anyOf': [{'type': 'string'}, {'type': 'null'}], 'default': None, 'description': '答案', 'title': 'Ans'}}, 'required': ['qid', 'stem', 'A', 'B', 'C', 'D'], 'title': 'MCQ', 'type': 'object'}
從以下文字中擷取一題選擇題 (MCQ)。如果原始文字沒有提供答案，則完全省略答案欄位，且不要嘗試推測答案

以下開始:
-----
{text}
-----
結果：



In [14]:
# Plain completion with the hand-written prompt (no structured-output help).
formatted_prompt = gemma_prompt.format(text=query)
response = gemma.complete(formatted_prompt)
print(response)

```json
{
  "qid": 1,
  "stem": "常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？",
  "A": "上星、日月",
  "B": "合谷、太衝",
  "C": "內關、外關",
  "D": "上關、下關"
}
```


# 6. Direct prompting with json mode

In [57]:
# This shows that passing an OpenAI-style `response_format` of type
# "json_object" does not work with the Ollama model: the output below is
# still wrapped in a Markdown json fence. (It should work with an OpenAI model.)
json_object_kwargs = {'response_format': {"type": "json_object"}}
formatted_prompt = gemma_prompt.format(text=query)
response = gemma.complete(formatted_prompt, additional_kwargs=json_object_kwargs)
print(response)

```json
{
  "qid": 1,
  "stem": "常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？",
  "A": "上星、日月",
  "B": "合谷、太衝",
  "C": "內關、外關",
  "D": "上關、下關"
}
```


In [11]:
# Create a second gemma handle with json_mode=True for comparison with the
# plain `gemma` handle. How get_llm applies the flag lives in utils (not
# shown here); presumably it is forwarded to the Ollama constructor. TODO confirm.
json_gemma = get_llm('gemma', json_mode=True)

use ollama model: gemma3:12b


In [19]:
# Same hand-written prompt, sent through the json_mode handle this time.
formatted_prompt = gemma_prompt.format(text=query)
response = json_gemma.complete(formatted_prompt)
print(response)

{
  "qid": 1,
  "stem": "常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？",
  "A": "上星、日月",
  "B": "合谷、太衝",
  "C": "內關、外關",
  "D": "上關、下關"
}


In [20]:
# Inspect the raw payload attached to the last response (model name, timing
# counters, token usage and the assistant message in the recorded run).
response.raw

{'model': 'gemma3:12b',
 'created_at': '2025-10-02T17:10:49.07529124Z',
 'done': True,
 'done_reason': 'stop',
 'total_duration': 16989334006,
 'load_duration': 6316815814,
 'prompt_eval_count': 376,
 'prompt_eval_duration': 2615835633,
 'eval_count': 90,
 'eval_duration': 7955849202,
 'message': Message(role='assistant', content='{\n  "qid": 1,\n  "stem": "常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？",\n  "A": "上星、日月",\n  "B": "合谷、太衝",\n  "C": "內關、外關",\n  "D": "上關、下關"\n}', thinking=None, images=None, tool_name=None, tool_calls=None),
 'usage': {'prompt_tokens': 376, 'completion_tokens': 90, 'total_tokens': 466}}

In [22]:
# Show the response text as a Python string; in the recorded run it is bare
# JSON with no Markdown code fence around it.
response.text

'{\n  "qid": 1,\n  "stem": "常見針灸配穴法中，所指的「四關穴」，為下列何穴位之組合？",\n  "A": "上星、日月",\n  "B": "合谷、太衝",\n  "C": "內關、外關",\n  "D": "上關、下關"\n}'

In [60]:
Ollama?

[31mInit signature:[39m
Ollama(
    model: str,
    base_url: str = [33m'http://localhost:11434'[39m,
    temperature: Optional[float] = [38;5;28;01mNone[39;00m,
    context_window: int = -[32m1[39m,
    request_timeout: Optional[float] = [32m30.0[39m,
    prompt_key: str = [33m'prompt'[39m,
    json_mode: bool = [38;5;28;01mFalse[39;00m,
    additional_kwargs: Optional[Dict[str, Any]] = [38;5;28;01mNone[39;00m,
    client: Optional[ollama._client.Client] = [38;5;28;01mNone[39;00m,
    async_client: Optional[ollama._client.AsyncClient] = [38;5;28;01mNone[39;00m,
    is_function_calling_model: bool = [38;5;28;01mTrue[39;00m,
    keep_alive: Union[float, str, NoneType] = [38;5;28;01mNone[39;00m,
    thinking: Optional[bool] = [38;5;28;01mNone[39;00m,
    *,
    callback_manager: llama_index.core.callbacks.base.CallbackManager = <factory>,
    system_prompt: Optional[str] = [38;5;28;01mNone[39;00m,
    messages_to_prompt: Annotated[Optional[llama_index.core.llm

In [59]:
OpenAI?

[31mInit signature:[39m
OpenAI(
    model: str = [33m'gpt-3.5-turbo'[39m,
    temperature: float = [32m0.1[39m,
    max_tokens: Optional[int] = [38;5;28;01mNone[39;00m,
    additional_kwargs: Optional[Dict[str, Any]] = [38;5;28;01mNone[39;00m,
    max_retries: int = [32m3[39m,
    timeout: float = [32m60.0[39m,
    reuse_client: bool = [38;5;28;01mTrue[39;00m,
    api_key: Optional[str] = [38;5;28;01mNone[39;00m,
    api_base: Optional[str] = [38;5;28;01mNone[39;00m,
    api_version: Optional[str] = [38;5;28;01mNone[39;00m,
    callback_manager: Optional[llama_index.core.callbacks.base.CallbackManager] = [38;5;28;01mNone[39;00m,
    default_headers: Optional[Dict[str, str]] = [38;5;28;01mNone[39;00m,
    http_client: Optional[httpx.Client] = [38;5;28;01mNone[39;00m,
    async_http_client: Optional[httpx.AsyncClient] = [38;5;28;01mNone[39;00m,
    openai_client: Optional[openai.OpenAI] = [38;5;28;01mNone[39;00m,
    async_openai_client: Optional[openai.As