In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

from langchain_core.runnables import RunnablePassthrough
from datetime import date

In [2]:
from pydantic import BaseModel, Field
from typing import Optional, Literal


# Tool categories accepted by `AutomationStep.tool`, in schema order.
ToolName = Literal[
    "ocr_reader",
    "excel_writer",
    "comparer",
    "web_automation",
    "email_fetcher",
    "unspecified",
]


# One step of an automation plan.
# Documented with comments instead of a docstring on purpose: pydantic copies
# a model docstring into the generated JSON schema's "description", which
# would change the schema handed to the LLM.
class AutomationStep(BaseModel):
    # What has to be done in this step (required).
    action: str = Field(
        ...,
        description="Description of the specific action to perform.",
    )
    # Optional: what the step consumes; defaults to None.
    input: Optional[str] = Field(
        default=None,
        description="Input required for the action (e.g., file path, keyword).",
    )
    # Optional: what the step is expected to produce; defaults to None.
    output: Optional[str] = Field(
        default=None,
        description="Expected output from the action.",
    )
    # Required: must be one of the ToolName values.
    tool: ToolName = Field(
        ...,
        description="The type of tool or method to be used for the action.",
    )


# Top-level structured-output schema: a list of AutomationStep entries.
# Comments are used instead of a docstring because pydantic would copy a
# docstring into the JSON schema "description" sent to the model.
class ResultSchema(BaseModel):
    # Required field; the explicit `...` marks it as having no default.
    steps: list[AutomationStep] = Field(
        ...,
        description="Step-by-step breakdown of the task to automate.",
    )

In [3]:
# Connection and sampling settings passed to ChatOpenAI; the base URL points
# at a server on localhost.
parameters = {
    "base_url": "http://localhost:1234/v1",
    "api_key": "nothing",  # required, but unused
    "model": "qwen3:4B",
    "temperature": 0.0,
    "max_tokens": 10240,
}

# Build the chat model first, then wrap it so replies are parsed into
# ResultSchema. With include_raw=True the result of invoke() is a dict that
# exposes both the raw reply ("raw") and the parsed object ("parsed").
chat_model = ChatOpenAI(**parameters)
client = chat_model.with_structured_output(
    schema=ResultSchema,
    include_raw=True,
    strict=True,
)

In [4]:
# Display the runnable's repr to inspect the bound model settings and the
# response schema attached by with_structured_output().
client

{
  raw: RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x10a4e1130>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x10c080140>, root_client=<openai.OpenAI object at 0x104831070>, root_async_client=<openai.AsyncOpenAI object at 0x10a4e12b0>, model_name='qwen3:4B', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:1234/v1', max_tokens=10240), kwargs={'response_format': <class '__main__.ResultSchema'>, 'ls_structured_output_format': {'kwargs': {'method': 'json_schema', 'strict': True}, 'schema': {'type': 'function', 'function': {'name': 'ResultSchema', 'description': '', 'parameters': {'properties': {'steps': {'description': 'Step-by-step breakdown of the task to automate.', 'items': {'properties': {'action': {'description': 'Description of the specific action to perform.', 'type': 'string'}, 'input': {'anyOf': [{'type': 'string

In [13]:
# System prompt for the planner.
# - It is rendered through LangChain's f-string templating, so the literal
#   braces of the JSON example are doubled ({{ }}).
# - The tool names listed here mirror the AutomationStep.tool Literal; keep
#   both lists in sync when a tool is added or removed.
# - The ```json fence is explicitly closed so the final instruction line is
#   not read as part of the code block.
__SYSTEM_PROMPT = """
You are a helpful AI assistant that converts user instructions into a step-by-step action plan that can be executed by an automation system.

Each step should include:
- action: a clear description of what needs to be done
- input: the required file, path, or data input
- output: the expected result or data output
- tool: the tool category used to perform this step
  (one of: ocr_reader, excel_writer, comparer, web_automation, email_fetcher, unspecified)

If any required information is missing, write "unspecified".

Return the result in the following JSON format:
```json
{{
  "steps": [
    {{
      "action": "...",
      "input": "...",
      "output": "...",
      "tool": "..."
    }}
  ]
}}
```
Be concise, structured, and do not invent missing data.
"""


# Message layout, in order: the planner instructions, the "/no_think"
# directive as a second system message, the user's request, and finally the
# current timestamp. Each (role, template) pair is the shorthand for the
# matching System/HumanMessagePromptTemplate.from_template(...) call.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", __SYSTEM_PROMPT),
        ("system", "/no_think"),
        ("human", "{message}"),
        ("human", "current DATETIME with time zone is {current_datetime}"),
    ]
)

In [None]:
from pendulum import now

# Korean request: read a receipt image with OCR, save it to Excel item by
# item, and highlight items of 50,000 won or more.
message = "영수증 이미지를 OCR로 읽어서 항목별로 엑셀에 저장하고, 5만 원 이상 항목은 강조해줘"

# Timestamp handed to the prompt: current time in the Asia/Seoul zone,
# serialized as an ISO 8601 string.
seoul_now_iso = now("Asia/Seoul").to_iso8601_string()

# format_messages() fills both placeholders and returns the message list.
formatted = prompt_template.format_messages(
    message=message,
    current_datetime=seoul_now_iso,
)

# Send the rendered messages through the structured-output runnable.
results = client.invoke(formatted)

In [18]:
# Show the parsed ResultSchema; the raw model response is available under
# results["raw"] because the runnable was built with include_raw=True.
results["parsed"]

ResultSchema(steps=[AutomationStep(action='OCR 영수증 이미지를 읽기', input='영수증 이미지 파일 경로', output='OCR에서 추출된 텍스트 데이터', tool='ocr_reader'), AutomationStep(action='텍스트 데이터를 엑셀에 저장하기', input='OCR에서 추출된 텍스트 데이터, 엑셀 파일 경로', output='OCR에서 추출된 항목별 정보가 담긴 엑셀 파일', tool='excel_writer'), AutomationStep(action='엑셀 파일에서 5만 원 이상의 항목 찾기', input='OCR에서 추출된 항목별 정보가 담긴 엑셀 파일', output='5만 원 이상의 항목이 강조된 엑셀 파일', tool='comparer')])