## Output Parser

In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

In [2]:
# Chat model used by the StrOutputParser and CSV parser chains below.
# NOTE(review): temperature=0 is presumably chosen to keep replies as repeatable as possible — confirm.
model = ChatOpenAI(temperature=0)

### StrOutputParser

In [5]:
from langchain_core.output_parsers import StrOutputParser

In [6]:
# Keyword-extraction prompt: a single human message whose {text} placeholder is
# filled in at invoke time. The Korean template asks the model to pull keywords
# out of the quoted text and separate them with commas.
human_message_prompt = "'{text}' 여기서 키워드를 뽑아서 콤마로 구분해줘"
human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_message_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt_template])

In [15]:
# Pipe prompt -> model -> StrOutputParser so that invoking the chain yields a plain str.
chain_with_output_parser = chat_prompt_template | model | StrOutputParser()

In [16]:
# Run the chain on one sentence; the dict key must match the prompt's {text} placeholder.
out = chain_with_output_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [17]:
out

'LangChain, framework, developing applications, language models'

In [18]:
type(out)

str

### CSV Parser

In [23]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate

In [24]:
# Built-in parser that turns a comma-separated reply into a Python list.
output_parser = CommaSeparatedListOutputParser()

# Instruction text describing the expected reply format; it is injected into
# the prompt via partial_variables in the next prompt definition.
format_instructions = output_parser.get_format_instructions()

In [25]:
format_instructions

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [26]:
# Prompt with one runtime variable ({subject}); {format_instructions} is
# pre-filled through partial_variables, so callers only supply the subject.
# The Korean text asks the model to extract keywords about the subject.
prompt = PromptTemplate(
    template="{subject}에 대한 키워드를 추출해줘.\n{format_instructions}",
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
)

In [27]:
# Render the prompt on its own (no model in this pipeline) to inspect the exact text that will be sent.
p = prompt.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [28]:
p

StringPromptValue(text='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')

In [29]:
p.to_messages()

[HumanMessage(content='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')]

In [42]:
# Pipe prompt -> model -> comma-separated list parser so that the chain returns a list of strings.
chain = prompt | model | output_parser

In [43]:
# Run the CSV chain; only {subject} has to be supplied because the format instructions are pre-filled.
out = chain.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [44]:
out

['framework', 'developing applications', 'language models']

In [45]:
type(out)

list

In [46]:
out[1]

'developing applications'

### 데이터 형식 정의하기

In [47]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [48]:
# Re-create the chat model with the same settings (temperature=0) as the instance built at the top of the notebook.
model = ChatOpenAI(temperature=0)

In [49]:
# Define your desired data structure.
# Translation is the schema handed to JsonOutputParser: one string field.
# The Field description (Korean for "translated text") is embedded in the JSON
# schema that the parser's format instructions show to the model.

class Translation(BaseModel):
    translated_text: str = Field(description="번역된 텍스트")

In [50]:
# And a query intended to prompt a language model to populate the data structure.
# The Korean text means "Hello world".
query = "안녕 세상"

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Translation)
# NOTE(review): `from_instruction` looks like a typo for `format_instructions`;
# the name is kept because the prompt definition that follows references it.
from_instruction = parser.get_format_instructions()
from_instruction

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "\\ubc88\\uc5ed\\ub41c \\ud14d\\uc2a4\\ud2b8", "type": "string"}}, "required": ["translated_text"]}\n```'

In [51]:
# Translation prompt: the JSON format instructions come first, followed by the
# Korean directive ("translate the content below") and the text to translate.
# Only {query} is supplied at invoke time; {format_instructions} is pre-filled.
# This rebinds `prompt`, replacing the keyword-extraction prompt from the CSV section.
prompt = PromptTemplate(
    template="{format_instructions}\n아래 내용을 번역해라\n{query}",
    input_variables=["query"],
    partial_variables={"format_instructions": from_instruction},
)

In [52]:
prompt.invoke({"query": query}).to_messages()

[HumanMessage(content='The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "\\ubc88\\uc5ed\\ub41c \\ud14d\\uc2a4\\ud2b8", "type": "string"}}, "required": ["translated_text"]}\n```\n아래 내용을 번역해라\n안녕 세상')]

In [60]:
# Pipe prompt -> model -> JsonOutputParser so that the model's reply is parsed into a dict.
chain = prompt | model | parser

# Translate the query; the result is keyed by the Translation field name.
out = chain.invoke({"query": query})

In [62]:
out

{'translated_text': 'Hello World'}

In [63]:
type(out)

dict

In [64]:
out['translated_text']

'Hello World'

### Custom Output Parser

In [65]:
from langchain_core.output_parsers import BaseOutputParser

# NOTE: this class deliberately reuses the name of the built-in
# CommaSeparatedListOutputParser imported earlier in the notebook and therefore
# shadows it from this cell onwards.
class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list.

    Items are separated on every comma, with or without a following space, so
    both ``"foo, bar, baz"`` and ``"foo,bar,baz"`` yield
    ``["foo", "bar", "baz"]``.
    """

    def parse(self, text: str) -> list:
        """Parse the output of an LLM call.

        Args:
            text: Raw text returned by the model.

        Returns:
            The comma-separated items as a list of strings, each with its
            surrounding whitespace removed. Empty items (blank output, a
            trailing comma, doubled commas) are dropped, so blank output
            yields an empty list.
        """
        # Split on the bare comma and trim each piece instead of splitting on
        # ", ": the model is free to omit the space or add extra whitespace.
        pieces = (piece.strip() for piece in text.split(","))
        return [piece for piece in pieces if piece]

In [66]:
# Reuse the keyword chat prompt from the StrOutputParser section, but parse the reply with the custom parser class.
chain_with_comma_parser = chat_prompt_template | model | CommaSeparatedListOutputParser()

In [67]:
# Run the custom-parser chain; the dict key must match the prompt's {text} placeholder.
out = chain_with_comma_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [68]:
out

['LangChain', 'framework', 'developing applications', 'language models']

In [69]:
# The parsed result is a list, so each keyword can be iterated over and printed on its own line.
for keyword in out:
    print(keyword)

LangChain
framework
developing applications
language models
