## Output Parser

In [1]:
import google.generativeai as genai

# API key setup

from dotenv import load_dotenv
import os
# Load the .env file (API keys). Key names listed by the original author:
#GOOGLE_API_KEY
#LANGCHAIN_API_KEY
#TAVILY_API_KEY
#HUGGINGFACEHUB_API_TOKEN
#COHERE_API_KEY

load_dotenv()

True

In [2]:
# Read the GOOGLE_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, and nothing
# here checks for that before the value is passed to genai.configure.
google_api_key = os.getenv('GOOGLE_API_KEY')

# Configure the google.generativeai client with the Google API key.
genai.configure(api_key=google_api_key)

In [3]:
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

In [None]:
# Select OpenAI as the chat model

In [2]:
# Build an OpenAI chat model with temperature=0.
# NOTE(review): the Gemini cell that follows rebinds `model`, so when both
# cells run this instance is replaced before any chain uses it.
from langchain_openai import ChatOpenAI
model = ChatOpenAI(temperature=0)

In [4]:
# Select Gemini as the chat model

In [5]:
# Build the Gemini chat model ("gemini-1.5-pro", temperature=0) and bind it to `model`.
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)

### StrOutputParser
### 대화형 LLM을 구성할 때 주로 사용한다.

In [18]:
from langchain_core.output_parsers import StrOutputParser

In [19]:
# Prompt template: asks the model to pull keywords out of {text} and separate them with commas.
human_message_prompt = "'{text}' 여기서 키워드를 뽑아서 콤마로 구분해줘"
# Wrap the template string as a human message, then build a one-message chat prompt from it.
human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_message_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt_template])

In [8]:
# prompt -> model -> StrOutputParser; the recorded result of this chain is a plain str.
chain_with_output_parser = chat_prompt_template | model | StrOutputParser()

In [9]:
# Run the chain on one sample sentence; {text} is the only input variable.
out = chain_with_output_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [10]:
out

'LangChain, framework, language models, applications\n'

In [17]:
out

'LangChain, framework, developing applications, language models'

In [11]:
type(out)

str

In [12]:
# What happens if StrOutputParser() is not attached?
chain_with_output_parser2 = chat_prompt_template | model #| StrOutputParser()

In [13]:
# Same input as before, but through the chain that has no output parser.
out2 = chain_with_output_parser2.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [16]:
out2 # Without an output parser the LLM message is returned as-is, as an AIMessage.

AIMessage(content='LangChain, framework, language models, applications\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-e96e24f1-2391-4c20-8700-aeb5be054a6c-0', usage_metadata={'input_tokens': 34, 'output_tokens': 10, 'total_tokens': 44})

In [17]:
type(out2)

langchain_core.messages.ai.AIMessage

### CSV Parser

In [20]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate

In [21]:
# Parser that turns comma-separated model output into a list.
output_parser = CommaSeparatedListOutputParser()

# Instruction text to embed in the prompt so the model answers in that format.
format_instructions = output_parser.get_format_instructions()

In [23]:
format_instructions # Instruction that asks the LLM to answer with comma-separated values when CommaSeparatedListOutputParser is used

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [24]:
type(format_instructions)

str

In [26]:
prompt = PromptTemplate(
    template="{subject}에 대한 키워드를 추출해줘.\n{format_instructions}",
    input_variables=["subject"],                           # template variable supplied by the caller at invoke() time
    partial_variables={"format_instructions": format_instructions},  
    # partial_variables binds values that already exist, so they are substituted into the template without being passed to invoke()
)

In [27]:
# Render the prompt on its own (no model in this call) to inspect the final text.
p = prompt.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [28]:
p

StringPromptValue(text='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')

In [29]:
# Value bound to prompt2's {word} placeholder through partial_variables.
word_content = "키워드"

In [30]:
# prompt2: same request as `prompt`, but the noun to extract is also pre-filled
# through partial_variables ({word} <- word_content).
# Fix: the template used to read "{word}를를" (duplicated object particle), which
# rendered as "키워드를를"; it now matches the wording used by `prompt`.
prompt2 = PromptTemplate(
    template="{subject}에 대한 {word}를 추출해줘.\n{format_instructions}",
    input_variables=["subject"],  # supplied by the caller at invoke() time
    # partial_variables are bound here, so invoke() does not need to pass them.
    partial_variables={"format_instructions": format_instructions, "word": word_content},
)

In [31]:
# Render prompt2 (the original rendered `prompt` again, so p2 was just a copy of p
# and the {word}-parameterised template was never shown).
p2 = prompt2.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [32]:
p2

StringPromptValue(text='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`')

In [33]:
p.to_messages()

[HumanMessage(content='LangChain is a framework for developing applications powered by language models.에 대한 키워드를 추출해줘.\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})]

In [34]:
# prompt -> model -> comma-separated list parser; the recorded result is a list of str.
chain = prompt | model | output_parser

In [35]:
# Run the list-parser chain on the sample sentence (rebinds `out`).
out = chain.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [36]:
out

['LangChain', 'framework', 'language models', 'applications']

In [37]:
type(out)

list

In [38]:
out[1]

'framework'

In [39]:
# Same pipeline as `chain`, but starting from prompt2 (with the pre-filled {word}).
chain2 = prompt2 | model | output_parser

In [40]:
# Run the prompt2-based chain on the sample sentence (rebinds `out2`).
out2 = chain2.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [41]:
out2

['LangChain', 'framework', 'language models', 'applications']

In [42]:
# What happens if no output parser (here: the comma-separated list parser) is attached?

In [44]:
# prompt2 -> model, with no output parser at the end.
chain3 = prompt2 | model

In [45]:
# Run the parser-less chain on the sample sentence.
out3 = chain3.invoke({"subject": "LangChain is a framework for developing applications powered by language models."})

In [46]:
# Without an output parser the LLM message is returned as-is, as an AIMessage.
out3

AIMessage(content='LangChain, framework, language models, applications\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-3b515493-dd08-49c4-a83e-d598d6dca3a1-0', usage_metadata={'input_tokens': 55, 'output_tokens': 10, 'total_tokens': 65})

### 데이터 형식 정의하기

In [47]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field #프로토콜을 만들 때 사용하고 Data를 주고 받을 때 형식을 지정한다.

In [None]:
# When using OpenAI
# NOTE(review): the Gemini cell that follows rebinds `model`.
from langchain_openai import ChatOpenAI
model = ChatOpenAI(temperature=0)

In [48]:
# Gemini chat model ("gemini-1.5-pro", temperature=0) used by the chains that follow.
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)

In [49]:
# Define your desired data structure.

class Translation(BaseModel): # Declare a class that inherits from BaseModel.
    # `translated_text: str` declares a field named translated_text whose type is str.

    # Field(description=...) attaches extra metadata to the translated_text field;
    # here the description is "번역된 텍스트" ("translated text").
    translated_text: str = Field(description="번역된 텍스트")  

In [50]:
# And a query intended to prompt a language model to populate the data structure.
query = "안녕 세상"

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Translation)
from_instruction = parser.get_format_instructions()
from_instruction
# The generated instructions carry the output key defined on Translation
# (translated_text), its type (string) and its description ("번역된 텍스트").
# This text becomes the format-instruction part of the prompt.

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```'

In [51]:
# Translation prompt: format instructions first, then the request line
# "아래 내용을 번역해라" followed by the query text.
# Note: this rebinds `prompt`, which previously held the keyword-extraction template.
prompt = PromptTemplate(
    template="{format_instructions}\n아래 내용을 번역해라\n{query}",
    input_variables=["query"],  # supplied by the caller at invoke() time
    partial_variables={"format_instructions": from_instruction},  # bound now from the parser's instructions
)

In [52]:
prompt.invoke({"query": query}).to_messages()

[HumanMessage(content='The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"translated_text": {"title": "Translated Text", "description": "번역된 텍스트", "type": "string"}}, "required": ["translated_text"]}\n```\n아래 내용을 번역해라\n안녕 세상', additional_kwargs={}, response_metadata={})]

In [53]:
# prompt -> model -> JsonOutputParser; the recorded result is a dict keyed by translated_text.
chain = prompt | model | parser

out = chain.invoke({"query": query})

In [54]:
out

{'translated_text': 'Hello world'}

In [55]:
type(out)

dict

In [56]:
out['translated_text']

'Hello world'

In [57]:
# Without an output parser the LLM message is returned as-is, as an AIMessage.

chain2 = prompt | model

out2 = chain2.invoke({"query": query})

In [58]:
out2

AIMessage(content='```json\n{"translated_text": "Hello world"}\n```\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-1c4cf926-c4e5-490b-b51d-7262af5e8b3b-0', usage_metadata={'input_tokens': 178, 'output_tokens': 15, 'total_tokens': 193})

In [59]:
out2.content

'```json\n{"translated_text": "Hello world"}\n```\n'

### Custom output Parser
### output Parser를 자유롭게 만들기

In [66]:
from langchain_core.output_parsers import BaseOutputParser

class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""

    # NOTE(review): this class reuses the name of the CommaSeparatedListOutputParser
    # imported from langchain_core.output_parsers earlier in the notebook and
    # rebinds that name from this cell onward.

    def parse(self, text: str) -> list:
        """Split the LLM's text output on commas.

        Args:
            text: Raw text of the LLM message.

        Returns:
            The comma-separated items with surrounding whitespace removed.
            Empty items (e.g. from a trailing comma or an empty response)
            are dropped, so empty input yields an empty list.
        """
        # Split on "," rather than ", " so both `foo, bar` and `foo,bar` are
        # handled, then trim each piece (this also removes a trailing newline).
        pieces = (piece.strip() for piece in text.split(","))
        return [piece for piece in pieces if piece]

In [67]:
# Keyword-extraction prompt (same template text as in the StrOutputParser example).
human_message_prompt = "'{text}' 여기서 키워드를 뽑아서 콤마로 구분해줘"
# Wrap the template string as a human message, then build a one-message chat prompt from it.
human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_message_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt_template])

In [68]:
# prompt -> model -> the custom comma parser defined in this section.
chain_with_comma_parser = chat_prompt_template | model | CommaSeparatedListOutputParser()

In [69]:
# Run the custom-parser chain on the sample sentence (rebinds `out`).
out = chain_with_comma_parser.invoke({"text": "LangChain is a framework for developing applications powered by language models."})

In [70]:
out

['LangChain', 'framework', 'language models', 'applications']

In [71]:
# Print each parsed keyword on its own line.
for keyword in out:
    print(keyword)

LangChain
framework
language models
applications
