In [4]:
import json
from IPython.display import Image
from pydantic import BaseModel
from langchain_core.output_parsers import PydanticOutputParser

In [5]:
from dotenv import load_dotenv
assert load_dotenv()

### official 

In [3]:
Image(url='https://wangwei1237.github.io/2023/09/20/Introduction-to-LangChain/langchain.png', width=500)

- https://github.com/hwchase17/langchain-0.1-guides/blob/master/output_parsers.ipynb
- lcel => agent
    - variable assignment 
    - prompt template
    - llm (with tools)
    - output parse

In [4]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List

In [5]:
class Ingredient(BaseModel):
    name: str = Field(description="The name of the ingredient")
    quantity: str = Field(description="The specific unit of measurement corresponding to the quantity, such as grams, ounces, liters, etc.")
    unit: str = Field(description="The amount of the ingredient required for the recipe. This can be represented using various units such as grams, cups, teaspoons, etc.")

class Recipe(BaseModel):
    name: str = Field(description="The name of the recipe")
    ingredients: List[Ingredient] = Field(description="The list of ingredients for the recipe")

In [6]:
parser = PydanticOutputParser(pydantic_object=Recipe)

In [9]:
from rich.pretty import pprint
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Ingredient": {"properties": {"name": {"description": "The name of the ingredient", "title": "Name", "type": "string"}, "quantity": {"description": "The specific unit of measurement corresponding to the quantity, such as grams, ounces, liters, etc.", "title": "Quantity", "type": "string"}, "unit": {"description": "The amount of the ingredient required for the recipe. This can be represented using various units such as grams, cups, teaspoons, etc.", "title": "Unit", "type": "string"}}, "required": ["name", "quantity", "unit"], "title

#### converting messages

In [28]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

In [29]:
prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
model = ChatOpenAI(model='gpt-3.5-turbo')

In [30]:
chain = prompt | model

In [31]:
chain.invoke({'topic': 'pig'})

AIMessage(content='Why did the pig go to the casino? To play the slop machines!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 13, 'total_tokens': 29, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-dbb76cac-dc4e-4f72-8f2f-416cc8192a24-0', usage_metadata={'input_tokens': 13, 'output_tokens': 16, 'total_tokens': 29})

In [32]:
from langchain_core.output_parsers import StrOutputParser

In [33]:
parser = StrOutputParser()
chain |= parser

In [34]:
chain.invoke({'topic': 'pig'})

'Why did the pig go to the casino? Because he heard they had a lot of "squeal" machines!'

In [12]:
chain

ChatPromptTemplate(input_variables=['topic'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['topic'], input_types={}, partial_variables={}, template='Tell me a joke about {topic}'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7ebb83b6a0f0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7ebb83b6bf50>, root_client=<openai.OpenAI object at 0x7ebb83b29f40>, root_async_client=<openai.AsyncOpenAI object at 0x7ebb83b6a120>, model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [35]:
chain = prompt | model | parser
chain.invoke({'topic': 'pig'})

'Why did the pig go to the casino? \nTo play the slop machine!'

In [36]:
chain = {'topic': lambda x: x['input']} | prompt | model | parser
chain.invoke({'input': 'apple'})

"Why did the apple go to the doctor?\nBecause it wasn't peeling well!"

#### OpenAI Function Calling

In [15]:
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field, validator

In [16]:
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

openai_functions = [convert_to_openai_function(Joke)]
openai_functions

[{'name': 'Joke',
  'description': 'Joke to tell user.',
  'parameters': {'properties': {'setup': {'description': 'question to set up a joke',
     'type': 'string'},
    'punchline': {'description': 'answer to resolve the joke',
     'type': 'string'}},
   'required': ['setup', 'punchline'],
   'type': 'object'}}]

In [17]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

In [18]:
parser = JsonOutputFunctionsParser()

In [19]:
chain = prompt | model.bind(functions=openai_functions) | parser

In [20]:
chain.invoke({'topic': 'pig'})

{'setup': 'Why did the pig go to the casino?',
 'punchline': 'To play the slop machine!'}

### PydanticOutputParser

In [16]:
class WritingScore(BaseModel):
    readability: int
    conciseness: int

In [17]:
schema = WritingScore.schema()
schema

{'title': 'WritingScore',
 'type': 'object',
 'properties': {'readability': {'title': 'Readability', 'type': 'integer'},
  'conciseness': {'title': 'Conciseness', 'type': 'integer'}},
 'required': ['readability', 'conciseness']}

In [18]:
resp = """```
{
  "readability": 8,
  "conciseness": 9
}
```"""

In [19]:
parser = PydanticOutputParser(pydantic_object=WritingScore)

In [20]:
parser.parse(resp)

WritingScore(readability=8, conciseness=9)

### json output

- https://python.langchain.com/docs/integrations/chat/openai/#stricttrue

In [27]:
model = ChatOpenAI(model='gpt-4o')
model.invoke('hi')

AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3537616b13', 'finish_reason': 'stop', 'logprobs': None}, id='run-52462bb3-54fd-47e9-a7db-5bd10b8e94a6-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})

In [28]:
from langchain_core.tools import tool

@tool
def add(a: int, b: int) -> int:
    """Adds a and b.
    
    Args:
        a: first int
        b: second int
    """
    return a + b

@tool
def multiply(a: int, b: int) -> int:
    """Multiplies a and b.
    
    Args:
        a: first int
        b: second int
    """
    return a * b


In [35]:
from langchain_core.messages import HumanMessage
# llm_with_tools = model.bind_tools([add, multiply], strict=True)
llm_with_tools = model.bind_tools([add, multiply], )
messages = [HumanMessage('what is 3*12? Also, what is 11+49?')]
ai_msg = llm_with_tools.invoke(messages)
messages.append(ai_msg)
ai_msg

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Q3ZHvvKR7uTC8pJe61sBu1vP', 'function': {'arguments': '{"a": 3, "b": 12}', 'name': 'multiply'}, 'type': 'function'}, {'id': 'call_UgcQTCjmQcfKr0i2uwHNgr7k', 'function': {'arguments': '{"a": 11, "b": 49}', 'name': 'add'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 50, 'prompt_tokens': 111, 'total_tokens': 161, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_e375328146', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-95c85515-6075-47e1-ba8c-f2ff98748837-0', tool_calls=[{'name': 'multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_Q3ZHvvKR7uTC8pJe61sBu1vP', 'type': 'tool_call'}, {'name': 'add', 'args': {'a': 11, 'b': 49}, 'id': 'call_UgcQTCjmQcfKr0i2uwHNgr7k', 'type': 'tool_call'}], usage_metadata={'input_tokens': 111, 'output_tokens': 50, 'total_tokens': 161})

In [36]:
ai_msg.tool_calls

[{'name': 'multiply',
  'args': {'a': 3, 'b': 12},
  'id': 'call_Q3ZHvvKR7uTC8pJe61sBu1vP',
  'type': 'tool_call'},
 {'name': 'add',
  'args': {'a': 11, 'b': 49},
  'id': 'call_UgcQTCjmQcfKr0i2uwHNgr7k',
  'type': 'tool_call'}]

In [38]:
ai_msg.tool_calls[0]['args']

{'a': 3, 'b': 12}

In [39]:
multiply.invoke(ai_msg.tool_calls[0]['args'])

36

### example

In [40]:
from pydantic import BaseModel

In [41]:
class Step(BaseModel):
    explanation: str
    output: str
class MathResp(BaseModel):
    steps: list[Step]
    final_answer: str

In [42]:
tools = [MathResp]
llm = ChatOpenAI(model='gpt-4o')
math_tutor = llm.bind_tools(tools)

as_msg = math_tutor.invoke('solve 8x+31=2')

In [43]:
as_msg

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_i2aCQLtmp96fSp1ecfc6MJLF', 'function': {'arguments': '{"steps":[{"explanation":"Subtract 31 from both sides of the equation to isolate the term with the variable.","output":"8x + 31 - 31 = 2 - 31"},{"explanation":"Simplify both sides of the equation.","output":"8x = -29"},{"explanation":"Divide both sides of the equation by 8 to solve for x.","output":"8x / 8 = -29 / 8"},{"explanation":"Simplify the right side of the equation.","output":"x = -29/8"}],"final_answer":"x = -29/8"}', 'name': 'MathResp'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 132, 'prompt_tokens': 58, 'total_tokens': 190, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_e375328146', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-bd1cd2e1-ab65-49da-8f0b-0caf665f830e-0', tool_calls=[{'name': 'MathResp', 'args': {'steps': [{'explanation': 'Su

In [44]:
as_msg.tool_calls

[{'name': 'MathResp',
  'args': {'steps': [{'explanation': 'Subtract 31 from both sides of the equation to isolate the term with the variable.',
     'output': '8x + 31 - 31 = 2 - 31'},
    {'explanation': 'Simplify both sides of the equation.',
     'output': '8x = -29'},
    {'explanation': 'Divide both sides of the equation by 8 to solve for x.',
     'output': '8x / 8 = -29 / 8'},
    {'explanation': 'Simplify the right side of the equation.',
     'output': 'x = -29/8'}],
   'final_answer': 'x = -29/8'},
  'id': 'call_i2aCQLtmp96fSp1ecfc6MJLF',
  'type': 'tool_call'}]

In [46]:
as_msg.tool_calls[0]['args']

{'steps': [{'explanation': 'Subtract 31 from both sides of the equation to isolate the term with the variable.',
   'output': '8x + 31 - 31 = 2 - 31'},
  {'explanation': 'Simplify both sides of the equation.',
   'output': '8x = -29'},
  {'explanation': 'Divide both sides of the equation by 8 to solve for x.',
   'output': '8x / 8 = -29 / 8'},
  {'explanation': 'Simplify the right side of the equation.',
   'output': 'x = -29/8'}],
 'final_answer': 'x = -29/8'}

In [52]:
for i, step in enumerate(as_msg.tool_calls[0]['args']['steps']):
    print(f'step: {i+1}\nexplanation: {step['explanation']}\noutput: {step['output']}\n')
print(f'final answer: {as_msg.tool_calls[0]['args']['final_answer']}')

step: 1
explanation: Subtract 31 from both sides of the equation to isolate the term with the variable.
output: 8x + 31 - 31 = 2 - 31

step: 2
explanation: Simplify both sides of the equation.
output: 8x = -29

step: 3
explanation: Divide both sides of the equation by 8 to solve for x.
output: 8x / 8 = -29 / 8

step: 4
explanation: Simplify the right side of the equation.
output: x = -29/8

final answer: x = -29/8
