In [None]:
%pip install --upgrade langchain langchain-core langchain-community


In [None]:
from dotenv import load_dotenv, find_dotenv
from langchain_core.prompts import (SystemMessagePromptTemplate, 
                                    HumanMessagePromptTemplate, 
                                    ChatPromptTemplate,
                                    PromptTemplate)
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import PydanticOutputParser, JsonOutputParser
from langchain_core.output_parsers import CommaSeparatedListOutputParser 
# Correct
from typing import Optional
from pydantic import BaseModel, Field


In [None]:
# Load environment variables from the nearest .env file before the client is
# built. ChatOpenAI takes its API key from the environment (OPENAI_API_KEY)
# when none is passed explicitly; load_dotenv does not override variables
# that are already set, so an existing environment keeps working unchanged.
load_dotenv(find_dotenv())

# Chat model shared by every chain in this notebook.
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.4)

In [None]:
# 1. Declaring a Pydantic model: this is similar to declaring a schema.
# The schema can be given to an output parser so that the output is generated in this format.

class Joke(BaseModel):
    """Joke to tell user"""

    # Required text fields: the opening line and the payoff of the joke.
    setup: str = Field(
        description='The setup of the Joke',
    )
    punchline: str = Field(
        description='The punchline of the Joke',
    )

    # Optional score; when present it is constrained to 1 <= rating <= 10,
    # and it defaults to None when omitted.
    rating: Optional[int] = Field(
        description='The rating for overall joke from 1 to 10',
        default=None,
        ge=1,
        le=10,
    )



In [None]:
#Analogy ---- Create a table named Joke which has 3 columns
#             setup : str , not null
#             punchline: str , not null
#             rating: int , null, None

#
# Constraints for Field object
#==============================
# ge --- greater than or equal to
# le --- less than or equal to
# gt --- greater than
# lt --- less than

# Output parser bound to the Joke schema declared above.
parser = PydanticOutputParser(pydantic_object=Joke)

In [None]:
# Ask the parser for its format instructions: a text snippet that can be
# embedded in a prompt template. The bare name on the last line displays it.
instruction = parser.get_format_instructions()
instruction

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"description": "Joke to tell user", "properties": {"setup": {"description": "The setup of the Joke", "title": "Setup", "type": "string"}, "punchline": {"description": "The punchline of the Joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"maximum": 10, "minimum": 1, "type": "integer"}, {"type": "null"}], "default": null, "description": "The rating for overall joke from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline"]}\n```'

In [None]:
# Ways to use the Pydantic schema shown in this notebook:
#   1. Inject the parser's format instructions into the prompt (this cell).
#   2. Append the parser to the chain:  prompt | llm | parser
#   3. Pass the Pydantic class directly to the LLM (with_structured_output).

# Step 1: Build the prompt template.
prompt = PromptTemplate(
    template="""
Answer the query with the joke. Here is your formatting instructions.
{format_instruction}
query: {query}
Answer: """,
    # 'input_variables' lists the keys the template expects at runtime
    # (here: 'query'). The keyword was previously misspelled as
    # 'input_variable', so the declaration was not actually applied.
    input_variables=['query'],
    # 'partial_variables' are values filled in at creation time.
    # Here, format_instruction is filled with the instructions from the parser.
    partial_variables={'format_instruction': parser.get_format_instructions()},
)

# The result is a prompt template that, when given a 'query', fills in both
# 'query' (the user's actual question) and 'format_instruction'
# (the instructions for output formatting).

# Step 2: Create a chain by connecting the prompt template to the LLM.
# The input dict first fills the prompt, and the result is sent to the LLM.
chain = prompt | llm

output = chain.invoke({
    'query': 'Tell me a joke about the cat'
})

# No parser is in the chain, so print the content of the returned message.
print(output.content)

Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!


In [None]:
# Way 2: put the parser at the end of the chain, so the model reply is
# handed to `parser` instead of being returned as-is.
chain = prompt | llm | parser

output = chain.invoke(
    {
        'query': 'Tell me a joke about Dog',
    }
)
print(output)

setup='Why did the dog sit in the shade?' punchline="Because he didn't want to become a hot dog!" rating=7


In [None]:
# Method 3: hand the Pydantic class straight to the LLM.

# Baseline for comparison: a plain call, printing the message content.
output = llm.invoke('Tell me a Joke about Elephant')
print(output.content)

# Same request through the wrapper returned by with_structured_output(Joke).
structured_llm = llm.with_structured_output(Joke)
structured_output = structured_llm.invoke('Tell me a Joke about Elephant')
print(structured_output)

Why do elephants never use computers?

Because they're afraid of the mouse! üêòüñ±Ô∏è
setup="Why don't elephants use computers?" punchline="Because they're afraid of the mouse!" rating=8


In [None]:
# JsonOutputParser, configured with the same Joke schema.
json_parser = JsonOutputParser(pydantic_object=Joke)

# Show the format instructions this parser generates.
print(json_parser.get_format_instructions())

STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema (shown in a code block for readability only ‚Äî do not include any backticks or Markdown in your output)

In [None]:
# Prompt whose {format_instruction} slot is pre-filled from json_parser;
# only 'query' has to be supplied when the chain is invoked.
prompt = PromptTemplate(
    template="""
Answer the user query with a Joke, here are the formatting instructions:
{format_instruction}
Query: {query}
Answer: """,
    input_variables=['query'],
    partial_variables={
        'format_instruction': json_parser.get_format_instructions(),
    },
)

# Only the prompt and the model are chained here; json_parser is not a chain
# step, so the printed value is the content of the returned message.
chain = prompt | llm
output = chain.invoke({'query': 'Tell me a joke about computer science'})

print(output.content)

{"setup":"Why do programmers prefer dark mode?","punchline":"Because light attracts bugs!","rating":8}


In [None]:
# CommaSeparatedListOutputParser
# NOTE: this rebinds `parser`, replacing the PydanticOutputParser assigned
# earlier in the notebook.
parser = CommaSeparatedListOutputParser()

# Show the format instructions this parser generates.
print(parser.get_format_instructions())

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [None]:
# Prompt for a list-style answer. The template text (including its
# indentation and trailing commas) is part of what is sent to the model.
prompt = PromptTemplate(
    template="""
Answer the user query with the list of values. Here is your formatting instruction.
                        {format_instruction},
                        query: {query},
                        Answer:  
""",
    input_variables=['query'],
    partial_variables={
        'format_instruction': parser.get_format_instructions(),
    },
)

# Prompt and model only: `parser` contributes its instructions to the prompt
# but is not a step in this chain.
chain = prompt | llm

output = chain.invoke(
    {'query': 'generate my website seo keywords. I have content about the NLP and LLM.'}
)

# Bare expression so the notebook displays the message content.
output.content

'NLP, LLM, natural language processing, language models, machine learning, AI, artificial intelligence, text analysis, deep learning, conversational AI, data science, semantic analysis, language understanding, chatbot development, speech recognition, sentiment analysis'