In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file that Kaggle mounted under the input directory,
# printing one absolute path per line.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for file_name in names_in_folder:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Run in Add-ons
# ! pip install langchain
# ! pip install langchain_openai
# ! pip install pydantic
# ! pip install langchain_huggingface

# 1. With Structured Output - for LLMs that can return structured output natively

# 1.1 TypedDict

In [102]:
from langchain_openai import ChatOpenAI
from typing import TypedDict,  Annotated, Optional, Literal

In [None]:
# Read the OpenAI key from the OPENAI_API_KEY environment variable so the secret
# never has to be typed into (or saved with) the notebook. The original
# placeholder is kept as the fallback so the cell still runs unchanged when the
# variable is not set. Relies on `os`, imported in the first cell.
api_key = os.environ.get("OPENAI_API_KEY", 'your_api_key')

In [136]:

# OpenAI chat model client, constructed with the `api_key` defined above.
# Reused by every `with_structured_output` example and by the final pydantic chain.
model = ChatOpenAI(api_key = api_key)

In [None]:
# schema
# TypedDict describing the fields requested from the model for one review.
# Each Annotated[...] pairs the field's type with a plain-text hint for that field.
# Documented with comments only: the class body is what gets passed to
# `with_structured_output`, so no docstring is added here.
class Review(TypedDict):

    # Topics the review talks about.
    key_themes: Annotated[list[str], "Review key themes discussed"]
    # Short free-text summary.
    summary: Annotated[str, "Brief summary of the review"]
    # Restricted to the two literal labels "pos" / "neg".
    sentiment: Annotated[Literal["pos", "neg"], "Sentiment analysis of the review"]
    # Positive points listed in the review.
    pros: Annotated[list[str], "Pros as per mentioned in the review"]
    # Negative points; the type also allows None.
    cons: Annotated[Optional[list[str]], "Cons as per mentioned in the review"]
    # Countries named in the review; the type also allows None.
    country_of_origin: Annotated[Optional[list[str]], "Name of the countries of Origin explicitely mentioned in the Review"]
    
    

In [None]:
# Show what kind of object the Review schema class is at runtime.
print(type(Review))

In [None]:
# Derive a runnable from `model` whose replies follow the Review TypedDict schema.
strutured_output = model.with_structured_output(Review)

In [None]:
feedback = """Kia cars have transformed their image over the years, moving from being seen as an affordable, budget-focused brand to becoming a global player that delivers stylish, well-equipped, and reliable vehicles. The design language, highlighted by the brand’s signature Tiger Nose grille, modern lighting, and sharp lines, gives Kia cars a premium presence on the road. Inside, the cabins are feature-rich with advanced infotainment systems, connected car technology, and safety features that rival more expensive brands. Performance is balanced, offering efficiency across petrol, diesel, hybrid, and electric options, with models like the EV6 showing the company’s strong push toward sustainability. Customers often appreciate the value for money that Kia provides, as the cars are competitively priced and supported by long warranties, which boosts buyer confidence.

However, there are a few downsides. In some markets, Kia’s resale value doesn’t match up to established Japanese competitors like Toyota or Honda, and in certain regions the service network is still expanding, which can limit convenience. Additionally, while the ride quality is generally good, some models may feel a bit firm compared to rivals. Despite these challenges, the overall sentiment around Kia cars is largely positive, with buyers recognizing their attractive design, advanced features, safety standards, and strong pricing advantage. Kia has successfully bridged the gap between affordability and premium appeal, positioning itself as a brand that delivers modern, practical, and stylish vehicles for a wide range of customers."""

In [None]:
# Send the Kia review text (`feedback`) through the TypedDict-based runnable.
result = strutured_output.invoke(feedback)

In [None]:
# Display the structured result produced for the review.
print(result)

# Observation: 
TypedDict does not validate or enforce the schema, i.e. the schema is for informational purposes only and a value of a different data type can still be assigned to any field.

# 1.2 Pydantic

In [100]:
from pydantic import BaseModel, Field

In [None]:
class PydReview(BaseModel):
    # Pydantic schema for one product review, used with `with_structured_output`.
    # Documented with comments rather than a class docstring so the schema text
    # generated from this model is not altered by the documentation.

    # Required: main topics the review talks about.
    key_themes: list[str] = Field(description = "Key themese mentioned in the review")
    # Required. Fixed: the keyword was misspelled `decsription`, so this field
    # never received its description.
    summary: str = Field(description = "Summary of the review")
    # Required: only the literal labels "pos" and "neg" are accepted.
    sentiment: Literal["pos", "neg"] = Field(description = "Sentiment of the review")
    # Optional. Fixed: the default used to be the string "Great Car", which is not a
    # valid Optional[list[str]] value; if not mentioned then default is None.
    pros: Optional[list[str]] = Field(default=None, description = "pros mentioned in the feedback")
    # Optional. Fixed: without a default the field was still required even though
    # its type allows None.
    cons: Optional[list[str]] = Field(default=None, description = "cons mentioned in the feedback")
    # Optional. Fixed: a bare string inside Annotated[...] is not picked up by pydantic
    # as a description (and the field was required); use Field(...) like the others.
    country_of_origin: Optional[list[str]] = Field(default=None, description = "Name of the countries of Origin explicitely mentioned in the Review")

In [None]:
# Same model as before, now constrained by the pydantic PydReview schema.
pyd_strutured_output = model.with_structured_output(PydReview)

In [None]:
# Run the same review text through the pydantic-based runnable and show the result.
pyd_result = pyd_strutured_output.invoke(feedback)
print(pyd_result)

In [None]:
# Show the two conversions mentioned in the notes below:
# a plain dict built from the result, and its JSON string form.
print(dict(pyd_result))
print(pyd_result.model_dump_json())

# Observation
1. Pydantic provides schema validation.
2. Pydantic output can be converted to a dict or to JSON.

# 1.3 JSON

In [None]:
# JSON-Schema description of a review, passed to `with_structured_output`.
# Only key_themes, summary and sentiment are required; the rest may be omitted.
JsReview = {
    "title": "Review",
    "type": "object",
    "properties": {
        "key_themes": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "key themes listed in feedback"
        },
        "summary": {
            "type": "string",
            "description": "summary of the feedback in 50 words"
        },
        "sentiment": {
            # Fixed: was "integer", which contradicts the string enum below
            # (no value can be both an integer and one of "pos"/"neg").
            "type": "string",
            "enum": ["pos", "neg"],
            "description": "sentiment analysis of review"
        },
        "pros": {
            # Either a list of strings or null.
            "type": ["array", "null"],
            "items": {
                "type": "string"
            },
            "description": "pros/benefits mentioned in feedback"
        },
        "cons": {
            # Either a list of strings or null.
            "type": ["array", "null"],
            "items": {
                "type": "string"
            },
            "description":"cons/limitations mentioned in feedback. If none is found mention 'No cons found'"
        },
        "country_of_origin": {
            "type": "string",
            "description": "country of origin if mentioned in the feedback. Else mention 'No country mentioned'"
        }
    },
    "required": ["key_themes", "summary", "sentiment"]
}

In [None]:
# Same model, this time constrained by the raw JSON-Schema dict JsReview.
js_strutured_output = model.with_structured_output(JsReview)

In [None]:
# Run the review text through the JSON-Schema-based runnable and show the result.
js_result = js_strutured_output.invoke(feedback)
print(js_result)

# Observation
* Schema validation is not available.
* There is no explicit way to declare a default value for a field.
* Pydantic is preferred over a raw JSON schema.

# 2. With Output Parsers - for LLMs that do not support built-in structured output
Output parsers can also be used with LLMs that do support structured output.

# 2.1 String Output Parser

**FLOW** 

1. Initiate the model


   Using Parsers
   2. Parser
   3. create all templates
   4. Chain creation
   5. invoking model
   6. Result


   Without Parsers
   2. create all templates
   3. 1st prompt creation
   4. invoke model
   5. 2nd prompt creation
   6. invoke model
   7. 3rd prompt creation
   8. invoke model
   9. use parser
       

In [47]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [None]:
# Read the Hugging Face token from the HUGGINGFACEHUB_API_TOKEN environment
# variable instead of hard-coding it in the notebook. The original placeholder is
# kept as the fallback so the cell still runs unchanged when the variable is not
# set. Relies on `os`, imported in the first cell.
hug_api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN", 'your_api_key')

In [42]:
# Hugging Face endpoint client for the google/gemma-2-2b-it model,
# configured for text generation and given the token from `hug_api_key`.
llm = HuggingFaceEndpoint(
    repo_id = "google/gemma-2-2b-it",
    task="text-generation",
    huggingfacehub_api_token = hug_api_key
)

In [43]:
# Chat-style wrapper around the endpoint above; used by the output-parser examples
# in sections 2.1 - 2.3.
chat_model = ChatHuggingFace(llm=llm )

In [None]:
# 1. template creation

In [29]:
# First prompt of the two-step flow: asks for a detailed report on {topic}.
# Fixed: the keyword was misspelled `input_variable`; the PromptTemplate field is
# `input_variables` (as used by the other templates in this notebook).
template1 = PromptTemplate(
    template="Write a detailed report on the topic {topic}",
    input_variables = ["topic"]
)

In [30]:
# Second prompt of the two-step flow: asks for a short summary of {report}.
# Fixed: the keyword was misspelled `input_variable`; the PromptTemplate field is
# `input_variables` (as used by the other templates in this notebook).
template2 = PromptTemplate(
    template="Write a short summary of the following text :{report}",
    input_variables = ["report"]
)

In [None]:
# 2. prompt creation
# 3. invoke model

In [52]:
# langchain flow without using output parsers
# Step 1: fill the report template with the topic.
prompt1 = template1.invoke({"topic": "Daily Eelectronic items"})

# Step 2: ask the chat model for the report.
result1 = chat_model.invoke(prompt1)

# Step 3: the text has to be pulled out of the reply by hand (`.content`)
# before it can be placed into the summary template.
prompt2 = template2.invoke({"report": result1.content})

# Step 4: ask the chat model for the summary.
result2 = chat_model.invoke(prompt2)

In [None]:
# langchain flow with output parsers

In [None]:
# 2. parser creation

In [49]:
# Parser placed after each model call in the chain below.
str_output_parser = StrOutputParser()

In [None]:
# 3. chain creation

In [50]:
# Chain the whole two-step flow with output parsers:
# report prompt -> model -> parser -> summary prompt -> model -> parser.
# The parser between the two stages takes the place of the manual `.content`
# extraction used in the parser-less flow above.

chain = template1 | chat_model | str_output_parser | template2 | chat_model | str_output_parser

In [None]:
# 4. invoke model

In [51]:
# Run the full chain for one topic; `result` is whatever the final parser returns.
result = chain.invoke({"topic": "Daily Eelectronic items"})

print(result)

This report provides a comprehensive overview of the widespread prevalence of electronic devices in modern life, categorizing them into major areas like smartphones, laptops, smartwatches, and home devices. It explores the key benefits and  trends driving these technological advancements, emphasizing the increasing integration of AI, increased connectivity, and the rise of the Metaverse.  

The report delves into the positive contributions of these technologies, such as enhancing productivity at work and home, offering a wealth of entertainment options, and fostering social connection.  However, it also acknowledges the ethical and social complexities raised by their ubiquitous usage, emphasizing the need to address concerns regarding digital divides, environmental impact, privacy, and mental health.  

Finally, the report briefly discusses future technological trends that will likely shape the future of electronic devices, including the integration of AI and advanced connectivity, the

# 2.2 JSON Output Parser

In [54]:
from langchain_core.output_parsers import JsonOutputParser

In [None]:
# 1. parser creation

In [55]:
# Parser used both for its format instructions (in the prompt) and to parse the reply.
json_parser = JsonOutputParser()

In [None]:
# 2. template creation

In [59]:
# Prompt asking for a fictional character of the given anime.
# {name} is supplied at invoke time; {format_instructions} is pre-filled once
# from the JSON parser so callers never have to pass it.
json_template = PromptTemplate(
    template= "Provide the name, age, city and designation of a fictional character of the Anime : {name}. Use {format_instructions}",
    input_variables = ["name"],
    partial_variables = {'format_instructions': json_parser.get_format_instructions()}
)

In [None]:
# 3. chain creation
# 4. invoke model

In [66]:
# Prompt -> model -> JSON parser, run for one anime; the parsed result is printed.
json_chain = json_template | chat_model | json_parser
print(json_chain.invoke({"name": "Demon Slayer"}))

{'name': 'Shinobu Kocho', 'age': 18, 'city': ' matahari pasa subara', 'designation': ' Hashira, Insect Breathing Specialist'}


OR

In [None]:
# 3. prompt creation
# 4. invoke model

In [96]:
# Chain-free variant of the same flow: build the prompt, call the model,
# then parse the reply explicitly.
json_prompt = json_template.invoke({"name": "Naruto"})
json_result = chat_model.invoke(json_prompt)

# parse() is handed the reply's text (`.content`), not the message object itself.
print(json_parser.parse(json_result.content))

{'name': 'Kakashi Hatake', 'age': 37, 'city': ' Konohagakure', 'designation': 'Jonin, Sharingan user, Team 7 sensei'}


# OBSERVATION

Although we are able to generate the output as JSON, there is no way to enforce a specific JSON schema.

# 2.3 STRUCTURED Output Parser

In [67]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [None]:
# 1. parser creation

In [85]:
# Response fields expected from the model, built from (name, description) pairs.
schema = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ('name', 'name of the character'),
        ('age', 'age of the character'),
        ('designation', 'Designation in the series'),
        ('speciality', 'Any special move, skill or fact about the character'),
    )
]

# Parser that supplies the format instructions and parses replies for these fields.
structured_parser = StructuredOutputParser.from_response_schemas(schema)

In [None]:
# 2. template creation

In [87]:
# Prompt for the StructuredOutputParser example.
# Fixed: the wording used to ask for "city" and "skill", which the response schema
# does not define, and never mentioned "speciality", which it does. The prompt now
# lists exactly the schema's fields: name, age, designation, speciality.
# {name} is supplied at invoke time; {format_instructions} is pre-filled from the parser.
structured_template = PromptTemplate(
    template= "Provide the name, age, designation and speciality of a fictional character of the Anime : {name}. {format_instructions}",
    input_variables = ["name"],
    partial_variables = {'format_instructions': structured_parser.get_format_instructions()}
)

In [70]:
# 3. chain creation
# 4 . invoke model

In [94]:
# Prompt -> model -> structured parser, run for one anime.
structured_chain = structured_template | chat_model | structured_parser 
result = structured_chain.invoke({"name": "Demon Slayer"})

print(result)

{'name': 'Giyu Tomioka', 'age': '19', 'designation': 'Former Water Hashira, current Water Hashira', 'speciality': 'Swordsmanship, Water Breathing Style, emotional and rational stability'}


# Observation

StructuredOutputParser does not perform schema validation.

# 2.4 PYDANTIC Output Parser

In [116]:
from langchain_core.output_parsers import PydanticOutputParser

In [129]:
class PySchema1(BaseModel):
    # Output of the first stage of the pydantic chain: a single anime title.
    # Documented with comments rather than a docstring so the schema text
    # generated from this model stays as it is.
    anime_name: str = Field(description="name of an anime from list of top 5 anime of 2025 ")
    

In [131]:
# Parser for the first stage; tied to the PySchema1 model.
pydantic_parser1 = PydanticOutputParser(pydantic_object=PySchema1)

In [132]:
# Stage-1 prompt: ask for one anime title. It takes no runtime inputs; the only
# placeholder is pre-filled with the parser's format instructions.
# An earlier variant of this prompt lacked the explicit "respond ONLY in the
# following JSON format" line that precedes the format instructions here.
pydantic_template1 = PromptTemplate(
    input_variables=[],
    partial_variables={"format_instructions": pydantic_parser1.get_format_instructions()},
    template=(
        "Provide the name of one anime from the list of top 5 anime of 2025.\n\n"
        "You must respond ONLY in the following JSON format:\n{format_instructions}"
    ),
)

In [147]:
class PySchema(BaseModel):
    # Character profile returned by the second stage of the pydantic chain.
    # Documented with comments rather than a docstring so the schema text
    # generated from this model stays as it is.
    anime: str = Field(description="name of the anime provided by the model")
    name: str = Field(description="name of the character")
    # Declared as int, unlike the string-valued age seen in the StructuredOutputParser output.
    age: int = Field(description="age of the character")
    city: str = Field(description = "city of the person")
    skills: list[str] = Field(description = "List 5 skills")
    # Optional; defaults to "Jonin" when no value is supplied.
    designation: Optional[str] = Field(description = "State an official designation", default="Jonin")
    # Restricted to the four literal labels listed in the type.
    character:  Literal["hero", "villain", "mixed", "not important"] = Field(description = "Whether the characted is a hero, villain both sides or not important in the series")
    

In [148]:
# Parser for the second stage; tied to the PySchema model.
pydantic_parser = PydanticOutputParser(pydantic_object=PySchema)

In [149]:
# Stage-2 prompt: ask for a character profile from the anime chosen in stage 1.
# {anime_name} is the runtime input; {format_instructions} is pre-filled from
# the PySchema parser.
pydantic_template = PromptTemplate(
    template= "Provide the anime_name as anime, name, age, city, designation, skills, character type of a fictional character of the Anime : {anime_name}.\n\n {format_instructions}",
    input_variables = ["anime_name"],
    partial_variables = {'format_instructions': pydantic_parser.get_format_instructions()}
)

In [154]:
# not working with chat_model - Hugging face (so this chain uses the OpenAI `model`)
# Two-stage chain: stage 1 picks an anime title, stage 2 describes one of its characters.
pydantic_chain = (
    pydantic_template1
    | model
    | pydantic_parser1
    # Fixed: pass the second prompt the plain title under its expected key instead of
    # piping the whole PySchema1 object into the template, which would put the
    # object's string form (field name included) into the prompt text.
    | (lambda picked: {"anime_name": picked.anime_name})
    | pydantic_template
    | model
    | pydantic_parser
)
# The first template has no runtime inputs, hence the empty dict.
result = pydantic_chain.invoke({})

print(result)

# anime='Attack on Titan: The Final Season Part 2' name='Eren Yeager' age=19 city='Shiganshina' 
# skills=['Titan Shifting', 'Hand-to-hand combat', 'Leadership', 'Omnidirectional Mobility Gear proficiency', 'Strategy'] designation='Scout Regiment Member'

# anime='Demon Slayer: Kimetsu no Yaiba' name='Tanjiro Kamado' age=15 city='Tokyo' 
# skills=['Breath of Water', 'Sense of Smell', 'Swordsmanship', 'Combat Skills', 'Courage'] designation='Demon Slayer'

anime='Attack on Titan: The Final Season Part 2' name='Eren Yeager' age=19 city='Shiganshina District' skills=['Titan Shifter', 'Hand-to-hand combat', 'Leadership', 'Strategic thinking', 'Determination'] designation='Jaegerist Leader' character='hero'


# Observation
1. We can implement schema enforcement with Pydantic.
2. We can implement schema validation.