In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field # Pydantic for defining structured schemas

# Load environment variables from a .env file. 🌍
# Keeping secrets such as the Hugging Face API token in that external file,
# rather than hardcoding them in the notebook, keeps them out of the source.
# The call is deliberately left as the cell's last expression, so its return
# value is what the cell displays as output.
load_dotenv()

True

In [2]:
# Configure the remote LLM endpoint. 🤖
# `repo_id` selects the 'google/gemma-2-2b-it' model (a relatively small,
# instruction-tuned LLM) served through Hugging Face, and `task` asks the
# endpoint for plain text generation.
llm = HuggingFaceEndpoint(repo_id="google/gemma-2-2b-it", task="text-generation")

In [3]:
# Wrap the HuggingFaceEndpoint with ChatHuggingFace. 💬
# The wrapper exposes the endpoint through LangChain's chat-model interface,
# which is what lets `model` serve as the middle step of the chain built
# later in this notebook.
model = ChatHuggingFace(llm=llm)

In [4]:
# Pydantic schema describing the structured output we want back. 📝
# Each `Field` description becomes part of the schema that the output parser
# turns into format instructions, so the wording below is also what the LLM sees.
# NOTE: documented with `#` comments rather than a class docstring, because
# Pydantic copies a model docstring into the JSON schema description, which
# would change the instructions embedded in the prompt.
class Person(BaseModel):
    # Person's name (string).
    name: str = Field(description="Name of the person")
    # Whole-number age; `gt=18` is a strict bound, so 18 itself is rejected.
    age: int = Field(gt=18, description="Age of the person")
    # City the person belongs to (string).
    city: str = Field(description="Name of the city the person belongs to")

In [5]:
# Create a PydanticOutputParser bound to the `Person` schema. ⚙️
# The parser is used twice further down:
# 1. `parser.get_format_instructions()` supplies the text injected into the
#    prompt so the LLM knows what structure to produce.
# 2. As the last step of the chain it converts the model's raw reply into a
#    `Person` object. A reply that does not conform to the schema (e.g., not
#    valid JSON, wrong types, missing fields, or an age that is not > 18) is
#    expected to raise an error instead; that path is not shown in this notebook.
parser = PydanticOutputParser(pydantic_object=Person)

In [6]:
# Build the prompt template. ✍️
# - `{place}` is the only value supplied at invoke time.
# - `{format_instruction}` is pre-filled once, right here, with the parser's
#   formatting instructions, so every rendered prompt tells the LLM how its
#   answer must be structured to match the `Person` schema.
template = PromptTemplate(
    input_variables=["place"],
    partial_variables={"format_instruction": parser.get_format_instructions()},
    template="Generate the name, age and city of a fictional {place} person \n {format_instruction}",
)

In [7]:
# Create a LangChain Expression Language (LCEL) chain. 🔗
# The `|` operator pipes the output of one component to the input of the next.
# 1. `template`: Takes an input dict such as `{'place': 'Xandar'}` (the value
#    used when the chain is invoked below) and generates the full prompt.
# 2. `model`: Sends the prompt to the `google/gemma-2-2b-it` model.
#    The model generates a text response based on the prompt, attempting to follow
#    the format instructions (which is the challenging part for smaller models).
# 3. `parser`: Receives the raw output from the model and attempts to parse it
#    into a `Person` Pydantic object. If the output isn't in the expected JSON format,
#    this step will likely fail.
chain = template | model | parser

In [8]:
# Run the whole chain for a single input. 🚀
# 'Xandar' fills the `{place}` slot — Guardians of the Galaxy fans will get it! 😉
# The steps execute in order; if the model's reply cannot be parsed into a
# `Person`, the error surfaces on this line and the print below never runs.
final_result = chain.invoke({"place": "Xandar"})

# Show the parsed result. 📊
# On success this is an instance of the `Person` Pydantic model.
print(final_result)

name='Anya Nova' age=32 city='Cygnus'
