In [1]:
# +----------------+
# |  Input Prompt  |
# +----------------+
#         |
#         v
# +----------------+
# |      LLM       |
# +----------------+
#         |
#         v
# +--------------------------+
# | PydanticOutputParser     |
# | (Validate + parse)       |
# +--------------------------+
#         |
#         v
# +--------------------------+
# |  Python Pydantic Object  |
# +--------------------------+

In [1]:
# STEP 1: Install the essentials
!pip install -q langchain langchain-groq langchain_community pydantic

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━[0m [32m1.9/2.5 MB[0m [31m59.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# STEP 2: Gather your spellbooks
from langchain_groq import ChatGroq
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder
)
from langchain_core.runnables import RunnableWithMessageHistory
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory
)
from langchain_core.output_parsers.pydantic import PydanticOutputParser
from pydantic import BaseModel, Field
from IPython.display import Markdown, display
from google.colab import userdata
import os

In [4]:
# STEP 3: Whisper your secret key to the winds
try:
    api_key = userdata.get("GROQ_API_KEY")
except Exception:
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise ValueError("GROQ_API_KEY not found. Please set it!")

llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=api_key,
    temperature=0.3,
)

In [5]:
# STEP 4: Craft the Pydantic schema that your LLM must respect.
# -------------------------------------------------------------
# This model defines the expected output.
# The Output Parser will check that the LLM output matches this schema.
# -------------------------------------------------------------

class ConfusionMatrixExplanation(BaseModel):
    answer: str = Field(..., description="Explanation of what a confusion matrix is.")
    source: str = Field(..., description="Where this information came from.")

In [14]:
# STEP 5: Summon the PydanticOutputParser with your model.
# It will enforce validation automatically.
parser = PydanticOutputParser(pydantic_object=ConfusionMatrixExplanation)

In [24]:
# STEP 6: This is CRUCIAL.
# -------------------------------------------------------------
# You MUST tell your LLM how to format its output to match the Pydantic schema.
# The parser provides a format_instructions string you can inject in your prompt.
# -------------------------------------------------------------

# Get the format instructions
format_instructions = parser.get_format_instructions()

# Put a placeholder
system_msg = SystemMessagePromptTemplate.from_template(
    "You are a helpful assistant. Please format your response to match these instructions:\n{format_instructions}"
)

human_msg = HumanMessagePromptTemplate.from_template(
    "{input}"
)

# Compose the chat prompt
chat_prompt = ChatPromptTemplate.from_messages([
    system_msg,
    MessagesPlaceholder(variable_name="history"),
    human_msg
])

# Supply the format_instructions as partial
chat_prompt = chat_prompt.partial(format_instructions=format_instructions)

In [25]:
# STEP 7: Keep your memories safe.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

In [26]:
# STEP 8: Combine your chain.
# -------------------------------------------------------------
# ⚡ IMPORTANT: Using PydanticOutputParser in the chain means the output will be a Pydantic model.
# The memory will store raw text.
# -------------------------------------------------------------
chat_chain = chat_prompt | llm | parser

# Wrap it with memory support
chatbot = RunnableWithMessageHistory(
    chat_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history"
)

In [27]:
# STEP 9: Test it!
session_id = "chat-session-pydantic-001"

user_inputs = [
    "Explain what a confusion matrix is with a short source reference.",
    "Now explain what precision and recall are, and cite any source."
]

print(f"Starting chat loop with session ID: {session_id}")

for input_text in user_inputs:
    print(f"\nUser: {input_text}")
    response_model = chatbot.invoke(
        {"input": input_text},
        config={"configurable": {"session_id": session_id}}
    )
    # This is your validated Pydantic object!
    display(Markdown(f"**Answer:** {response_model.answer}\n\n**Source:** {response_model.source}"))

print("\n--- Stored Chat History ---")
for message in store[session_id].messages:
    print(f"{message.type.capitalize()}: {message.content}")

Starting chat loop with session ID: chat-session-pydantic-001

User: Explain what a confusion matrix is with a short source reference.




**Answer:** A confusion matrix is a table used to evaluate the performance of a classification model, where the true classes are compared against the predicted classes, allowing for the calculation of metrics such as accuracy, precision, and recall.

**Source:** Wikipedia - Confusion Matrix


User: Now explain what precision and recall are, and cite any source.




**Answer:** Precision and recall are two fundamental metrics used to evaluate the performance of a classification model. Precision is the ratio of true positives (correctly predicted instances) to the sum of true positives and false positives (incorrectly predicted instances). It measures the accuracy of the model's positive predictions. Recall, on the other hand, is the ratio of true positives to the sum of true positives and false negatives (missed instances). It measures the model's ability to detect all instances of a particular class. In other words, precision answers the question 'How many of the predicted instances are actually positive?' while recall answers 'How many of the actual positive instances were predicted?'

**Source:** https://en.wikipedia.org/wiki/Precision_and_recall


--- Stored Chat History ---
