In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from typing import TypedDict, Annotated, Optional, Literal

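* **`TypedDict`**: Declares a dictionary type with a **fixed set of keys and a type for each key**; it is the base of the schema class defined below.
* **`Annotated`**: Adds **contextual metadata or descriptions** to a type hint, helping tools (like LangChain) better understand the intended purpose or constraints of a field.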
* **`Optional`**: Indicates that a variable or field **can either have the specified type or be `None`**, making it nullable.
* **`Literal`**: Restricts a type's possible values to a **finite, predefined set of specific literal values** (e.g., only "red", "green", or "blue" for a string).

In [2]:
# Load environment variables from a .env file. 🌍
# Keeping API keys (such as OPENAI_API_KEY) in a .env file avoids hardcoding
# them in the notebook.
# The call is the last expression in the cell, so its return value is shown
# as the cell output.
# NOTE(review): presumably the OpenAI client reads its key from the
# environment populated here — confirm the variable name in your .env.
load_dotenv()

True

In [3]:
# Initialize the ChatOpenAI language model. 🤖
# No arguments are passed, so the model name and sampling settings are
# whatever the library uses by default.
# NOTE(review): the default model depends on the installed langchain_openai
# version; pass model=... explicitly if the run must be reproducible.
# This instance is later wrapped to extract structured information from text.
model = ChatOpenAI()

In [4]:
# Define the schema for the desired structured output using TypedDict. 📝
# This class acts as a blueprint for the JSON object we want the LLM to return.
# - TypedDict: Specifies that Review is a dictionary type with fixed keys and types.
# - Annotated: Attaches extra metadata (here, a description string) to each field.
#              The description tells the LLM what kind of data to extract for
#              that field, so it must agree with the field's declared type.
# - Optional: Allows the field's value to be None (nullable). It does not make
#             the key itself omittable from the TypedDict.
# - Literal: Restricts the value of 'sentiment' to only "pos" (positive) or "neg" (negative).
class Review(TypedDict):
    # 'key_themes' is a list of strings naming the topics the review covers.
    key_themes: Annotated[list[str], "Write down all the key themes discussed in the review in a list"]
    # 'summary' is a string containing a brief summary.
    summary: Annotated[str, "A brief summary of the review"]
    # 'sentiment' must be exactly "pos" or "neg". The description names only
    # those two values so the instruction matches what the Literal accepts
    # (there is no neutral option in this schema).
    sentiment: Annotated[Literal["pos", "neg"], "Return sentiment of the review: 'pos' for positive or 'neg' for negative"]
    # 'pros' is a list of strings, or None when the review lists no pros.
    pros: Annotated[Optional[list[str]], "Write down all the pros inside a list"]
    # 'cons' is a list of strings, or None when the review lists no cons.
    cons: Annotated[Optional[list[str]], "Write down all the cons inside a list"]
    # 'name' is the reviewer's name, or None when it is not given.
    name: Annotated[Optional[str], "Write the name of the reviewer"]

In [5]:
# Create a structured output model from the base ChatOpenAI model. 🛠️
# 'with_structured_output' wraps 'model' so that responses are shaped by the
# 'Review' TypedDict schema instead of being free-form text.
# LangChain internally converts the TypedDict (with its Annotated descriptions)
# into a JSON Schema and instructs the OpenAI model to produce output that
# adheres to it.
# The wrapped object is what the following cell invokes; 'model' itself is
# left unchanged.
structured_model = model.with_structured_output(Review)



In [6]:
# Review text to analyze. 💬
# It contains free-form praise and complaints, an explicit "Pros:" list,
# no "Cons:" list, and a reviewer signature on the last line.
review_text = """I recently got the Solanki True Wireless Earbuds, and they've been a mixed bag. The sound quality is genuinely impressive, offering crisp highs and decent bass for their size, and they fit surprisingly comfortably in my ears for long listening sessions. The battery life is also solid, easily getting me through a workday.
However, the touch controls are incredibly finicky, often registering accidental taps or failing to respond. The charging case is also a bit bulkier than I'd like, making it less pocket-friendly.

Pros:

Excellent sound quality
Comfortable fit
Good battery life

Review by Ujjwal Solanki
"""

# Run the structured model on the review so the text is mapped onto the
# fields declared in the 'Review' schema.
result = structured_model.invoke(review_text)

# Show the extracted reviewer name. 🎯
# 'result' is indexed like a regular Python dictionary, keyed by the schema's
# field names.
print(result["name"])

Ujjwal Solanki
