In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from typing import TypedDict, Annotated, Optional, Literal # These are imported but not directly used for schema definition in this specific example
from pydantic import BaseModel, Field # These are imported but not directly used for schema definition in this specific example

# Load environment variables from a .env file so that credentials such as
# OPENAI_API_KEY stay out of the notebook itself.
# This call is the cell's last expression, so its return value is echoed as
# the cell output (True in the recorded run).
load_dotenv()

True

In [2]:
# Create the chat model that the later cells use to extract structured
# information from free text.
# No arguments are passed, so the model name and every other setting fall back
# to the library defaults.
# NOTE(review): confirm which OpenAI model the installed langchain-openai
# version selects by default; pass `model=...` explicitly if it matters.
model = ChatOpenAI()

In [3]:
# JSON Schema (as a plain dict) describing the structured output we want back.
# This is an alternative to declaring the shape with a Pydantic BaseModel or a
# TypedDict.
# - "title": name of the schema.
# - "type": "object" means the top-level value is a JSON object.
# - "properties": one entry per field:
#   - "key_themes": array of strings.
#   - "summary": string.
#   - "sentiment": string restricted by "enum" to exactly "pos" or "neg";
#     there is no neutral option, and the description says so explicitly so
#     the instruction given to the model agrees with the allowed values.
#   - "pros", "cons": array of strings, or null when the review has none.
#   - "name": string, or null when the reviewer is not identified.
# - "required": fields that must always be present; the rest are optional.
json_schema = {
  "title": "Review",
  "type": "object",
  "properties": {
    "key_themes": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "Write down all the key themes discussed in the review in a list"
    },
    "summary": {
      "type": "string",
      "description": "A brief summary of the review"
    },
    "sentiment": {
      "type": "string",
      "enum": ["pos", "neg"],
      # Keep this text in sync with the enum above: it previously also offered
      # "neutral", which the enum does not allow.
      "description": "Return sentiment of the review, either 'pos' (positive) or 'neg' (negative)"
    },
    "pros": {
      "type": ["array", "null"],  # array of strings, or null
      "items": {
        "type": "string"
      },
      "description": "Write down all the pros inside a list"
    },
    "cons": {
      "type": ["array", "null"],  # array of strings, or null
      "items": {
        "type": "string"
      },
      "description": "Write down all the cons inside a list"
    },
    "name": {
      "type": ["string", "null"],  # string, or null
      "description": "Write the name of the reviewer"
    }
  },
  "required": ["key_themes", "summary", "sentiment"]
}

In [4]:
# Wrap the base chat model so that its responses follow `json_schema`.
# `with_structured_output` is given the raw JSON Schema dict here (not a
# Pydantic class or TypedDict); in the recorded run below, invoking the
# wrapped model yields a plain Python dict keyed by the schema's property names.
structured_model = model.with_structured_output(json_schema)



In [5]:
# Free-text product review used as the input for the extraction.
review_text = """I recently got the Solanki True Wireless Earbuds, and they've been a mixed bag. The sound quality is genuinely impressive, offering crisp highs and decent bass for their size, and they fit surprisingly comfortably in my ears for long listening sessions. The battery life is also solid, easily getting me through a workday.
However, the touch controls are incredibly finicky, often registering accidental taps or failing to respond. The charging case is also a bit bulkier than I'd like, making it less pocket-friendly.

Pros:

Excellent sound quality
Comfortable fit
Good battery life

Review by Ujjwal Solanki
"""

# Run the schema-bound model on the review; it fills in the fields declared
# in 'json_schema' from the text.
result = structured_model.invoke(review_text)

# Show the whole extraction at once: a dict keyed by the schema's properties.
print(result)

{'key_themes': ['sound quality', 'comfortable fit', 'battery life', 'touch controls', 'charging case'], 'summary': 'Mixed bag with impressive sound quality, comfortable fit, and good battery life but finicky touch controls and bulky charging case.', 'sentiment': 'pos', 'pros': ['Excellent sound quality', 'Comfortable fit', 'Good battery life'], 'cons': ['Finicky touch controls', 'Bulky charging case'], 'name': 'Ujjwal Solanki'}
