In [1]:
import os # For providing a default USER_AGENT before LangChain is imported

from dotenv import load_dotenv # For loading environment variables

# Load environment variables from a .env file. 🌍
# This is important for securely loading your API keys (like OPENAI_API_KEY)
# so they are not directly visible in your script.
# It runs BEFORE the LangChain imports on purpose: langchain_community checks the
# USER_AGENT environment variable while it is being imported, so values coming
# from .env must already be present in the environment at that point.
load_dotenv()

# Identify this notebook's HTTP requests. Without USER_AGENT the import below emits
# "USER_AGENT environment variable not set, consider setting it to identify your requests."
# `setdefault` keeps any value already supplied by the shell or the .env file.
os.environ.setdefault("USER_AGENT", "langchain-webpage-qa-notebook/0.1")

# These imports intentionally come after the environment setup above.
from langchain_community.document_loaders import WebBaseLoader # For loading web page content
from langchain_openai import ChatOpenAI # For interacting with OpenAI chat models
from langchain_core.output_parsers import StrOutputParser # For parsing LLM output to a string
from langchain_core.prompts import PromptTemplate # For creating reusable prompt structures

# Initialize the ChatOpenAI language model. 🤖
# No arguments are passed, so the library's default chat model and settings are used;
# the API key is picked up from the environment loaded above.
# This model will be used to answer the question based on the loaded web page content.
model = ChatOpenAI()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Build the reusable question-answering prompt. 📝
# Two placeholders are filled in at invoke time:
#   - 'question': the query to answer
#   - 'text':     the context the answer should be drawn from
# Note: the wording asks the model to answer "from the following text"; it does not
# explicitly forbid the model from using outside knowledge.
prompt = PromptTemplate(
    input_variables=['question', 'text'],
    template='Answer the following question \n {question} from the following text - \n {text}',
)

In [3]:
# Create the output parser used as the last step of the chain. 📄
# The chat model returns a message object; StrOutputParser pulls out its text
# content so the final result of the chain is a plain Python string that can be
# printed or passed on directly.
parser = StrOutputParser()

In [4]:
# URL of the web page to load. 🌐
# This is an Amazon product page for a HOKA running shoe.
url = (
    'https://www.amazon.com'
    '/HOKA-ONE-Mens-Running-Shoes/dp/B0B54R2468'
)

In [5]:
# Set up a WebBaseLoader pointed at the page defined in `url`. 🔗
# Nothing is downloaded here; the request happens when the loader is asked to load.
loader = WebBaseLoader(web_path=url)

In [6]:
# Fetch the page and turn it into LangChain documents. 📚
# `loader.load()` performs the HTTP request and returns a list of `Document` objects;
# with a single URL this is normally a one-element list whose `page_content`
# holds the text extracted from the page.
docs = loader.load()

In [7]:
# Assemble the pipeline with LangChain Expression Language (LCEL). ⛓️
# The `|` operator feeds each step's output into the next step:
#   prompt -> fills 'question' and 'text' into the template
#   model  -> sends the formatted prompt to the chat model and gets a reply
#   parser -> reduces the reply to a plain string
chain = prompt | model | parser

In [8]:
# Invoke the chain with the specific question and the loaded web page content. 🚀
# Guard first: `docs[0]` would raise a bare IndexError if the loader returned nothing,
# and an empty page would send the model a prompt with no context to answer from
# (wasting an API call and inviting a made-up answer).
if not docs or not docs[0].page_content.strip():
    raise ValueError('No text was loaded from the web page; cannot answer the question.')

# The first (and, for a single URL, normally only) document supplies the 'text' context.
question = 'What is the product that we are talking about?'
answer = chain.invoke({'question': question, 'text': docs[0].page_content})

# `print` is kept so the answer is shown as plain text rather than a quoted string repr.
print(answer)

The product being discussed is the HOKA ONE ONE Bondi 8 Men's Shoes for road running.
