In [None]:
%%capture --no-stderr
# Dependency installation cell. The %%capture cell magic hides the installers'
# normal output; --no-stderr leaves stderr uncaptured so failures stay visible.
# NOTE(review): `uv` and `langchainhub` are installed without a version pin,
# unlike every other package below - consider pinning them for reproducibility.
%pip install uv
%uv pip install chromadb==0.4.22
%uv pip install tiktoken==0.9.0
%uv pip install langchain==0.3.20
%uv pip install langchain-community==0.3.10
%uv pip install langchain-openai==0.3.1
%uv pip install langchainhub
%uv pip install langchain-text-splitters==0.3.6
%uv pip install langgraph==0.3.1
%uv pip install openai==1.65.3
%uv pip install PyMuPDF==1.25.3
%uv pip install pypdf==5.3.1
%uv pip install pillow==11.1.0
%uv pip install beautifulsoup4==4.13.3

In [None]:
import os
from dotenv import find_dotenv, load_dotenv

# Load variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

# Settings read from the environment; an unset variable yields None.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.environ.get("LANGCHAIN_PROJECT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
# True only when the variable is exactly the lowercase string "true".
LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2") == "true"

# Echo the non-secret settings so the active configuration is visible.
print("LANGCHAIN_PROJECT:", LANGCHAIN_PROJECT)
print("LANGCHAIN_TRACING_V2:", LANGCHAIN_TRACING_V2)

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Dell product pages that form the retrieval corpus.
urls = [
    "https://www.dell.com/en-us/shop/dell-laptops/inspiron-14-2-in-1-laptop/spd/inspiron-14-7440-2-in-1-laptop",
    "https://www.dell.com/en-us/shop/dell-laptops/latitude-5450-laptop/spd/latitude-14-5450-laptop",
    "https://www.dell.com/en-us/shop/dell-laptops/latitude-7450-laptop/spd/latitude-14-7450-2-in-1-laptop",
    "https://www.dell.com/en-us/shop/dell-laptops/xps-14-laptop/spd/xps-14-9440-laptop",
]

# One load() result per URL, then flattened into a single list of documents.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [page_doc for per_url_docs in docs for page_doc in per_url_docs]

# Token-based chunking. An earlier configuration used chunk_size=3000 /
# chunk_overlap=50; the current one uses smaller chunks with more overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=900,
    chunk_overlap=150,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and index them in the "rag-chroma" collection.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the collection again - confirm, and reset the collection first if so.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
)

# Retrieve 10 results per query instead of the default 4 (plain
# vectorstore.as_retriever()); intended to help recall for the agent,
# especially with the smaller chunks.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [None]:
# Summarise what was loaded instead of dumping the full text of every page,
# which floods the cell output and bloats the saved notebook.
print(f"Loaded {len(docs_list)} document(s)")
for loaded_doc in docs_list:
    print(f"- {loaded_doc.metadata.get('source', '<unknown source>')}: "
          f"{len(loaded_doc.page_content)} characters")

In [None]:
from langchain_core.tools import create_retriever_tool

# Wrap the retriever as a named tool; the name and description are what the
# agent model sees when deciding whether to call it.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="retrieve_products",
    description="Search and return information  about a product for customer to purchase",
)

# Tool list bound to the agent model.
tools = [retriever_tool]

In [None]:

from typing import Annotated, Sequence, Literal
from typing_extensions import TypedDict

from langchain_core.messages import BaseMessage, HumanMessage

from langgraph.graph.message import add_messages
###
MAX_REWRITES = 3  # hard stop for rewrite loops


class AgentState(TypedDict):
    """State passed between the nodes of the graph.

    Besides the conversation and the rewrite counter, the schema declares the
    two debug/evaluation keys that the JSON-producing ``generate`` node returns
    (``context_text`` and ``final_json``) so those values are part of the graph
    state rather than falling outside the schema.
    """
    # The add_messages function defines how an update should be processed
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Number of query rewrites performed so far (compared against MAX_REWRITES).
    rewrite_attempts: int
    # Flattened retrieval context handed to the generation prompt (debug/eval).
    context_text: str
    # Raw model output of the generation step (debug/eval).
    final_json: str


In [None]:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from pydantic import BaseModel, Field


from langgraph.prebuilt import tools_condition

### Edges


def grade_documents(state) -> Literal["generate", "rewrite"]:
    """
    Determines whether the retrieved documents are relevant to the question.

    The first message's content is used as the question and the last message's
    content as the retrieved documents; both are passed to a structured-output
    grader that answers 'yes' or 'no'.

    Args:
        state (messages): The current state

    Returns:
        str: "generate" when the grader answers yes, otherwise "rewrite"
    """

    print("---CHECK RELEVANCE---")

    class grade(BaseModel):
        """Binary score for relevance check."""
        binary_score: str = Field(description="Relevance score 'yes' or 'no'")

    model = ChatOpenAI(temperature=0, model="gpt-4o-mini", streaming=True)
    llm_with_tool = model.with_structured_output(grade)

    prompt = PromptTemplate(
        template="""You are a salesperson customizing the design of Dell computer to recommend to the customer. 

        The customer wants to know the model name, CPU, memory, storage and price (dollar $). 


        Here is the retrieved document: 

 {context} 


        Here is the user question: {question} 

        If the document contains all keyword(s) and semantic meaning related to the user question, grade it as relevant. 

        Give a binary score ('yes' or 'no') to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    chain = prompt | llm_with_tool

    messages = state["messages"]
    question = messages[0].content
    docs = messages[-1].content

    scored_result = chain.invoke({"question": question, "context": docs})

    # binary_score is a free-form string, so the model may answer "Yes",
    # " yes" or "yes." - normalise before comparing so such answers are not
    # misread as "not relevant". A missing score falls through to "rewrite".
    raw_score = getattr(scored_result, "binary_score", None) or ""
    score = str(raw_score).strip().strip("'\".").lower()

    if score == "yes":
        print("---DECISION: DOCS RELEVANT---")
        return "generate"
    print("---DECISION: DOCS NOT RELEVANT---")
    print(raw_score)
    return "rewrite"


### Nodes


def agent(state):
    """
    Run the tool-enabled chat model over the conversation so far.

    The model is bound to the module-level ``tools`` list, so its reply may
    contain a tool call (retrieve) or a plain answer (end).

    Returns:
        dict: ``{"messages": [response]}`` with the model's reply
    """
    print("---CALL AGENT---")
    tool_model = ChatOpenAI(
        temperature=0, streaming=True, model="gpt-4o-mini"
    ).bind_tools(tools)
    reply = tool_model.invoke(state["messages"])
    return {"messages": [reply]}


def rewrite(state):
    """
    Ask the model for an improved version of the original question and bump
    the rewrite counter.

    Returns:
        dict: the model's reply under "messages" and the incremented
        "rewrite_attempts" (a missing counter is treated as 0)
    """

    print("---TRANSFORM QUERY---")
    # The rewrite always starts from the first message in the conversation.
    question = state["messages"][0].content
    attempts = state.get("rewrite_attempts", 0) + 1

    rewrite_request = HumanMessage(
            content=f""" 

    Look at the input and try to reason about the underlying semantic intent / meaning. 

    Here is the initial question:

 ------- 

    {question} 

 ------- 

    Formulate an improved question: """,
    )

    rewriter = ChatOpenAI(temperature=0, model="gpt-4o-mini", streaming=True)
    improved = rewriter.invoke([rewrite_request])
    return {"messages": [improved], "rewrite_attempts": attempts}


def route_after_rewrite(state) -> Literal["continue", "stop"]:
    """
    Choose the edge to follow after a rewrite.

    Returns "continue" while the number of rewrites recorded in the state
    (0 if absent) is below MAX_REWRITES; otherwise logs the limit and
    returns "stop".
    """
    attempts = state.get("rewrite_attempts", 0)
    if attempts < MAX_REWRITES:
        return "continue"
    print(f"---MAX REWRITES REACHED ({attempts})---")
    return "stop"

import json
import re
from typing import Any, Dict, List, Union
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser


def _docs_to_text(docs: Union[str, List[Any]]) -> str:
    """Accepts either a pre-formatted context string OR a list of Documents."""
    if isinstance(docs, str):
        return docs
    return "\n\n".join(d.page_content for d in docs)


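## Version 1 of generate node (superseded: the Version 2 cell below redefines `generate`, so running the notebook top to bottom uses Version 2)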

In [None]:
### Main generation node
def generate(state):
    """
    Generate a short natural-language answer from the retrieved context.

    Args:
        state (messages): The current state. The first message's content is
            used as the question and the last message's content as the
            retrieved context.

    Returns:
         dict: ``{"messages": [AIMessage]}`` holding the generated answer
    """
    # Local import: AIMessage is not among the notebook's top-level imports.
    from langchain_core.messages import AIMessage

    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    retrieved = messages[-1].content  # can be list[Document] or str

    context_text = _docs_to_text(retrieved)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a salesperson helping customers understand which Dell computers will best meet their needs. \n
            The customer wants to know the model name, CPU, memory, storage and price (dollar $). \n
            Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. \n
            Use three sentences maximum and keep the answer concise.\n\n
            Question: {question} \n\n
            Context: {context} \n\n
            Answer:""",
        input_variables=["context", "question"],
    )

    # LLM (same `model=` keyword as the other nodes in this notebook)
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, streaming=True)

    # Chain
    rag_chain = prompt | llm | StrOutputParser()

    # Run
    response = rag_chain.invoke({"context": context_text, "question": question})

    # A bare string appended through add_messages is stored as a human
    # message; wrap the answer so it is recorded as the assistant's reply.
    return {"messages": [AIMessage(content=response)]}



## Version 2 of generate node

In [None]:
### Main generation node
def generate(state):
    """
    Generate answer, providing enough information to judge faithfulness to the retrieved context.

    Expects:
      - state["messages"][0].content = question
      - state["messages"][-1].content = retrieved docs (list[Document] or str)

    Returns:
      - messages: [AIMessage] whose content is the model's JSON string
      - context_text: the flattened context given to the prompt (evaluation/debug)
      - final_json: the raw JSON string returned by the model (evaluation/debug)
    """
    # Local import: AIMessage is not among the notebook's top-level imports.
    from langchain_core.messages import AIMessage

    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    retrieved = messages[-1].content  # can be list[Document] or str

    context_text = _docs_to_text(retrieved)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a Dell salesperson, but you must be strictly faithful to the provided Context.
Return ONLY valid JSON (no markdown, no extra text).

TASK:
Given the Question and Context, provide a JSON-formatted response for every Dell product/config that matches.
If you cannot verify required fields from Context with direct quotes, output decision = "NO_MATCH".

REQUIRED OUTPUT JSON SCHEMA:
{{
  "decision": "MATCH" | "NO_MATCH",
  "products": [
    {{
      "model_name": {{"value": string, "quote": string}},
      "cpu":        {{"value": string, "quote": string}},
      "memory":     {{"value": string, "quote": string}},
      "storage":    {{"value": string, "quote": string}},
      "price":      {{"value": string, "quote": string}}
    }}
  ],
  "missing": [string],
  "notes": string
}}

RULES:
- Every quote must be copied verbatim from Context.
- If any required field cannot be supported by a quote from Context, you must use decision="NO_MATCH"
  and list missing fields in "missing". You may leave products empty in that case.
- Do not guess. Do not use outside knowledge.

Question: {question}

Context:
{context}
""",
        input_variables=["context", "question"],
    )

    # LLM (same `model=` keyword as the other nodes in this notebook)
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, streaming=True)

    # Chain
    rag_chain = prompt | llm | StrOutputParser()

    # Run
    response = rag_chain.invoke({"context": context_text, "question": question})

    # A bare string appended through add_messages is stored as a human
    # message; wrap the answer so it is recorded as the assistant's reply.
    # NOTE(review): "context_text" and "final_json" presumably only persist in
    # graph state if the state schema declares those keys - confirm.
    return {"messages": [AIMessage(content=response)],
            "context_text": context_text,  # for evaluation/debug, not used by the agent
            "final_json": response}  # for evaluation/debug, not used by the agent



In [None]:

from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import ToolNode

# Build the graph over the shared AgentState.
workflow = StateGraph(AgentState)

# --- Nodes ---------------------------------------------------------------
workflow.add_node("agent", agent)  # decides whether to call the retriever tool
retrieve = ToolNode([retriever_tool])
workflow.add_node("retrieve", retrieve)  # executes the retriever tool call
workflow.add_node("rewrite", rewrite)  # reformulates the question
workflow.add_node("generate", generate)  # answers once the documents are judged relevant

# --- Edges ---------------------------------------------------------------
# Every run starts at the agent.
workflow.add_edge(START, "agent")

# Agent -> retrieve when the reply contains a tool call, otherwise finish.
workflow.add_conditional_edges(
    "agent",
    tools_condition,
    {
        "tools": "retrieve",
        END: END,
    },
)

# After retrieval, grade_documents returns the name of the next node
# ("generate" or "rewrite").
workflow.add_conditional_edges("retrieve", grade_documents)

# A generated answer ends the run.
workflow.add_edge("generate", END)

# After a rewrite, go back to the agent unless the rewrite budget is spent.
workflow.add_conditional_edges(
    "rewrite",
    route_after_rewrite,
    {
        "continue": "agent",
        "stop": END,
    },
)

graph = workflow.compile()


In [None]:
from IPython.display import Image, display

# Rendering the graph is optional: it requires some extra dependencies, so a
# failure must not stop the notebook. Report why it was skipped instead of
# swallowing the error silently.
try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    print(f"Graph rendering skipped ({type(exc).__name__}: {exc})")

In [None]:

import pprint

# Alternative questions to try (swap into "messages" below):
#   "I want a dell computer for travel that has Intel Core 7 150U."
#   "I want a dell computer that has Intel Core Ultra 5 135U vPro and has 512 GB SSD."
#   "I want a light weight XPS computer with Intel Core Ultra 7 165U vPro and 1 TB SSD."
inputs = {
    "messages": [
        ("user", "I want a dell computer that has Intel Core Ultra 7 165U vPro and 1 TB SSD."),
    ],
    "rewrite_attempts": 0,  # initialize rewrite attempts at zero
}

# Stream the run and show each node's output as it is produced.
for output in graph.stream(inputs):
    for key, value in output.items():
        # Plain print for headers/separators: pprint on a str emits its quoted
        # repr, so the separator would appear as the literal '\n---\n'.
        print(f"Output from node '{key}':")
        print("---")
        pprint.pprint(value, indent=2, width=80, depth=None)
    print("\n---\n")
