# LangGraph and LangSmith - Agentic RAG Powered by LangChain

## Common Setup

In [1]:
import os
import getpass
from uuid import uuid4

# Prompt for each API key interactively so no secret is stored in the notebook.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

os.environ["TAVILY_API_KEY"] = getpass.getpass("TAVILY_API_KEY")

# LangSmith-related settings: tracing flag, a project name that gets a fresh
# random 8-hex-character suffix every time this cell runs, and the API key.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"AIE8 - LangGraph - {uuid4().hex[0:8]}"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API Key: ")

In [2]:
from langchain_openai import ChatOpenAI

# Model that gets the tools bound to it and drives the "agent" node (temperature 0).
default_llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0)
# Separate model used only by the helpfulness check in `tool_call_or_helpful`.
helpfulness_llm = ChatOpenAI(model="gpt-4.1-mini")

In [3]:
from langchain_core.messages import HumanMessage
from langchain_tavily import TavilySearch
from langchain_community.tools.arxiv.tool import ArxivQueryRun

# Tavily search tool, configured with max_results=5.
tavily_tool = TavilySearch(max_results=5)

# Tools offered to the model: Tavily search plus the arXiv query tool.
tool_belt = [
    tavily_tool,
    ArxivQueryRun(),
]

# `default_llm` with the tool belt bound to it; this is what `call_model` invokes.
model_with_tools = default_llm.bind_tools(tool_belt)

In [4]:
from langgraph.prebuilt import ToolNode

def call_model(state):
  """Run the tool-enabled model on the conversation so far.

  Args:
    state: graph state; its "messages" entry is passed to the model as-is.

  Returns:
    A state update of the form {"messages": [<model response>]}.
  """
  reply = model_with_tools.invoke(state["messages"])
  return {"messages": [reply]}

# Prebuilt LangGraph node built from the tool belt; the graphs below register it as "action".
tool_node = ToolNode(tool_belt)

## Simple Agentic Graph

In [5]:
from langgraph.graph import StateGraph, END

def should_continue(state):
  """Pick the next step after the agent has responded.

  Args:
    state: graph state; only the last entry of state["messages"] is inspected.

  Returns:
    "action" when the latest message carries tool calls, otherwise END.
  """
  latest = state["messages"][-1]
  return "action" if latest.tool_calls else END

In [6]:
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages

class AgentState(TypedDict):
  """State schema handed to `StateGraph`: a single `messages` list."""
  # Annotated with `add_messages`; presumably LangGraph uses it to merge each
  # node's returned messages into this list — confirm in the LangGraph docs.
  messages: Annotated[list, add_messages]

In [7]:
# Build the simple agent loop: "agent" (model call) <-> "action" (tool node).
simple_graph = StateGraph(AgentState)

simple_graph.add_node("agent", call_model)
simple_graph.add_node("action", tool_node)

# No path map is passed here, so whatever `should_continue` returns
# ("action" or END) is what selects the next destination.
simple_graph.add_conditional_edges(
    "agent",
    should_continue
)

# After the "action" node runs, control always goes back to the agent.
simple_graph.add_edge("action", "agent")

simple_graph.set_entry_point("agent")

<langgraph.graph.state.StateGraph at 0x7f3f0be91160>

In [8]:
# Compile the graph definition into the app object that later cells stream from and chain.
simple_graph_app = simple_graph.compile()

In [9]:
inputs = {"messages" : [HumanMessage(content="How are technical professionals using AI to improve their work?")]}

# Stream the run with stream_mode="updates"; each chunk is iterated as
# node name -> that node's update, and the update's messages are printed.
# NOTE: a top-level `async for` only works where top-level await is supported (e.g. a notebook).
async for chunk in simple_graph_app.astream(inputs, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        print(values["messages"])
        print("\n\n")

Receiving update from node: 'agent'
[AIMessage(content='Technical professionals are using AI in various ways to enhance their work, including automating repetitive tasks, improving decision-making, analyzing large datasets, developing new products and services, and optimizing processes. They leverage AI for tasks such as machine learning model development, natural language processing, computer vision, predictive analytics, and automation of routine operations. This integration of AI helps increase efficiency, accuracy, and innovation across different fields like software development, data analysis, cybersecurity, engineering, and research. Would you like specific examples or insights into particular industries or roles?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 108, 'prompt_tokens': 1352, 'total_tokens': 1460, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction

In [10]:
inputs = {"messages" : [HumanMessage(content="Search Arxiv for the A Comprehensive Survey of Deep Research paper, then search each of the authors to find out where they work now using Tavily!")]}

async for chunk in simple_graph_app.astream(inputs, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        if node == "action":
          print(f"Tool Used: {values['messages'][0].name}")
        print(values["messages"])
        print("\n\n")

Receiving update from node: 'agent'
[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_LKTiHV0xrYCg50fBoBXJvSIn', 'function': {'arguments': '{"query": "A Comprehensive Survey of Deep Research"}', 'name': 'arxiv'}, 'type': 'function'}, {'id': 'call_lowggRGKKCS1NegORGegF9VH', 'function': {'arguments': '{"query": "where does the author of A Comprehensive Survey of Deep Research work"}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 62, 'prompt_tokens': 1371, 'total_tokens': 1433, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-nano-2025-04-14', 'system_fingerprint': 'fp_7c233bf9d1', 'id': 'chatcmpl-CJDGjZt0iGUjvyEHRb0HXMHjVKXq0', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--8b7

## Simple Agentic Graph with LangSmith Evaluation

In [11]:
def convert_inputs(input_object):
  """Turn a dataset-style input into the graph's initial state.

  Args:
    input_object: mapping with a "text" entry holding the user question.

  Returns:
    {"messages": [HumanMessage]} where the message content is that text.
  """
  question = HumanMessage(content=input_object["text"])
  return {"messages": [question]}

def parse_output(input_state):
  """Reduce a final graph state to the answer text.

  Args:
    input_state: graph state whose "messages" list ends with the final reply.

  Returns:
    {"answer": <content of the last message>}.
  """
  final_message = input_state["messages"][-1]
  return {"answer": final_message.content}

# Wrap the compiled graph so the whole chain maps {"text": ...} -> {"answer": ...};
# this is the callable later passed to `client.evaluate`.
agent_chain_with_formatting = convert_inputs | simple_graph_app | parse_output

# Quick check of the wrapped chain on a single question.
agent_chain_with_formatting.invoke({"text" : "What is Deep Research?"})

{'answer': 'Deep Research is an AI-powered tool or agent integrated into platforms like ChatGPT that autonomously browses the web, analyzes, and synthesizes information to generate detailed, cited reports on a user-specified topic. It is designed to perform in-depth research by exploring multiple sources over a period of time, typically ranging from 5 to 30 minutes, to provide comprehensive insights and analysis. This tool is useful for conducting thorough investigations on complex topics, producing well-documented reports, and gaining expert-level understanding.'}

In [12]:
# Evaluation examples. Each "inputs" dict has the "text" key that
# `convert_inputs` reads; each "outputs" dict carries the "must_mention"
# phrases that the `must_mention` evaluator looks for in the answer.
questions = [
    {
        "inputs" : {"text" : "Who were the main authors on the 'A Comprehensive Survey of Deep Research: Systems, Methodologies, and Applications' paper?"},
        "outputs" : {"must_mention" : ["Peng", "Xu"]}   
    },
    {
        "inputs" : {"text" : "Where do the authors of the 'A Comprehensive Survey of Deep Research: Systems, Methodologies, and Applications' work now?"},
        "outputs" : {"must_mention" : ["Zhejiang", "Liberty Mutual"]}
    }
]

In [13]:
from langsmith import Client

# LangSmith client; presumably picks up the API key set in the environment
# by the setup cell — confirm against the LangSmith client docs.
client = Client()

# The random 8-hex-character suffix gives a new dataset name on every run of this cell.
dataset_name = f"Simple Search Agent - Evaluation Dataset - {uuid4().hex[0:8]}"

dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Questions about the cohort use-case to evaluate the Simple Search Agent."
)

# Upload the `questions` examples into the dataset just created.
client.create_examples(
    dataset_id=dataset.id,
    examples=questions
)

{'example_ids': ['43e2ad32-baaa-44f1-b05a-d186f6c4c630',
  'd69e0e63-45e7-446b-8256-86bd0d75cb45'],
 'count': 2}

In [14]:
from openevals.llm import create_llm_as_judge
from openevals.prompts import CORRECTNESS_PROMPT
# print(CORRECTNESS_PROMPT)

# LLM-as-judge evaluator built from openevals' CORRECTNESS_PROMPT; it is
# configured with the feedback key "correctness" and the "openai:o3-mini" judge model.
correctness_evaluator = create_llm_as_judge(
        prompt=CORRECTNESS_PROMPT,
        model="openai:o3-mini", # very impactful to the final score
        feedback_key="correctness",
    )

In [15]:
def must_mention(inputs: dict, outputs: dict, reference_outputs: dict) -> float:
  """Score whether the answer contains every required phrase.

  Args:
    inputs: the example inputs (unused; part of the evaluator signature).
    outputs: the target's outputs; the text is read from its "answer" entry.
    reference_outputs: the example's reference data; the phrases are read
      from its "must_mention" entry.

  Returns:
    1.0 when every phrase occurs in the answer (case-sensitive substring
    match), or when there are no required phrases; 0.0 otherwise.
  """
  required = reference_outputs.get("must_mention") or []
  # A missing or None answer is scored as a miss instead of raising, so one
  # empty output does not turn into an evaluator error.
  answer = (outputs or {}).get("answer") or ""
  # Convert explicitly so the returned value matches the declared float type.
  return float(all(phrase in answer for phrase in required))

In [16]:
# Run the formatted agent chain over the dataset's examples and score the
# outputs with both evaluators (LLM-judged correctness and `must_mention`).
results = client.evaluate(
    agent_chain_with_formatting,
    data=dataset.name,
    evaluators=[correctness_evaluator, must_mention],
    experiment_prefix="simple_agent, baseline",  # optional, experiment name prefix
    description="Testing the baseline system.",  # optional, experiment description
    max_concurrency=4, # optional, add concurrency
)

View the evaluation results for experiment: 'simple_agent, baseline-40f875e7' at:
https://smith.langchain.com/o/29b9636b-ddfa-4496-93ee-b2875ed2ee09/datasets/a98fb387-c060-4c77-a427-3d0982b0a74b/compare?selectedSessions=13917d7b-ac03-4d92-9690-e3ac515a199c




0it [00:00, ?it/s]

## Agentic Graph with LLM Helpfulness Check

In [17]:
# Prompt for the helpfulness check. `{initial_query}` and `{final_response}`
# are filled in by `tool_call_or_helpful`, which then looks for a "Y" in the reply.
HELPFULNESS_PROMPT_TMPL = """\
Given an initial query and a final response, determine if the final response is extremely helpful or not. Please indicate helpfulness with a 'Y' and unhelpfulness as an 'N'.

Initial Query:
{initial_query}

Final Response:
{final_response}"""

In [18]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.graph import StateGraph, END

def tool_call_or_helpful(state):
  """Route the helpfulness graph after each "agent" step.

  Args:
    state: graph state; state["messages"] holds the conversation so far.

  Returns:
    One of the routing keys used by the graph's conditional-edge mapping:
      "action"   - the latest message carries tool calls.
      "end"      - more than 10 messages have accumulated (loop guard), or
                   the helpfulness model's reply contains "Y".
      "continue" - the reply was not judged helpful; try the agent again.
  """
  messages = state["messages"]
  last_message = messages[-1]

  if last_message.tool_calls:
    return "action"

  # Loop guard. This must be the lowercase "end" key: the conditional-edge
  # mapping registered for this router only lists "continue", "action" and
  # "end", so any other string (such as "END") is not a mapped route.
  if len(messages) > 10:
    return "end"

  initial_query = messages[0]
  final_response = last_message

  helpfulness_prompt_template = PromptTemplate.from_template(HELPFULNESS_PROMPT_TMPL)
  helpfulness_chain = helpfulness_prompt_template | helpfulness_llm | StrOutputParser()

  helpfulness_response = helpfulness_chain.invoke({"initial_query" : initial_query.content, "final_response" : final_response.content})

  # The prompt asks for 'Y' or 'N'; any reply containing an uppercase "Y" counts as helpful.
  if "Y" in helpfulness_response:
    return "end"
  return "continue"

In [19]:
# NOTE(review): identical re-declaration of the `AgentState` class defined
# earlier in this notebook; it rebinds the same name to an equivalent schema.
class AgentState(TypedDict):
  """State schema handed to `StateGraph`: a single `messages` list."""
  # Annotated with `add_messages`; presumably LangGraph uses it to merge each
  # node's returned messages into this list — confirm in the LangGraph docs.
  messages: Annotated[list, add_messages]

In [20]:
# Same two nodes as the simple graph; the difference is that routing after
# "agent" goes through `tool_call_or_helpful`.
helpfulness_graph = StateGraph(AgentState)

helpfulness_graph.add_node("agent", call_model)
helpfulness_graph.add_node("action", tool_node)

helpfulness_graph.add_edge("action", "agent")

# The keys of this mapping are the strings `tool_call_or_helpful` is expected
# to return; keep the two in sync. "continue" sends the state back to the
# agent for another attempt, "end" finishes the run.
helpfulness_graph.add_conditional_edges(
    "agent",
    tool_call_or_helpful,
    {
        "continue" : "agent",
        "action" : "action",
        "end" : END
    }
)

helpfulness_graph.set_entry_point("agent")

<langgraph.graph.state.StateGraph at 0x7f3ee0081a90>

In [21]:
# Compile the helpfulness graph into the app object used by the cells below.
helpfulness_graph_app = helpfulness_graph.compile()

In [22]:
inputs = {"messages" : [HumanMessage(content="What are Deep Research Agents?")]}

# Stream the helpfulness graph with stream_mode="updates"; each chunk is
# iterated as node name -> that node's update, and its messages are printed.
# NOTE: a top-level `async for` only works where top-level await is supported (e.g. a notebook).
async for chunk in helpfulness_graph_app.astream(inputs, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        print(values["messages"])
        print("\n\n")

Receiving update from node: 'agent'
[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ek2plYVa9rIntjcqBvPiRKEp', 'function': {'arguments': '{"query":"Deep Research Agents"}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 1347, 'total_tokens': 1365, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 1280}}, 'model_name': 'gpt-4.1-nano-2025-04-14', 'system_fingerprint': 'fp_7c233bf9d1', 'id': 'chatcmpl-CJDH7YtkadEwoxisOKPLAqogSodhD', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--eed8b2b8-bb20-48b3-ad3f-81515a9af318-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'Deep Research Agents'}, 'id': 'call_Ek2plYVa9rIntjcqBvPiRKEp', 'type': 'tool_call'}], usage_metadata={'input_toke

In [23]:
patterns = ["Context Engineering", "Fine-tuning", "LLM-based agents"]

# Ask the helpfulness graph the same templated question for each topic and
# print only the content of the last message of each run.
for pattern in patterns:
  what_is_string = f"What is {pattern} and when did it break onto the scene??"
  inputs = {"messages" : [HumanMessage(content=what_is_string)]}
  # Despite the name, `messages` is the whole result of `invoke`; the message
  # list is read from its "messages" entry on the next line.
  messages = helpfulness_graph_app.invoke(inputs)
  print(messages["messages"][-1].content)
  print("\n\n")

Context Engineering is an emerging discipline in AI and cybersecurity that involves strategically designing the inputs, especially prompts, provided to AI systems to influence or control their outputs. It focuses on how framing, tone, structure, and supplemental context shape an AI model’s response. As AI becomes more embedded in business operations, content generation, and decision-making, context engineering is becoming a crucial skill for professionals aiming to maximize accuracy, relevance, and safety in AI-generated results.

The concept gained significant attention around early 2023, particularly with the rise of ChatGPT and other large language models, when people started sharing clever prompts and techniques to better control AI outputs. It is considered to be the next evolution beyond prompt engineering, emphasizing the importance of the entire context and system design in AI interactions.



Fine-tuning is a machine learning technique used to adapt a pre-trained model to a sp