# Build a Multi-Agent Research Pipeline with LangGraph

![Multi-Agent Research Pipeline](images/multi-agent-research-pipeline.png)


In [None]:
from langchain_openai import ChatOpenAI

# Chat model instance; the cells below refer to it through the module-level name `llm`.
llm = ChatOpenAI(model="gpt-4o-mini")

## Build a team of Analysts

In [None]:
from typing import List
from typing_extensions import TypedDict
from pydantic import BaseModel, Field

# One analyst persona: who they are, where they work, and what they care about.
# The Field descriptions are runtime strings and are kept verbatim.
class Analyst(BaseModel):
    organization: str = Field(description="Where the analyst works or is connected to.")
    full_name: str = Field(description="The analyst's full name.")
    job_title: str = Field(description="What the analyst does in this project or topic.")
    about: str = Field(
        description="What the analyst cares about, what they focus on, or why they're involved."
    )

    @property
    def profile(self) -> str:
        """Return the analyst's details as a labelled, multi-line text block."""
        # The text opens with a newline and closes with a newline followed by
        # eight spaces; the first and last list items produce exactly that.
        return "\n".join(
            [
                "",
                f"Name: {self.full_name}",
                f"Title: {self.job_title}",
                f"Organization: {self.organization}",
                f"About: {self.about}",
                "        ",
            ]
        )

# Wrapper model holding a list of Analyst objects. It is the schema handed to
# llm.with_structured_output(...) in build_team_of_analysts.
class TeamOfAnalysts(BaseModel):
    analysts: List[Analyst] = Field(
        description="A list of people giving their input on the topic.",
    )

# Graph state for the analyst-creation workflow (passed to StateGraph as its schema).
class AnalystSetup(TypedDict):
    topic: str                  # What this team is researching or discussing
    number_of_analysts: int     # How many analysts we want to include
    editor_feedback: str        # Notes or feedback from a human reviewer (the notebook also writes None here to approve the team)
    analysts: List[Analyst]     # The current list of analysts

In [None]:
from IPython.display import Image, display
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage


def build_team_of_analysts(state: AnalystSetup):
    """Ask the LLM for a team of analysts for the topic held in ``state``.

    Reads ``topic`` and ``number_of_analysts`` (both required) and the
    optional ``editor_feedback`` from the state, and returns a partial state
    update of the form ``{"analysts": [...]}``.
    """
    # The feedback key may be absent or explicitly None (the notebook writes
    # None to approve a team). Normalise both to an empty string so the prompt
    # never contains the literal text "None".
    editor_feedback = state.get('editor_feedback') or ''

    system_message = f"""
    Your task is to create a team of AI analysts. Please follow these steps:
	
    1.	Read the research topic:
    {state['topic']}

	2.	Look at the optional notes or feedback from a human editor. It may help shape the analysts:
    {editor_feedback}

	3.	Find the most interesting ideas or issues based on the topic and feedback.
	
    4.	Choose the top {state['number_of_analysts']} ideas.
	
    5.	Create one analyst for each idea — each analyst should focus on just one theme.
    """

    # Constrain the model's output to the TeamOfAnalysts schema.
    structured_llm = llm.with_structured_output(TeamOfAnalysts)
    analysts = structured_llm.invoke([SystemMessage(content=system_message), HumanMessage(content="Create a team of analysts for this topic.")])

    return {"analysts": analysts.analysts}

def human_feedback(state: AnalystSetup):
    """Placeholder node: performs no work and returns None.

    It exists so the graph has a named step to pause on; the state is not
    modified here.
    """
    return None

def should_continue(state: AnalystSetup):
    """Route after the feedback step.

    Returns "build_team_of_analysts" when ``editor_feedback`` is present and
    truthy, otherwise END.
    """
    wants_rebuild = bool(state.get('editor_feedback', None))
    return "build_team_of_analysts" if wants_rebuild else END

# Wire up the analyst-creation graph:
#   START -> build_team_of_analysts -> human_feedback -> (build_team_of_analysts | END)
builder = StateGraph(AnalystSetup)
builder.add_node("build_team_of_analysts", build_team_of_analysts)
builder.add_node("human_feedback", human_feedback)
builder.add_edge(START, "build_team_of_analysts")
builder.add_edge("build_team_of_analysts", "human_feedback")
# should_continue picks the next step: rebuild the team or finish.
builder.add_conditional_edges("human_feedback", should_continue, ["build_team_of_analysts", END])

# Compile with an in-memory checkpointer and pause before the human_feedback node
memory = MemorySaver()
graph = builder.compile(interrupt_before=['human_feedback'], checkpointer=memory)

# Render the graph as a Mermaid PNG in the notebook
display(Image(graph.get_graph(xray=1).draw_mermaid_png()))

In [None]:
# Run configuration: all calls that pass this dict share thread_id 3.
thread = {"configurable": {"thread_id": 3}}

# First run: only topic and number_of_analysts are supplied (no editor_feedback).
setup = graph.invoke({
    "topic": "What Model Context Protocol is and why it makes AI smarter", 
    "number_of_analysts": 3}, 
    thread)

# Print the profile of each analyst in the returned state.
for analyst in setup['analysts']:
    print(analyst.profile)

In [None]:
# Fetch the saved state for this thread and show which node is scheduled next
state = graph.get_state(thread)
state.next

In [None]:
# Write reviewer feedback into the thread's state, attributed to the human_feedback node.
graph.update_state(thread, {
    "editor_feedback": "I think it would be helpful to add an analyst who focuses on how developers actually use MCP in real-world coding."}, 
    as_node="human_feedback")

In [None]:
# Continue the graph execution (input=None resumes from the saved state)
# and print the analysts found in each streamed state value.
for event in graph.stream(None, thread, stream_mode="values"):
    analysts = event.get('analysts', '')
    if analysts:
        for analyst in analysts:
            print(analyst.profile)

In [None]:
# Clear the feedback so should_continue routes to END instead of rebuilding the team.
graph.update_state(thread, {"editor_feedback": None}, as_node="human_feedback")

In [None]:
# graph.stream() returns a lazy iterator: nothing executes until it is
# consumed. Drain it so the graph actually resumes and runs to completion.
for _ in graph.stream(None, thread, stream_mode="values"):
    pass

# Inspect the saved state after the run; `next` lists any nodes still pending.
final_state = graph.get_state(thread)
print(final_state.next)

# Print the final team of analysts.
analysts = final_state.values.get('analysts')
for analyst in analysts:
    print(analyst.profile)

## Analysts Interview Experts

In [None]:
from typing import List
from typing_extensions import Annotated
from langgraph.graph import MessagesState
import operator


# Stores everything that happens during the chat between one analyst and one expert.
# Extends MessagesState, so the `messages` key is provided by the base class.
class InterviewSession(MessagesState):
    max_turns: int                          # How many times the analyst and expert can go back and forth
    sources: Annotated[List, operator.add]  # All search results found during the chat (Tavily, Wikipedia); updates are combined with operator.add
    analyst: Analyst                        # The analyst who is asking the questions
    full_conversation: str                  # The full interview as plain text (what they talked about)
    report_sections: List[str]              # What the analyst wrote based on the interview (can be 1 or more sections)
                                            # This is what we'll use later to build the full report

### Asking a Question

In [None]:
def ask_question(state: InterviewSession):
    """Produce the analyst's next interview message.

    Builds a system prompt around the analyst's profile, sends it to the LLM
    together with the conversation so far, and returns the reply as a
    ``messages`` update.
    """
    persona = state["analyst"].profile

    instructions = f"""
You are an AI analyst interviewing an expert about a specific topic.

Your job is to ask clear and thoughtful questions to get helpful, surprising, and specific answers.

1. Surprising: Ask things that lead to interesting or non-obvious insights.
2. Specific: Avoid general talk — push for examples and real details.

Here is your profile:
{persona}

Begin by introducing yourself in your analyst voice and ask your question.

Keep asking until you understand the topic fully.

When you're done, say: "Thank you so much for your help!" — this will end the interview.

Stay in character throughout the conversation."""

    # System prompt first, then the existing conversation history.
    prompt = [SystemMessage(content=instructions)] + state["messages"]
    next_message = llm.invoke(prompt)

    return {"messages": [next_message]}

### Generating an Answer

#### Nodes

In [None]:
from pydantic import BaseModel, Field
from langchain_core.messages import get_buffer_string
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools import TavilySearchResults

# A simple model to help the expert write a good search query.
# It is the schema passed to llm.with_structured_output(...) by the search nodes.
# NOTE(review): the field is annotated `str` but defaults to None, so
# search_text may be None if the model omits it — confirm the search calls tolerate that.
class SearchTask(BaseModel):
    search_text: str = Field(
        None,
        description="A short search query to help find useful information for the analyst’s question."
    )

# System prompt shared by the search nodes. It has no placeholders, so a plain
# (non-f) string literal is enough.
_SEARCH_PROMPT_TEXT = """
You are helping generate a search query for a web search.

You'll be given the full conversation between an analyst and an expert.  
Look at the entire discussion, and focus especially on the **last question** from the analyst.

Your task: Turn that question into a clear, well-structured search query."""

search_prompt = SystemMessage(content=_SEARCH_PROMPT_TEXT)


def search_web(state: InterviewSession):
    """Search the web with Tavily for material relevant to the conversation.

    The LLM first turns the conversation into a SearchTask; the resulting
    query is run through TavilySearchResults (up to 5 results) and the hits
    are returned as one formatted string inside a ``sources`` update.
    """
    query_writer = llm.with_structured_output(SearchTask)
    task = query_writer.invoke([search_prompt] + state["messages"])

    hits = TavilySearchResults(max_results=5).invoke(task.search_text)

    # One <Document> block per hit, carrying its URL and content.
    blocks = []
    for hit in hits:
        url = hit["url"]
        body = hit["content"]
        blocks.append(f'<Document href="{url}"/>\n{body}\n</Document>')

    return {"sources": ["\n\n---\n\n".join(blocks)]}


def search_wikipedia(state: InterviewSession):
    """Search Wikipedia for material relevant to the conversation.

    The LLM first turns the conversation into a SearchTask; the resulting
    query is loaded through WikipediaLoader (up to 5 documents) and the pages
    are returned as one formatted string inside a ``sources`` update.
    """
    query_writer = llm.with_structured_output(SearchTask)
    task = query_writer.invoke([search_prompt] + state["messages"])

    loader = WikipediaLoader(query=task.search_text, load_max_docs=5)
    pages = loader.load()

    # One <Document> block per page, carrying its source and (optional) page metadata.
    blocks = []
    for page in pages:
        source = page.metadata["source"]
        page_number = page.metadata.get("page", "")
        blocks.append(
            f'<Document source="{source}" page="{page_number}"/>\n{page.page_content}\n</Document>'
        )

    return {"sources": ["\n\n---\n\n".join(blocks)]}


def answer_question(state: InterviewSession):
    """Produce the expert's reply, grounded in the collected sources.

    The system prompt embeds the analyst's profile and the current
    ``sources`` value; the reply is tagged with the name "expert" and
    returned as a ``messages`` update.
    """
    persona = state["analyst"].profile
    documents = state["sources"]

    instructions = f"""
You are an expert being interviewed by an AI analyst.

Here is the analyst's profile:
{persona}

And here are documents you should use to answer the question:
{documents}

Guidelines:
1. Use only the info from the documents.
2. Don't guess or add anything new.
3. Reference documents using numbers like [1], [2].
4. List those sources at the bottom.
5. For example, write: [1] assistant/docs/mcp_guide.pdf, page 7."""

    reply = llm.invoke([SystemMessage(content=instructions)] + state["messages"])
    # Label the message so expert turns can be told apart from analyst turns.
    reply.name = "expert"

    return {"messages": [reply]}


def save_interview(state: InterviewSession):
    """Flatten the message history into one text transcript.

    Returns a ``full_conversation`` update built with get_buffer_string.
    """
    return {"full_conversation": get_buffer_string(state["messages"])}


def write_report_section(state: InterviewSession):
    """Draft one report section from the interview.

    The system prompt carries the writing rules and the analyst's focus area;
    the human message carries the transcript and the collected sources.
    Returns a ``report_sections`` update containing the LLM's text.
    """
    focus_area = state["analyst"].about

    instructions = f"""
You are a technical writer creating a short report based on an interview with an expert.

Your job is to write a clear, engaging section using the interview transcript as the main source, 
while using the attached documents only to support factual claims with proper citations.

Here’s how to structure the report using Markdown:

## Title  
### Summary  
### Sources

Writing instructions:
1. Use the interview transcript as your **primary source of insight**.
2. If a factual claim in the interview **can be confirmed by a document**, cite it using [1], [2], etc.
3. If a fact appears in the interview **but not in the documents**, it's okay to include it — just treat it as part of the expert's opinion.
4. Do **not** invent or assume anything beyond the transcript and the documents.
5. Keep the tone clear and concise. Avoid naming the expert or analyst.
6. Max length: 500 words.
7. In the “Sources” section, list each unique document used (no duplicates).
8. Use full links or filenames (e.g., [1] https://example.com or assistant/docs/mcp_guide.pdf, page 7).

Final review:
- Ensure Markdown structure is followed
- Make the title engaging and relevant to this focus area:
  **{focus_area}**"""

    transcript = state["full_conversation"]
    documents = state["sources"]

    # The transcript and the documents travel together in one human message.
    materials = f"""
Here are the materials you should use:

--- INTERVIEW TRANSCRIPT ---
{transcript}

--- DOCUMENTS FOR CITATION ---
{documents}
"""

    draft = llm.invoke(
        [SystemMessage(content=instructions), HumanMessage(content=materials)]
    )
    return {"report_sections": [draft.content]}

#### Conditional Edge

In [None]:
def continue_or_finish(state: InterviewSession, name: str = "expert"):
    """Choose the next step after an expert answer.

    Returns "save_interview" when the number of AI messages named ``name``
    has reached ``max_turns`` (default 2), or when the second-to-last message
    contains the closing phrase; otherwise returns "ask_question".
    """
    history = state["messages"]
    turn_limit = state.get("max_turns", 2)

    # Expert turns are the AI messages carrying the given name.
    expert_turns = sum(
        1 for msg in history if isinstance(msg, AIMessage) and msg.name == name
    )
    if expert_turns >= turn_limit:
        return "save_interview"

    # The message just before the latest one is the analyst's last question.
    said_goodbye = "Thank you so much for your help" in history[-2].content
    return "save_interview" if said_goodbye else "ask_question"

#### Constructing a Graph

In [None]:
# Create the interview graph; InterviewSession is its state schema
interview_graph_builder = StateGraph(InterviewSession)

# Register each step (node) of the interview flow
interview_graph_builder.add_node("ask_question", ask_question)
interview_graph_builder.add_node("search_web", search_web)
interview_graph_builder.add_node("search_wikipedia", search_wikipedia)
interview_graph_builder.add_node("answer_question", answer_question)
interview_graph_builder.add_node("save_interview", save_interview)
interview_graph_builder.add_node("write_report_section", write_report_section)

# Define the flow between steps

# Start: analyst asks a question
interview_graph_builder.add_edge(START, "ask_question")

# Fan out: the question feeds both the web search and the Wikipedia search
interview_graph_builder.add_edge("ask_question", "search_web")
interview_graph_builder.add_edge("ask_question", "search_wikipedia")

# Fan in: both searches lead to the expert's answer
interview_graph_builder.add_edge("search_web", "answer_question")
interview_graph_builder.add_edge("search_wikipedia", "answer_question")

# continue_or_finish decides whether to ask another question or wrap up
interview_graph_builder.add_conditional_edges(
    "answer_question",
    continue_or_finish,
    ["ask_question", "save_interview"]
)

# Once done, save the full interview transcript
interview_graph_builder.add_edge("save_interview", "write_report_section")

# Finally, write the report section and finish
interview_graph_builder.add_edge("write_report_section", END)

# Compile with an in-memory checkpointer and give runs a readable name
memory = MemorySaver()
interview_graph = interview_graph_builder.compile(checkpointer=memory).with_config(
    run_name="analysts_interview_experts"
)

# Display the flow as an image
display(Image(interview_graph.get_graph().draw_mermaid_png()))

In [None]:
# Show the first analyst; this is the one passed to the interview run below.
analysts[0]

In [None]:
from IPython.display import Markdown

# Run configuration for this interview run.
thread = {"configurable": {"thread_id": "1"}}
topic = "What Model Context Protocol is and why it makes AI smarter"

# Start the conversation with a natural topic introduction
messages = [HumanMessage(content=f"I'm researching the topic: {topic}. I'd like to ask you a few questions.")]


# Run a single interview for the first analyst, capped at two expert turns.
interview = interview_graph.invoke({
    "analyst": analysts[0],
    "messages": messages,
    "max_turns": 2
}, thread)

# Display the generated report section
Markdown(interview["report_sections"][0])

In [None]:
# Print the plain-text transcript stored under full_conversation.
print(interview["full_conversation"])

## Build the Final Report

In [None]:
from typing import List, Annotated
import operator
from typing_extensions import TypedDict

# State schema for the end-to-end research graph.
class ResearchProjectState(TypedDict):
    topic: str  # The main topic this project is focused on
    number_of_analysts: int  # How many analysts we want to include
    editor_feedback: str  # Notes or guidance from a human reviewer
    analysts: List[Analyst]  # The team of analysts created for this topic

    # Sections written by each analyst based on their interview.
    # Annotated with operator.add so separate updates are concatenated into
    # a single list, which is then used to build the final report.
    report_sections: Annotated[List[str], operator.add]  

    introduction: str  # Intro paragraph for the final report
    content: str       # Main content/body of the final report
    conclusion: str    # Final paragraph that wraps up the report
    final_report: str  # The full report (intro + content + conclusion)

### Map - Reduce

In [None]:
from langgraph.constants import Send

def start_all_interviews(state: ResearchProjectState):
    """Map step: route back to team building or fan out one interview per analyst.

    Returns:
        The string "build_team_of_analysts" when ``editor_feedback`` is
        present and truthy; otherwise a list with one ``Send`` per analyst,
        each targeting the "analysts_interview_experts" node with that
        analyst and an opening message about the topic.
    """
    # If feedback is present, return to team creation
    if state.get("editor_feedback"):
        return "build_team_of_analysts"

    # Build the opening line outside the Send call. Reusing the enclosing
    # quote character inside an f-string expression (f"...{state["topic"]}...")
    # is a SyntaxError before Python 3.12.
    topic = state["topic"]
    opening_line = f"I'm researching the topic: {topic}. I'd like to ask you a few questions."

    # Otherwise, launch one interview per analyst; each gets its own message object.
    return [
        Send(
            "analysts_interview_experts",
            {
                "analyst": analyst,
                "messages": [HumanMessage(content=opening_line)],
            },
        )
        for analyst in state["analysts"]
    ]

### Create Writing Nodes

In [None]:
def write_report_summary(state: ResearchProjectState):
    """Combine all analyst memos into the main report content.

    Reads ``topic`` and ``report_sections`` from the state, asks the LLM for a
    single consolidated report, and returns it as a ``content`` update.
    """
    # Join the memos before building the prompt: a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12.
    memos = "\n\n".join(state["report_sections"])

    summary_instructions = f"""
You are a technical writer. Your task is to write a report on the following topic:

{state["topic"]}

Here's what you should know:

- A team of analysts interviewed experts on different subtopics.
- Each analyst wrote a short memo with their findings.

Your job:

1. You’ll receive all the memos written by the analysts.
2. Carefully review them and extract the key ideas.
3. Combine everything into one clear and cohesive summary that tells the full story.
4. Organize the insights into a single, well-written report.

Formatting instructions:

- Use Markdown.
- Don’t include any preamble or introduction to the report itself.
- Don’t use any subheadings.
- Begin with a top-level heading: ## Insights
- Do not mention the names of any analysts.
- Keep any citations from the memos (like [1], [2], etc.) exactly as they are.
- At the end, add a ## Sources section with all sources used, listed in order.
- Don’t repeat duplicate sources.

Here’s how the sources section should look:

[1] Source 1  
[2] Source 2

Here are the memos to work from:

{memos}"""

    system_msg = SystemMessage(content=summary_instructions)
    user_msg = HumanMessage(content="Please summarize all the memos into a single report.")

    summary = llm.invoke([system_msg, user_msg])
    return {"content": summary.content}





def write_report_introduction(state: ResearchProjectState):
    """Write the introduction section for the final report.

    Reads ``topic`` and ``report_sections`` from the state and returns the
    LLM's text as an ``introduction`` update.
    """
    # Join the sections before building the prompt: a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12.
    sections = "\n\n".join(state["report_sections"])

    intro_instructions = f"""
You are a technical writer helping to finalize a report on the topic:

{state["topic"]}

You’ll receive all of the report’s main sections and will be asked to write either the introduction.

Instructions:

- Keep it clear and compelling.
- Stick to around 100 words.
- Don’t include any preamble before the section.
- Use Markdown formatting.
- Start with a strong title using a single # header
- Then add a ## Introduction heading
- Give a brief preview of the key ideas from the sections

Here are the report sections to reflect on:

{sections}"""

    system_msg = SystemMessage(content=intro_instructions)
    user_msg = HumanMessage(content="Please write an introduction for the report.")

    intro = llm.invoke([system_msg, user_msg])
    return {"introduction": intro.content}


def write_report_conclusion(state: ResearchProjectState):
    """Write the conclusion section for the final report.

    Reads ``topic`` and ``report_sections`` from the state and returns the
    LLM's text as a ``conclusion`` update.
    """
    # Join the sections before building the prompt: a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12.
    sections = "\n\n".join(state["report_sections"])

    conclusion_instructions = f"""
You are a technical writer helping to finalize a report on the topic:

{state["topic"]}

You’ll receive all of the report’s main sections and will be asked to write either the conclusion.

Instructions:

- Keep it clear and compelling.
- Stick to around 100 words.
- Don’t include any preamble before the section.
- Use Markdown formatting.
- Use a ## Conclusion heading
- Summarize the core takeaways from the report

Here are the report sections to reflect on:

{sections}"""

    system_msg = SystemMessage(content=conclusion_instructions)
    user_msg = HumanMessage(content="Please write a conclusion for the report.")

    conclusion = llm.invoke([system_msg, user_msg])
    return {"conclusion": conclusion.content}


def finalize_report(state: ResearchProjectState):
    """Assemble the final Markdown report.

    Reads ``introduction``, ``content`` and ``conclusion`` from the state and
    returns ``{"final_report": text}`` where the three parts are separated by
    horizontal rules. If the content contains a "## Sources" heading line,
    everything after it is moved to the very end of the report.
    """
    insights_heading = "## Insights"
    sources_heading = "## Sources"

    content = state["content"].strip()
    # Drop only the leading heading; later mentions of it in the body stay.
    if content.startswith(insights_heading):
        content = content[len(insights_heading):].strip()

    # Split at the first line that is exactly the sources heading (ignoring
    # surrounding whitespace), so a heading at the end of the text or with
    # trailing spaces is still recognised.
    sources = None
    lines = content.splitlines()
    for index, line in enumerate(lines):
        if line.strip() == sources_heading:
            content = "\n".join(lines[:index]).strip()
            sources = "\n".join(lines[index + 1:]).strip()
            break

    # Join without any leading indentation: in Markdown a line indented by
    # four spaces is rendered as a code block rather than as a heading or text.
    full_report = "\n\n---\n\n".join(
        [state["introduction"].strip(), content, state["conclusion"].strip()]
    )

    if sources:
        full_report += f"\n\n{sources_heading}\n{sources}"

    return {"final_report": full_report}

### Building Everything Up

In [None]:
# Set up the full research project graph; ResearchProjectState is its state schema
builder = StateGraph(ResearchProjectState)

# Add all nodes
builder.add_node("build_team_of_analysts", build_team_of_analysts)
builder.add_node("human_feedback", human_feedback)
# NOTE(review): interview_graph was compiled with its own checkpointer and
# wrapped by with_config — confirm it behaves as intended when used as a node here.
builder.add_node("analysts_interview_experts", interview_graph)
builder.add_node("write_report_summary", write_report_summary)
builder.add_node("write_report_introduction", write_report_introduction)
builder.add_node("write_report_conclusion", write_report_conclusion)
builder.add_node("finalize_report", finalize_report)

# Set up graph flow
builder.add_edge(START, "build_team_of_analysts")
builder.add_edge("build_team_of_analysts", "human_feedback")
# start_all_interviews either routes back to team building or fans out interviews
builder.add_conditional_edges("human_feedback", start_all_interviews, [
    "build_team_of_analysts",
    "analysts_interview_experts"
])
# The interviews feed three writing nodes
builder.add_edge("analysts_interview_experts", "write_report_summary")
builder.add_edge("analysts_interview_experts", "write_report_introduction")
builder.add_edge("analysts_interview_experts", "write_report_conclusion")
# finalize_report is fed by all three writing nodes
builder.add_edge(
    ["write_report_summary", "write_report_introduction", "write_report_conclusion"],
    "finalize_report"
)
builder.add_edge("finalize_report", END)

# Compile with an in-memory checkpointer and pause before human_feedback
memory = MemorySaver()
research_graph = builder.compile(interrupt_before=["human_feedback"], checkpointer=memory)

# Display the full graph
display(Image(research_graph.get_graph(xray=1).draw_mermaid_png()))

### Testing

In [None]:
# Inputs for the end-to-end run
thread = {"configurable": {"thread_id": 1}}
number_of_analysts = 3 
topic = "What Model Context Protocol is and why it makes AI smarter"


# Run the graph until the first interruption, printing any analysts
# present in each streamed state value
for event in research_graph.stream({"topic": topic,
                           "number_of_analysts": number_of_analysts}, 
                          thread, 
                          stream_mode="values"):
    analysts = event.get('analysts', '')
    if analysts:
        for analyst in analysts:
            print(analyst.profile)

In [None]:
# Fetch the saved state for this thread and show which node is scheduled next
state = research_graph.get_state(thread)
state.next

In [None]:
# Write reviewer feedback into the state as if the human_feedback node had produced it
research_graph.update_state(thread, {
    "editor_feedback": "I think it would be helpful to add an analyst who focuses on how developers actually use MCP in real-world coding."
    }, as_node="human_feedback")

In [None]:
# Resume the run (input=None) and print the analysts from each streamed state value
for event in research_graph.stream(None, thread, stream_mode="values"):
    analysts = event.get('analysts', '')
    if analysts:
        for analyst in analysts:
            print(analyst.profile)

In [None]:
# Approve the team: with editor_feedback set to None, start_all_interviews
# takes the interview branch instead of rebuilding the team
research_graph.update_state(thread, {"editor_feedback": None}, as_node="human_feedback")

In [None]:
# Resume the run and print the name of the first key in each streamed update
for event in research_graph.stream(None, thread, stream_mode="updates"):
    node_name = next(iter(event.keys()))
    print("> " + node_name)

In [None]:
from IPython.display import Markdown
# Read the assembled report from the saved state and render it as Markdown
final_state = research_graph.get_state(thread)
report = final_state.values.get('final_report')
Markdown(report)