# imports and inits

In [None]:
from typing import List, Optional, Union, TypedDict, Literal
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, BaseMessage, trim_messages, AIMessage
from langgraph.graph import END, StateGraph, START
from langchain_groq import ChatGroq
from langgraph.prebuilt import create_react_agent
from dotenv import load_dotenv
from pprint import pprint
import functools
import operator

from team_tools import tavily_search_tool, arxiv_search_tool, web_scraper_tool, repl_tool

from qdrant_cloud_ops import initialize_selfquery_retriever, qdrant_vector_store
from chains import create_decomposition_chain
from typing import Annotated, List, Union, TypedDict
from langchain.tools.retriever import create_retriever_tool







load_dotenv()

# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

llm = ChatGroq(model='llama-3.1-70b-versatile')

trimmer = trim_messages(
    max_tokens=120000,
    strategy="last",
    token_counter=llm,
    include_system=True,
)

qdrant_retriever = initialize_selfquery_retriever(llm, qdrant_vector_store=qdrant_vector_store)
qdrant_retriever_tool = create_retriever_tool(
    qdrant_retriever,
    "retrieve_research_paper_texts",
    "Search and return information from the vector database containing texts of several research papers, and scholarly articles",
)

dec = create_decomposition_chain(llm=llm)

  from .autonotebook import tqdm as notebook_tqdm



Started Qdrant client.
Collection 'aireas-cloud' already exists.


# Utils

In [2]:
def create_team_supervisor(llm, system_prompt, members) -> str:
    """An LLM-based supervisor router for ReWOO-style reasoning."""
    options = ["FINISH"] + members
    
    # Define a function schema for routing steps
    function_def = {
        "name": "route",
        "description": "Execute the steps of a given plan sequentially by invoking the specified agent.",
        "parameters": {
            "title": "routeSchema",
            "type": "object",
            "properties": {
                "next": {
                    "title": "Next Agent",
                    "anyOf": [
                        {"enum": options},
                    ],
                },
                "step": {
                    "title": "Step Description",
                    "type": "string",
                    "description": "The specific plan step description to execute, including agent and input.",
                },
                "input": {
                    "title": "Agent Input",
                    "type": "string",
                    "description": "The exact input required for the agent in this step.",
                },
            },
            "required": ["next", "step", "input"],
        },
    }
    
    # Define a system prompt to guide the supervisor's behavior
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="messages"),
            (
                "system",
                "Based on the conversation and plan, determine who should act next. "
                "Provide the step to execute, the agent to use, and the input for the agent. "
                "Select one of: {options}. Use FINISH when all steps are complete."
            ),
        ]
    ).partial(options=str(options), team_members=", ".join(members))
    
    # Combine with trimmer, bind functions, and output parser
    return (
        prompt
        | trimmer
        | llm.bind_functions(functions=[function_def], function_call="route")
        | JsonOutputFunctionsParser()
    )



In [3]:
def agent_node(state, agent, name):
    result = agent.invoke(state)
    return {
        "messages": [HumanMessage(content=result["messages"][-1].content, name=name)]
    }


# def create_team_supervisor(llm, system_prompt, members) -> str:
#     """An LLM-based router."""
#     options = ["FINISH"] + members
#     function_def = {
#         "name": "route",
#         "description": "Execute the steps of given plan.",
#         "parameters": {
#             "title": "routeSchema",
#             "type": "object",
#             "properties": {
#                 "next": {
#                     "title": "Next",
#                     "anyOf": [
#                         {"enum": options},
#                     ],
#                 },
#             },
#             "required": ["next"],
#         },
#     }
#     prompt = ChatPromptTemplate.from_messages(
#         [
#             ("system", system_prompt),
#             MessagesPlaceholder(variable_name="messages"),
#             (
#                 "system",
#                 "Given the conversation above, who should act next?"
#                 "Or should we FINISH? Select one of: {options}",
#             ),
#         ]
#     ).partial(options=str(options), team_members=", ".join(members))
#     return (
#         prompt
#         | trimmer
#         | llm.bind_functions(functions=[function_def], function_call="route")
#         | JsonOutputFunctionsParser()
#     )

def enter_chain(message: str):
    results = {
        "messages": [HumanMessage(content=message)],
    }
    return results

# New teams

## research team

In [4]:
# from langgraph.prebuilt import create_react_agent

# k = create_react_agent(llm, tools=[tavily_search_tool, qdrant_retriever_tool])


# for s in k.stream({"messages": [("user", 'what is attention mechanism')]}, stream_mode="values"):
#     message = s["messages"][-1]
#     if isinstance(message, tuple):
#         print(message)
#     elif isinstance(message, AIMessage) and message.content.strip():  # Ensure content is not empty
#         message.pretty_print()


In [5]:
# rag_agent = create_react_agent(llm, tools=[qdrant_retriever_tool])
# rag_node = functools.partial(agent_node, agent=rag_agent, name="RAGSearcher")

# search_agent = create_react_agent(llm, tools=[tavily_search_tool, web_scraper_tool, arxiv_search_tool])
# search_node = functools.partial(agent_node, agent=search_agent, name="Searcher")

# code_agent = create_react_agent(llm, tools=[repl_tool])
# code_node = functools.partial(agent_node, agent=code_agent, name="Coder")

## Rewoo

## rewoo prompts

In [6]:
rewoo_prompt = """
You will be given a string of simple questions seperated by comma. For each question, develop a series of sequential plans that specify exactly which agents to use to retrieve the necessary evidence. Format each plan in the following way:

Format:
Plan: [Provide a concise description of the intended action, including any specific sources, search queries, or steps that must be followed. Reference any evidence needed from previous steps.]
#E[number] = [Agent[Specific Query/Input, including any references to previous #E results if applicable]]

Use the minimum number of plans necessary to provide an accurate and relevant answer. Each plan should be followed by only one #E, with clear sequential ordering for reference by subsequent steps.
Strictly provide output only. Provide a complete plan that addresses all questions as a whole, instead of creating individual plans for each question.

Agents Available:
- RAGSearcher[input]: Uses a vector database to retrieve relevant documents or research papers based on prior knowledge or pre-embedded content (use for tasks related to research papers in the form of PDFs or topics embedded in the database).
- Searcher[input]: Conducts searches via Tavily search, Web Scraper, or Arxiv Search to retrieve both general web information and academic papers from online sources.
- Coder[input]: A code-execution agent using Python REPL for tasks requiring code, data analysis, or visualizations.
- ChatBot[input]: Processes or generates natural language responses based on gathered evidence or specific input.

### Routing Guidance:
- For research papers or queries based on prior knowledge, use **RAGSearcher** to pull from the vector database.
- For general online or web-based searches, use **Searcher**.
- For code or analysis tasks, use **Coder**.
- For summarization or extracting specific information from prior steps, use **ChatBot**.

Embedded Documents in the Vector Database:
   - "Attention is All You Need" (name: a.pdf)
   - "MAGVIT: Masked Generative Video Transformer" (name: m.pdf)
   - "SAN: Inducing Metrizability of GAN with Discriminative Normalized Linear Layer" (name: san.pdf)

### Advice:
- When creating each plan, ensure that the query given to the agent precisely reflects the questions received. This will help the agent retrieve the most relevant answer directly aligned with the original question.

### Examples
Task: Summarize recent advancements in Video Transformers for action recognition tasks.
Plan: Search for recent publications on Arxiv that discuss Video Transformers for action recognition using the Searcher agent. #E1 = Searcher[Video Transformers action recognition]
Plan: Retrieve related research papers on Video Transformers stored in the vector database using the RAGSearcher agent. #E2 = RAGSearcher[Video Transformers action recognition]
Plan: Use the retrieved documents to generate a summary highlighting advancements in the use of Video Transformers for action recognition. #E3 = ChatBot[Summarize #E1, #E2]

Task: Analyze the importance of GAN metrizability for improved performance in generative models.
Plan: Search Arxiv for recent studies on GAN metrizability and its impact on generative model performance using the Searcher agent. #E1 = Searcher[GAN metrizability and generative models]
Plan: Retrieve any embedded research on GAN metrizability from the vector database using the RAGSearcher agent. #E2 = RAGSearcher[GAN metrizability in generative models]
Plan: Summarize findings on why metrizability is significant for GAN performance based on the retrieved papers. #E3 = ChatBot[Summarize #E1, #E2]

Task: {task}

"""


rewoo_solve_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
contain irrelevant information.

{plan}

Now solve the question or task according to provided Evidence above. Respond with the answer
directly with no extra words.

Task: {task}
Response:"""

## rewoo state

In [7]:
# dec.invoke('abstract of san.pdf and explain difference between san and attention, explain these concepts in simpler terms')
task = dec.invoke('abstract of san.pdf and explain difference between san and attention, explain these concepts in simpler terms')
task


'What is the main idea or summary of the san.pdf document?, What is the concept of SAN (Self-Attention Network) in simple terms?, How does SAN differ from the Attention mechanism, explained in simple terms?'

In [8]:
from typing import List, Dict
from typing_extensions import TypedDict
from pprint import pprint

class ResearchTeamState(TypedDict):
    messages: Annotated[List[BaseMessage], operator.add]
    team_members: List[str]

    task: str
    plan_string: str
    steps: List[Dict[str, str]]
    results: dict
    result: str

    next: str
    step: str
    input: str

## team

In [17]:
import re

from langchain_core.prompts import ChatPromptTemplate

# Regex to match expressions of the form E#... = ...[...]
regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
prompt_template = ChatPromptTemplate.from_messages([("user", rewoo_prompt)])
planner = prompt_template | llm


def get_plan(state: ResearchTeamState):
    task = state["task"]
    result = planner.invoke({"task": task})
    # Find all matches in the sample text
    matches = re.findall(regex_pattern, result.content)
    return {"steps": matches, "plan_string": result.content}


print(planner.invoke(dec.invoke('abstract of san.pdf and explain difference between san and attention, explain these concepts in simpler terms'))
.content)

Plan: Retrieve the embedded research document 'san.pdf' from the vector database using the RAGSearcher agent. #E1 = RAGSearcher[san.pdf]
Plan: Summarize the main idea of the retrieved 'san.pdf' document using the ChatBot agent. #E2 = ChatBot[Summarize #E1]
Plan: Use the ChatBot agent to explain the concept of SAN (Self-Attention Network) based on the retrieved document. #E3 = ChatBot[Explain SAN concept in simple terms]
Plan: Search the vector database for the embedded document 'a.pdf' (Attention is All You Need) using the RAGSearcher agent. #E4 = RAGSearcher[a.pdf]
Plan: Use the ChatBot agent to compare and contrast the concepts of SAN and Attention in simple terms based on the retrieved documents. #E5 = ChatBot[Distinguish SAN and Attention concepts in simple terms #E3, #E4]


In [13]:
rag_agent = create_react_agent(llm, tools=[qdrant_retriever_tool])
rag_node = functools.partial(agent_node, agent=rag_agent, name="RAGSearcher")

search_agent = create_react_agent(llm, tools=[tavily_search_tool, web_scraper_tool, arxiv_search_tool])
search_node = functools.partial(agent_node, agent=search_agent, name="Searcher")

code_agent = create_react_agent(llm, tools=[repl_tool])
code_node = functools.partial(agent_node, agent=code_agent, name="Coder")

chat_node = functools.partial(agent_node, agent=llm, name="ChatBot")

In [18]:
supervisor_agent = create_team_supervisor(
    llm,
    system_prompt=(
        "You are a supervisor tasked with managing and orchestrating a team of workers: "
        "RAGSearcher, Searcher, Coder, and ChatBot. Your goal is to execute the provided plan sequentially. "
        "Each worker will perform a task as per the step description and respond with their results and status. "
        "When all steps are complete, respond with FINISH.\n\n"

        "At each step:\n"
        "- Identify the next agent to act based on the plan.\n"
        "- Specify the step to execute and the required input.\n"
        "- Continue until all steps are completed.\n"
        "Ensure the responses are concise, relevant, and aligned with the task."
    ),
    members=["RAGSearcher", "Searcher", "Coder", "ChatBot"]
)


In [15]:
# Create the research graph with ReWOO-style orchestration
research_graph = StateGraph(ResearchTeamState)

# Add nodes for each agent
research_graph.add_node("RAGSearcher", rag_node)
research_graph.add_node("Searcher", search_node)
research_graph.add_node("Coder", code_node)
research_graph.add_node("ChatBot", chat_node)
research_graph.add_node("supervisor", supervisor_agent)

# Define the control flow
# Agents report back to the supervisor upon completion
research_graph.add_edge("RAGSearcher", "supervisor")
research_graph.add_edge("Searcher", "supervisor")
research_graph.add_edge("Coder", "supervisor")
research_graph.add_edge("ChatBot", "supervisor")

# Supervisor decides the next agent to execute based on the ReWOO plan
research_graph.add_conditional_edges(
    "supervisor",
    lambda x: x["next"],  # The supervisor decides the next agent
    {
        "RAGSearcher": "RAGSearcher",
        "Searcher": "Searcher",
        "Coder": "Coder",
        "ChatBot": "ChatBot",
        "FINISH": END  # End the process when the plan is complete
    }
)

# Add a start edge from the initial state to the supervisor
research_graph.add_edge(START, "supervisor")

# Compile the state graph into a runnable chain
chain = research_graph.compile()

# Function to enter the research chain with an initial message
def enter_chain(message: str):
    # Initialize the research chain state with the user message
    results = {
        "messages": [HumanMessage(content=message)],
        "team_members": ["RAGSearcher", "Searcher", "Coder", "ChatBot"],  # Define available agents
    }
    return results

# Create the research chain by linking the entry function and the compiled chain
research_chain = enter_chain | chain


In [16]:
q = planner.invoke(dec.invoke('abstract of san.pdf and explain difference between san and attention, explain these concepts in simpler terms')).content
q

'Plan: Retrieve the abstract of the san.pdf file using the RAGSearcher agent to extract necessary information from the embedded document. #E1 = RAGSearcher[Abstract of san.pdf]\nPlan: Summarize the main topic of the san.pdf file based on the abstract retrieved from #E1. #E2 = ChatBot[Summarize main topic of #E1]\nPlan: Explain the Self-Modulating Attention Network (SAN) in simple terms using the abstract and main topic information from #E1 and #E2. #E3 = ChatBot[Explain SAN in simple terms #E1, #E2]\nPlan: Compare and contrast SAN with traditional Attention mechanisms in simple terms, utilizing the information from #E1, #E2, and #E3 to highlight differences. #E4 = ChatBot[Compare SAN and traditional Attention mechanisms #E1, #E2, #E3]'

In [102]:
for s in research_chain.stream(
    q, {"recursion_limit": 100}
):
    if "__end__" not in s:
        print(s)
        print("---")

{'supervisor': {'next': 'RAGSearcher', 'step': 'Retrieve the abstract of the paper', 'input': 'Abstract of a.pdf'}}
---


InvalidUpdateError: Expected dict, got [HumanMessage(content='Task: Summarize the main contributions of the paper "Attention is All You Need".\n\n\nPlan: Retrieve the abstract of the paper "Attention is All You Need" from the vector database using the RAGSearcher agent. #E1 = RAGSearcher[Abstract of a.pdf]\n\n\nPlan: Summarize the main contributions of the paper based on the abstract. #E2 = llm[Summarize #E1]', additional_kwargs={}, response_metadata={})]
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_GRAPH_NODE_RETURN_VALUE

In [None]:
chat_node

In [87]:
# Simulated state for testing
test_state = {
    "messages": [HumanMessage(content="What is a Self-Attention Network (SAN)?")],
}

# Test the ChatBot node
output = chat_node(test_state)

# Print the output
print("ChatNode Output:")
for message in output["messages"]:
    print(f"{message.name}: {message.content}")


ValueError: Invalid input type <class 'dict'>. Must be a PromptValue, str, or list of BaseMessages.