https://github.com/langchain-ai/langgraph/blob/main/examples/rewoo/rewoo.ipynb?ref=blog.langchain.dev

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.environ["AZURE_OPENAI_KEY"]
search_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
search_key = os.environ["AZURE_SEARCH_ADMIN_KEY"]

In [2]:
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI

from langchain.vectorstores.azuresearch import AzureSearch
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
)

import re
import textwrap

## [00] Define a state dict to hold the task, plan, steps, and other variables.

In [3]:
from typing import TypedDict, List


class ReWOO(TypedDict):
    """Graph state shared by the plan, tool, and solve nodes."""

    # Task text; read by get_plan and solve.
    task: str
    # Raw planner response text, stored by get_plan.
    plan_string: str
    # Parsed plan steps: regex matches of (description, step name, tool, tool input).
    steps: List
    # Evidence gathered so far, keyed by step name (e.g. "#E1"); values are stored as str.
    results: dict
    # Final answer text produced by solve.
    result: str

## [0] Embedding/vectorisation

In [4]:
# For RAG

# Embedding client; its embed_query method is handed to the vector store below.
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    api_key=azure_openai_key,
    azure_endpoint=azure_openai_endpoint,
    api_version="2023-09-01-preview",
    chunk_size=1 
)
# Azure AI Search vector store over the "boardai03" index.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=search_endpoint,
    azure_search_key=search_key,
    index_name="boardai03",
    embedding_function=embeddings.embed_query,
)

# NOTE(review): `search_key="hybrid"` is passed here, but the option that selects
# the search mode is presumably `search_type` — confirm against the installed
# LangChain version before relying on hybrid search. search_kwargs requests k=2.
retriever = vector_store.as_retriever(search_key="hybrid", search_kwargs={"k": 2})

# RAG prompt pulled from the LangChain hub. The name `prompt` is reassigned to the
# planner prompt string in a later cell and re-pulled before rag_chain is built.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the page_content of each document, separated by a blank line."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

[llm]

In [5]:
# Azure OpenAI chat model (deployment "gpt-4"); bound to `model` in a later cell
# and used by the planner, the RAG chain, and the solver.
llm = AzureChatOpenAI(
    deployment_name="gpt-4",
    api_key=azure_openai_key,
    azure_endpoint=azure_openai_endpoint,
    api_version="2023-09-01-preview",    
)

## [1] Planner
https://github.com/langchain-ai/langgraph/blob/main/examples/rewoo/rewoo.ipynb?ref=blog.langchain.dev

In [6]:
# Planner prompt with {task} and {context} placeholders.
# The tool names used in the worked example must be exactly the names declared in
# the tool list (RAG_LLM, SEND_EMAIL, COMPILE, CIRCULATE, PEOPLE_DB): the model
# copies the example, the plan parser only accepts \w+ tool names (so "RAG+LLM"
# is not parsed), and tool_execution dispatches on these exact names.
prompt = """For the following task, make plans that can solve the problem step by step. For each plan, indicate \
which external tool together with tool input to retrieve evidence. You can store the evidence into a \
variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)

Tools can be one of the following:

(1) RAG_LLM[input]: A pretrained RAG+LLM like yourself. Useful when you need to act with general
world knowledge and common sense + specific knowledge and data especially for Board and secretary role. Prioritize it when you are confident in solving the problem
yourself. Input can be any instruction.

(2) SEND_EMAIL[input]: Worker that sends emails to recipient, checks status of the return of those email and chases. Input will be a list of recipient and contents.

(3) COMPILE[input]:Worker that add all the separate paper into one board paper. input will be multiple board paper pieces from board members.

(4) CIRCULATE[input]: Worker that send compiled board paper to board members and get feed back and fix. Input will be a board paper. 

(5) PEOPLE_DB[input]: Worker that gives you a list of relevant people's name, email based on their role, team. Input will be role or team name.

For example,
Task: execute this: To create a board paper, first, based on an approved agenda, the secretary sends an email to the NEC (or board members). The NEC then writes the paper. 
After the paper is written, the secretary compile, circulates and distributes the paper to the board members.


Plan: Find out what is approved agenda. #E1 = RAG_LLM[what is approved agenda]

Plan: Find out who are the NEC or board members. #E2 = PEOPLE_DB[who are the NEC or board members]

Plan: the secretary writes an email to the NEC (or board members). #E3 = RAG_LLM[write an email using given approved agenda: #E1]

Plan: the secretary writes and sends an email to the NEC (or board members).Then NEC writes the paper. #E4 = SEND_EMAIL[#E2]

Plan: compilePaper #E5 = COMPILE[#E4] 

Plan: After the paper is written, the secretary circulates and distributes the paper to the board members. #E6 = CIRCULATE[#E5]


Begin! 
Describe your plans with rich details. Each Plan should be followed by only one #E.


\nTask: {task}
\nContext: {context}


\nAnswer:"""


In [7]:
# Wrap the planner prompt text as a single user message.
# NOTE(review): `p_prompt` is not referenced again in the visible cells; a later
# cell builds `prompt_template` from the same call.
p_prompt = ChatPromptTemplate.from_messages([("user", prompt)])

Test with the LLM only, rather than the combined model (LLM + RAG).

In [8]:
# Task given to the planner and, later, used as the graph input.
task = "create a board paper?"

# Alternative, fully spelled-out task kept for experimentation:
# task ="execute this: To create a board paper, first, based on an approved agenda, the secretary sends an email to the NEC (or board members). The NEC then writes the paper. After the paper is written, the secretary compile, circulates and distributes the paper to the board members."

In [9]:
# `model` is the name the planner, rag_chain, and solve use; here it is the bare LLM.
model = llm

In [10]:
# Regex to match expressions of the form "Plan: <description> #E<n> = <tool>[<input>]".
# Capture groups, in order: description, step name, tool name, tool input.
# The tool-name group is \w+, so a tool written with other characters
# (e.g. "RAG+LLM") does not match and that step is not extracted.
regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]" 

# Planner chain: the planner prompt as a single user message, piped into the model.
prompt_template = ChatPromptTemplate.from_messages([("user", prompt)])
planner = prompt_template | model


def get_plan(state: ReWOO):
    """Planner node: ask the LLM for a step-by-step plan and parse it into steps.

    Args:
        state: Graph state; only ``state["task"]`` is read.

    Returns:
        A partial state update with ``steps`` (the list of
        ``(description, step_name, tool, tool_input)`` tuples extracted with
        ``regex_pattern``) and ``plan_string`` (the raw response text).

    Raises:
        ValueError: If the response contains no step in the
            ``Plan: ... #E<n> = TOOL[input]`` form.
    """
    task = state["task"]

    # Fill {context} with retrieved document text. Passing the retriever object
    # itself would only interpolate its repr into the prompt.
    context = format_docs(retriever.get_relevant_documents(task))

    # The prompt only declares {task} and {context}; "recursion_limit" is a run
    # configuration option rather than a prompt input, so it is not passed here.
    result = planner.invoke({"task": task, "context": context})

    # Each match is (step description, step name such as "#E1", tool, tool input).
    matches = re.findall(regex_pattern, result.content)
    if not matches:
        # Fail here with the offending text instead of with an IndexError later
        # when the tool node tries to read the first step.
        raise ValueError(
            f"Planner response contained no parsable steps: {result.content!r}"
        )

    return {"steps": matches, "plan_string": result.content}


In [11]:
prompt_template

ChatPromptTemplate(input_variables=['context', 'task'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'task'], template="For the following task, make plans that can solve the problem step by step. For each plan, indicate which external tool together with tool input to retrieve evidence. You can store the evidence into a variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)\n\nTools can be one of the following:\n\n(1) RAG_LLM[input]: A pretrained RAG+LLM like yourself. Useful when you need to act with general\nworld knowledge and common sense + specific knowledge and data especially for Board and secretary role. Prioritize it when you are confident in solving the problem\nyourself. Input can be any instruction.\n\n(2) SEND_EMAIL[input]: Worker that sends emails to recipient, checks status of the return of those email and chases. Input will be a list of recipient and contents.\n\n(3) COMPILE[input]:Worker that add all th

In [12]:
# Smoke-test the planner outside the graph. {context} is filled with retrieved
# document text (not the retriever object, whose repr would otherwise be
# interpolated), and only the prompt's own inputs are passed.
result = planner.invoke(
    {"task": task, "context": format_docs(retriever.get_relevant_documents(task))}
)

In [13]:
# Print the planner response wrapped to 140 columns.
# (A bare `result.content` expression before the print had no effect and was dropped.)
print("\n".join(textwrap.wrap(result.content, width = 140)))

Plan: First, we need to understand what a board paper is. #E1 = RAG+LLM[What is a board paper?]  Plan: Next, we need to find out who the
board members are. #E2 = PEOPLE_DB[Board Members]  Plan: Now that we know who the board members are, we can draft an email to them, asking
for their input on the board paper. #E3 = RAG+LLM[Draft an email to board members asking for their input on the board paper]  Plan: After
the email is drafted, we need to send it to the board members. #E4 = SEND_EMAIL[#E2, #E3]  Plan: Once we have received the input from the
board members, we can compile the separate pieces into one board paper. #E5 = COMPILE[#E4]  Plan: After the board paper is compiled, we need
to circulate it for feedback and make any necessary revisions. #E6 = CIRCULATE[#E5]  Plan: Finally, once the board paper is finalized, we
need to distribute it to the board members. #E7 = SEND_EMAIL[#E2, #E6]


## [2] EXECUTOR

Define the tool execution node

In [20]:
# load plugins & test

from plugins.basic import BasicPlugins
plugin_instance = BasicPlugins()


# Each call below overwrites `result`; only the last value is kept.
# The email plugin is called with a single string, here holding recipients and
# content separated by "|".
result = plugin_instance.email_function("recipients: x,y,z | content: xyz...")



# Sample per-agenda pieces passed to the compile plugin as a list of strings.
boardpaper_pieces = [
    "agenda: performance, contents: performance_xyz",
    "agenda: operation, contents: operation_xyz",
    "agenda: profit, contents: profit_xyz",
]
result = plugin_instance.compile_paper(boardpaper_pieces)

# circulate_paper is called without arguments; people_db takes a free-text query.
result = plugin_instance.circulate_paper()
result = plugin_instance.people_db("who are board members?")



Board Paper  Contents:  1. Performance Analysis 2. Operation Report 3. Profit Review  Agendas:  1. Performance 2. Operation 3. Profit  ---
1. Performance  Performance Analysis  Content XYZ  ---  2. Operation  Operation Report  Content XYZ  ---  3. Profit  Profit Review  Content
XYZ  ---  Circulated to: CEO XYZ, COO XYZ, etc.   Changes made: XYZ (as requested)  Agent: The board paper has been compiled and all
requested changes have been made. It is now ready for circulation to all board members.
Board Members:  - Mike CEO - mike@mycompany.com - CEO - Olivia Johnson - olivia@mycompany.com - CFO - Alex Rodriguez - alex@mycompany.com -
COO


In [21]:
def _get_current_task(state: ReWOO):
    if state["results"] is None:
        return 1
    if len(state["results"]) == len(state["steps"]):
        return None
    else:
        return len(state["results"]) + 1
    

# initialise plugins
# NOTE(review): BasicPlugins is already imported and instantiated in an earlier
# cell; this repeats it.
from plugins.basic import BasicPlugins
plugin_instance = BasicPlugins()


# Re-pull the RAG prompt: `prompt` was reassigned to the planner prompt string
# in an earlier cell, and rag_chain below needs the hub prompt.
prompt = hub.pull("rlm/rag-prompt")
def format_docs(docs):
    """Return the documents' page_content strings joined by blank lines."""
    contents = []
    for document in docs:
        contents.append(document.page_content)
    separator = "\n\n"
    return separator.join(contents)


# RAG chain: hub RAG prompt -> model -> plain string output.
# tool_execution invokes it with "context" and "question" inputs.
output_parser = StrOutputParser()
rag_chain = prompt | model | output_parser

def tool_execution(state: ReWOO):
    """Worker node that executes the tool of the next pending plan step.

    Args:
        state: Graph state. Reads ``steps`` and, if present, ``results``.

    Returns:
        A partial state update ``{"results": ...}`` holding all earlier
        evidence plus this step's result, stored as ``str`` under its step
        name (e.g. ``"#E3"``).

    Raises:
        ValueError: If the step names a tool that is not handled here.
    """
    _step = _get_current_task(state)
    # e.g. ('compile them into a single board paper. ', '#E4', 'COMPILE', '#E3')
    _instruction, step_name, tool, tool_input = state["steps"][_step - 1]

    # Copy so the incoming state's dict is not mutated in place; tolerate a
    # missing or None "results" on the first run.
    _results = dict(state.get("results") or {})

    # Substitute evidence placeholders in a single pass over whole "#E<n>"
    # tokens: "#E1" can no longer clobber the prefix of "#E10", and substituted
    # evidence text is not re-scanned for further placeholders.
    tool_input = re.sub(
        r"#E\d+",
        lambda match: str(_results.get(match.group(0), match.group(0))),
        tool_input,
    )

    print("[tool_execution] Print selected tool name: ", tool, "\n")

    if tool == "RAG_LLM":
        docs = retriever.get_relevant_documents(tool_input)
        docs_str = format_docs(docs)

        print("[tool_execution] tool input: ", tool_input, "\n")

        # Only the prompt's own inputs are passed; "recursion_limit" is a run
        # configuration option, not a chain input.
        result = rag_chain.invoke({"context": docs_str, "question": tool_input})

        print("[tool_execution] rag invoke result: ", result, "\n")

    elif tool == "PEOPLE_DB":
        result = plugin_instance.people_db(tool_input)

    elif tool == "SEND_EMAIL":
        result = plugin_instance.email_function(tool_input)

    elif tool == "COMPILE":
        # NOTE(review): tool_input is ignored here; the plugin is always called
        # with these fixed sample pieces.
        boardpaper_pieces = [
            "agenda: performance, contents: performance_xyz",
            "agenda: operation, contents: operation_xyz",
            "agenda: profit, contents: profit_xyz",
        ]
        result = plugin_instance.compile_paper(boardpaper_pieces)

    elif tool == "CIRCULATE":
        # NOTE(review): tool_input is ignored here; circulate_paper takes no arguments.
        result = plugin_instance.circulate_paper()

    else:
        raise ValueError(f"Unknown tool {tool!r} in plan step {step_name}")

    _results[step_name] = str(result)  # e.g. "#E1" -> "xyz"
    return {"results": _results}

## [3] Solver 

The solver receives the full plan and generates the final response based on the responses of the tool calls from the worker.

Solver has its own prompt.

In [22]:
# Solver prompt. solve() fills {plan} with the plan steps and their evidence and
# {task} with the original task via str.format.
solve_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
contain irrelevant information.

{plan}

Now solve the question or task according to provided Evidence above. Respond with the answer
directly with no extra words.

Task: {task}
Response:"""

def solve(state: ReWOO):
    """Solver node: build the plan-plus-evidence text and ask the model for the answer.

    Args:
        state: Graph state. Reads ``steps``, ``task`` and, if present, ``results``.

    Returns:
        A partial state update ``{"result": <model response text>}``.
    """
    # Looked up once; tolerate a missing or None "results".
    results = state.get("results") or {}

    def _fill(text):
        # Replace whole "#E<n>" tokens in one pass so "#E1" never rewrites the
        # prefix of "#E10"; unknown placeholders are left as they are.
        return re.sub(
            r"#E\d+",
            lambda match: str(results.get(match.group(0), match.group(0))),
            text,
        )

    plan_lines = []
    for _plan, step_name, tool, tool_input in state["steps"]:
        # The step name itself is replaced by its evidence, so the solver reads
        # "<evidence> = TOOL[input]" for every executed step.
        plan_lines.append(
            f"Plan: {_plan}\n{_fill(step_name)} = {tool}[{_fill(tool_input)}]"
        )
    # Separate steps with a newline; without it one step's "]" runs straight
    # into the next "Plan:".
    plan = "\n".join(plan_lines)

    solver_input = solve_prompt.format(plan=plan, task=state["task"])
    # TODO: decide between the bare LLM and RAG + LLM here; the bare LLM is used for now.
    result = model.invoke(solver_input)
    return {"result": result.content}

## [4] Define Graph

Our graph defines the workflow. 

The planner, worker (tool executor), and solver modules are each added as a node.

In [23]:
def _route(state):
    """Pick the node to run after a tool step: "solve" when done, else "tool"."""
    # _get_current_task returns None once every planned step has been executed;
    # any other value means there is still a step left, so loop back to "tool".
    return "solve" if _get_current_task(state) is None else "tool"

In [24]:
from langgraph.graph import StateGraph, END

# Build the workflow over the ReWOO state: plan -> tool -> (tool | solve) -> END.
graph = StateGraph(ReWOO)
graph.add_node("plan", get_plan)
graph.add_node("tool", tool_execution)
graph.add_node("solve", solve)
graph.add_edge("plan", "tool")
graph.add_edge("solve", END)
# After each tool run, _route returns the name of the next node ("tool" or "solve").
graph.add_conditional_edges("tool", _route)
graph.set_entry_point("plan")

app = graph.compile()

In [25]:
task

'create a board paper?'

In [26]:
# Run the graph on the task and print every streamed item, each followed by a divider line.
for s in app.stream({"task": task}):
    print("s in app.stream",s) #dict
    print("-------")


s in app.stream {'plan': {'steps': [('Identify the key elements needed to create a board paper. ', '#E1', 'RAG_LLM', 'What are the key elements needed to create a board paper?'), ('Determine who are the board members. ', '#E2', 'PEOPLE_DB', 'board members'), ('Formulate the content of the board paper based on the identified key elements. ', '#E3', 'RAG_LLM', 'How to write a board paper based on these key elements: #E1'), ('Compile the board paper with the formulated content. ', '#E4', 'COMPILE', '#E3'), ('Circulate the compiled board paper to the board members for their feedback. ', '#E5', 'CIRCULATE', '#E4'), ('Make necessary changes based on the feedback received from the board members. ', '#E6', 'RAG_LLM', 'How to modify the board paper based on the feedback: #E5'), ('Finalize the board paper after incorporating the feedback. ', '#E7', 'COMPILE', '#E6'), ('Distribute the final board paper to the board members. ', '#E8', 'SEND_EMAIL', '{"Recipients": #E2, "Content": #E7}')], 'plan_st

In [27]:
# res_data = {'tool': {'results': {'#E1': 'To create a board paper, first, based on the approved agenda (usually involving performance, operation, profit, and any additional topics), the secretary emails the NEC, which is assumed to be the board members. Then, the NEC writes the paper. Finally, the secretary circulates and distributes the paper to the board members.', '#E2': 'To create a board paper, you start by establishing the approved agenda which usually includes performance, operation, profit, and any additional topics. This agenda is emailed to the NEC (assumed to be board members). After the NEC writes the paper, the secretary is responsible for circulating and distributing the paper to all board members.', '#E3': 'The task does not provide specific names or emails to fulfill the request.', '#E4': 'email sent'}}}

# # Extracting the value associated with #E3
# value_E3 = data['tool']['results']['#E3']
# print(value_E3)