In [8]:
from typing import Sequence

import bs4
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict

In [4]:
import nest_asyncio
# Patch asyncio so that asyncio.run() can be called re-entrantly. The embedding
# adapter defined later in this notebook calls asyncio.run() from synchronous
# methods, which would otherwise fail if an event loop is already running
# (presumably the case inside the notebook kernel).
nest_asyncio.apply()

In [None]:
import os
import logging
from dotenv import load_dotenv
from langchain.embeddings.base import Embeddings
import bs4
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.vectorstores import InMemoryVectorStore

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
import asyncio
from openai import AzureOpenAI
# Load settings from a .env file into the environment; the classes below read
# their Azure endpoint, key, API version and model name via os.getenv().
load_dotenv()

# Embeddings service using Azure OpenAI API
class EmbeddingsService:
    """Thin wrapper around the Azure OpenAI client that produces text embeddings.

    Connection settings are read from the environment variables
    ``AZURE_OPENAI_ENDPOINT``, ``AZURE_OPENAI_API_KEY`` and ``API_VERSION``.
    """

    def __init__(self, embedding_deployment: str = "embedding"):
        """Create the Azure OpenAI client.

        Args:
            embedding_deployment: Value sent as ``model`` on every embeddings
                request. Defaults to ``"embedding"``, the value that used to be
                hard-coded in ``get_embeddings``.
        """
        self.client = AzureOpenAI(
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            api_version=os.getenv("API_VERSION")
        )
        # Kept for backward compatibility; embeddings requests do not use it.
        self.model_name = os.getenv("MODEL_NAME")
        self.embedding_deployment = embedding_deployment

    async def get_embeddings(self, text: str):
        """Generate embeddings for a given text.

        The client call below is not awaited, so although this method is a
        coroutine it blocks until the request completes.

        Args:
            text: The text to embed.

        Returns:
            The embedding of the first item in the response, or an empty list
            if the request raised an exception (the error is logged, not
            re-raised).
        """
        try:
            response = self.client.embeddings.create(
                input=text, model=self.embedding_deployment
            )
            result = response.data[0].embedding
            # Lazy %-formatting: the (large) vector is only rendered to text
            # when INFO logging is actually enabled.
            logging.info("Full embedding: %s", result)
            logging.info("dimension: %d", len(result))
            return result
        except Exception as e:
            # logging.exception keeps the same message and adds the traceback.
            logging.exception("Error generating embeddings: %s", e)
            return []

import asyncio

class CustomAzureOpenAIEmbeddings(Embeddings):
    """Synchronous ``Embeddings`` adapter on top of an ``EmbeddingsService``.

    The service exposes a coroutine (``get_embeddings``); the methods here
    drive it to completion with ``asyncio.run``.

    NOTE(review): ``asyncio.run`` normally refuses to run while another event
    loop is active in the same thread. This notebook applies ``nest_asyncio``
    in an earlier cell, which appears to be what makes these calls work inside
    the kernel -- confirm before reusing this class elsewhere.
    """

    def __init__(self, embedding_service: EmbeddingsService):
        """Store the service used to compute embeddings."""
        self.embedding_service = embedding_service

    def embed_query(self, text: str):
        """Return the embedding for a single query string."""
        return asyncio.run(self.embedding_service.get_embeddings(text))

    def embed_documents(self, texts: list):
        """Return one embedding per input text, in input order.

        All texts are processed inside a single ``asyncio.run`` call instead
        of starting and tearing down a fresh event loop for every text. The
        texts are still embedded one after another, so ordering and request
        pacing match the previous per-text implementation.
        """
        async def _embed_all():
            # Sequential awaits: one request at a time, results in input order.
            return [
                await self.embedding_service.get_embeddings(text)
                for text in texts
            ]

        return asyncio.run(_embed_all())
    
# Instantiate the Azure-backed embedding service and wrap it in the adapter
# class above; `custom_embeddings` is what the vector store is built with
# later in this notebook.
embedding_service = EmbeddingsService()
custom_embeddings = CustomAzureOpenAIEmbeddings(embedding_service)

In [6]:
import bs4
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.vectorstores import InMemoryVectorStore

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv
import os

# Make the values from the local .env file available through os.getenv().
load_dotenv()

### Chat model ###
# Endpoint and deployment name come from the environment; the API version is
# pinned in code.
llm = AzureChatOpenAI(
    openai_api_version='2024-05-01-preview',
    azure_deployment=os.getenv("MODEL_NAME"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)


### Construct retriever ###
# Restrict HTML parsing to the elements carrying the post's title, header and
# body classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split the loaded documents into overlapping chunks before indexing.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# Index the chunks in memory using the custom embeddings object created in an
# earlier cell.
vectorstore = InMemoryVectorStore.from_documents(
    embedding=custom_embeddings, documents=splits
)
retriever = vectorstore.as_retriever()


### Build retriever tool ###
tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]


### Build agent ###
# The checkpointer is handed to the agent so state can be kept between calls.
memory = MemorySaver()
agent_executor = create_react_agent(llm, tools, checkpointer=memory)

In [9]:
# Conversation identifier passed with every agent call in the cells below, so
# all of these turns run under the same thread_id.
config = {"configurable": {"thread_id": "abc123"}}

# First turn: a plain greeting. Each streamed event carries a "messages" list;
# only the newest message is printed.
for event in agent_executor.stream(
    {"messages": [HumanMessage(content="Hi! I'm bob")]},
    config=config,
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Hi! I'm bob

Hello Bob! How can I assist you today?


In [10]:
# Second turn on the same thread (same `config` as the greeting cell): a
# question about the blog post's content.
query = "What is Task Decomposition?"

# Print the newest message of every streamed event.
for event in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]},
    config=config,
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


What is Task Decomposition?
Tool Calls:
  blog_post_retriever (call_PZOxNzFokJcLMyYXHf7okU9C)
 Call ID: call_PZOxNzFokJcLMyYXHf7okU9C
  Args:
    query: Task Decomposition
Name: blog_post_retriever

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple th

In [11]:
# Follow-up on the same thread. The question refers to the previous topic only
# as "it", so answering it depends on the history kept for this thread_id.
# The trailing "redo the search" is presumably there to push the agent to call
# the retriever tool again rather than answer from the earlier turn.
query = "What according to the blog post are common ways of doing it? redo the search"

# Print the newest message of every streamed event.
for event in agent_executor.stream(
    {"messages": [HumanMessage(content=query)]},
    config=config,
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


What according to the blog post are common ways of doing it? redo the search
Tool Calls:
  blog_post_retriever (call_t24gDhXOnyW1DXUGK72mxcbU)
 Call ID: call_t24gDhXOnyW1DXUGK72mxcbU
  Args:
    query: common ways of Task Decomposition
Name: blog_post_retriever

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A comp