In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

from langchain_openai import AzureChatOpenAI


# Chat model backed by an Azure OpenAI deployment. The endpoint and the
# deployment name are read from the environment populated by load_dotenv().
llm = AzureChatOpenAI(
    openai_api_version="2024-05-01-preview",
    azure_deployment=os.environ.get("MODEL_NAME"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
)

In [2]:
import nest_asyncio
# Patch asyncio so asyncio.run() can be called while an event loop is already
# running (presumably the notebook kernel's loop). The embedding adapter
# defined in a later cell calls asyncio.run() from synchronous methods.
nest_asyncio.apply()

In [3]:
import os
import logging
from dotenv import load_dotenv
from langchain.embeddings.base import Embeddings
import bs4
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.vectorstores import InMemoryVectorStore

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
import asyncio

load_dotenv()

# Embeddings service using Azure OpenAI API
class EmbeddingsService:
    """Small wrapper around the Azure OpenAI embeddings endpoint.

    The underlying client is configured from the AZURE_OPENAI_ENDPOINT,
    AZURE_OPENAI_API_KEY and API_VERSION environment variables.
    """

    # Value that used to be hard-coded in the embeddings call; kept as the
    # default so existing ``EmbeddingsService()`` callers behave the same.
    DEFAULT_EMBEDDING_DEPLOYMENT = "embedding"

    def __init__(self, embedding_deployment: str = DEFAULT_EMBEDDING_DEPLOYMENT):
        """Create the Azure OpenAI client.

        Args:
            embedding_deployment: Value passed as ``model`` to
                ``client.embeddings.create``. Defaults to ``"embedding"``.
        """
        # Imported lazily so the class can be defined without the SDK loaded.
        from openai import AzureOpenAI
        self.client = AzureOpenAI(
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            api_version=os.getenv("API_VERSION")
        )
        # Retained for backward compatibility; it is not used for embedding calls.
        self.model_name = os.getenv("MODEL_NAME")
        self.embedding_deployment = embedding_deployment

    async def get_embeddings(self, text: str):
        """Generate the embedding vector for a given text.

        Args:
            text: The text to embed.

        Returns:
            The embedding taken from the first item of the response, or an
            empty list if the request failed. Failures are logged (with
            traceback) rather than raised, so callers should treat ``[]``
            as "no embedding available".

        Note:
            The client call is synchronous, so awaiting this coroutine
            blocks until the request has completed.
        """
        try:
            response = self.client.embeddings.create(
                input=text, model=self.embedding_deployment
            )
            result = response.data[0].embedding
            # Lazy %-formatting: the (large) vector is only rendered when
            # DEBUG logging is actually enabled.
            logging.debug("Full embedding: %s", result)
            logging.info("dimension: %d", len(result))
            return result
        except Exception as e:
            # exc_info keeps the traceback that the message alone would lose.
            logging.error("Error generating embeddings: %s", e, exc_info=True)
            return []

import asyncio

class CustomAzureOpenAIEmbeddings(Embeddings):
    """Synchronous ``Embeddings`` adapter over an async ``EmbeddingsService``.

    Both methods drive the service's ``get_embeddings`` coroutine to
    completion with ``asyncio.run`` and return its result unchanged.
    """

    def __init__(self, embedding_service: EmbeddingsService):
        """Store the service whose ``get_embeddings`` coroutine is used."""
        self.embedding_service = embedding_service

    def embed_query(self, text: str):
        """Return the embedding for a single query text."""
        return asyncio.run(self.embedding_service.get_embeddings(text))

    def embed_documents(self, texts: list):
        """Return one embedding per input text, in the same order.

        All texts are embedded inside a single ``asyncio.run`` call instead
        of starting a separate run per text. Requests are still awaited one
        after another, so ordering and request rate match the per-text loop.
        """
        if not texts:
            # Nothing to embed; avoid spinning up an event loop at all.
            return []

        async def _embed_all():
            return [
                await self.embedding_service.get_embeddings(text)
                for text in texts
            ]

        return asyncio.run(_embed_all())
    
# Build the Azure-backed embedding service and wrap it in the LangChain
# Embeddings adapter. `custom_embeddings` is the object handed to the vector
# store when the retriever is constructed in a later cell.
embedding_service = EmbeddingsService()
custom_embeddings = CustomAzureOpenAIEmbeddings(embedding_service)



USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
from typing import Sequence

import bs4
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict



### Construct retriever ###
# Load the blog post, restricting HTML parsing to the post's content,
# title and header elements.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

# Split the loaded page into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# Embed the chunks with the custom Azure embeddings and expose them as a retriever.
vectorstore = InMemoryVectorStore.from_documents(splits, embedding=custom_embeddings)
retriever = vectorstore.as_retriever()


### Contextualize question ###
# System instruction asking the model to rewrite the latest question so it
# can be understood without the chat history (without answering it).
contextualize_q_system_prompt = " ".join(
    (
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    )
)

# Prompt layout: system instruction, prior messages, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


### Answer question ###
# System instruction for the answering step; the retrieved documents are
# substituted into the {context} placeholder.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Same layout as the contextualize prompt: system, history, new question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)


### Statefully manage chat history ###
class State(TypedDict):
    """Schema of the state passed through the chat graph.

    ``call_model`` passes the whole state to ``rag_chain.invoke`` and returns
    updates for ``chat_history``, ``context`` and ``answer``.
    """

    # The user's question for the current turn.
    input: str
    # Conversation messages; annotated with `add_messages`, which the graph
    # presumably uses to merge each node's returned messages into the history.
    chat_history: Annotated[Sequence[BaseMessage], add_messages]
    # NOTE(review): call_model stores response["context"] here, which is
    # presumably the retrieved documents rather than a plain string;
    # confirm and adjust the annotation if so.
    context: str
    # The model's answer for the current turn.
    answer: str


def call_model(state: State):
    """Run one RAG turn and return the resulting state updates.

    The full state is passed to ``rag_chain``. The returned dict carries the
    question/answer pair for ``chat_history`` plus the chain's ``context``
    and ``answer`` values.
    """
    rag_output = rag_chain.invoke(state)

    user_turn = HumanMessage(state["input"])
    answer_text = rag_output["answer"]
    assistant_turn = AIMessage(answer_text)

    return dict(
        chat_history=[user_turn, assistant_turn],
        context=rag_output["context"],
        answer=answer_text,
    )


# Single-node graph: START -> "model", where "model" runs call_model.
workflow = StateGraph(State)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with an in-memory checkpointer so graph state is saved between calls.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [5]:
# Run configuration shared by every invoke call in this conversation.
config = dict(configurable=dict(thread_id="abc123"))

# First turn: ask the opening question and show the answer.
result = app.invoke({"input": "What is Task Decomposition?"}, config=config)
print(result["answer"])

Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. Techniques like Chain of Thought (CoT) and Tree of Thoughts (ToT) are used to enhance model performance by decomposing tasks into simpler components and exploring multiple reasoning possibilities at each step. This helps in understanding and solving complicated tasks systematically.


In [6]:
# Follow-up turn on the same config: "it" refers back to the previous question.
result = app.invoke({"input": "What is one way of doing it?"}, config=config)
print(result["answer"])

One way of doing task decomposition is by using Large Language Models (LLMs) with simple prompting, such as "Steps for XYZ.\n1." or "What are the subgoals for achieving XYZ?" which guides the model to break down the task into smaller steps.
