In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

LANGCHAIN_TRACING_V2 = os.getenv('LANGCHAIN_TRACING_V2')
LANGCHAIN_ENDPOINT = os.getenv('LANGCHAIN_ENDPOINT')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

## Indexing

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()


## Retrieval and Generation

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

# post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is Task Decomposition?")

USER_AGENT environment variable not set, consider setting it to identify your requests.


'Task Decomposition involves breaking down a complex task into smaller, manageable steps or subgoals. Techniques like Chain of Thought and Tree of Thoughts facilitate this process by generating step-by-step reasoning or multiple reasoning paths. It can be performed by language models through prompting, using task-specific instructions, or with external planning tools.'

### Part 2: Indexing

In [3]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

In [4]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string"""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base")

8

In [5]:
from langchain_openai import OpenAIEmbeddings

embed = OpenAIEmbeddings()
query_result = embed.embed_query(question)
document_result = embed.embed_query(document)
len(query_result)

1536

In [6]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.8806977856520077


In [7]:
# Load blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [8]:
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)

In [9]:
# Index
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

### Part 3: Retrieval

In [10]:
# Index
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})    # Number of nearby neighbors to fetch in the space.

In [11]:
docs = retriever.get_relevant_documents("What is Task Decomposition?")
len(docs)

  docs = retriever.get_relevant_documents("What is Task Decomposition?")


1

### Part 4: Generation

In [12]:
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [13]:
# LLM
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

In [14]:
# Chain
chain = prompt | llm

In [15]:
# Run
chain.invoke({"context": docs, "question": "What is Task Decomposition?"})

AIMessage(content='Task Decomposition is the process of breaking down a complex task into smaller, more manageable steps or subgoals. Techniques for task decomposition include prompting a large language model (LLM) with simple instructions or questions, using task-specific instructions (e.g., "Write a story outline"), or incorporating human inputs. This approach helps in planning and solving complicated tasks by simplifying them into easier components.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 78, 'prompt_tokens': 317, 'total_tokens': 395, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-nano-2025-04-14', 'system_fingerprint': None, 'id': 'chatcmpl-BuXT32edkJt9zjrYr4p2OA9VGOHIw', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run

In [16]:
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [17]:
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [18]:
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is the process of breaking down a complex task into smaller, more manageable steps or subgoals. Techniques for task decomposition include prompting large language models with simple instructions (e.g., listing steps or subgoals), using task-specific instructions (e.g., "Write a story outline"), or incorporating human inputs. This approach helps in planning and solving complicated tasks by transforming them into simpler components.'

### Part 5: Multi-query

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
# Load blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
# Index
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

In [2]:
from langchain.prompts import ChatPromptTemplate
## Prompt

# Multi-query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_perspectives
    | ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [3]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Unique union of retrieved docs"""
    # Flatten list of lists, and convert each document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is Task Decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question": question})
len(docs)

  return [loads(doc) for doc in unique_docs]


8

In [5]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

final_rag_chain = (
    {"context": retrieval_chain,
     "question": itemgetter("question")}
    | prompt
    | llm 
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Task Decomposition for LLM agents involves breaking down complex tasks into smaller, manageable subgoals or steps to facilitate efficient problem-solving. This process can be achieved through various methods:\n\n1. **Prompting Techniques:** Using simple prompts like "Steps for XYZ" or asking "What are the subgoals for achieving XYZ?" to guide the LLM in identifying smaller components of the main task.\n\n2. **Task-Specific Instructions:** Providing specific instructions tailored to the task, such as "Write a story outline," to help the model focus on particular aspects of the task.\n\n3. **Human Inputs:** Incorporating human guidance or oversight to assist in decomposing tasks effectively.\n\nAdditionally, advanced methods like **Chain of Thought (CoT)** prompting encourage the model to think step-by-step, while **Tree of Thoughts** extends this by exploring multiple reasoning paths simultaneously, generating a tree structure of possible solutions. These approaches enable the LLM to i

### Part 6: RAG Fusion

In [8]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [9]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [11]:
from langchain.load import loads, dumps

def reciprocal_rank_fusion(results: list[list], k=60):
    """Reciprocal_rank_fusion that takes multiple lists of ranked documents
        and an optional parameter k used in the RRF formula"""
        
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}
    
    # iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)
    
    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    
    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

question = "What is Task Decomposition for LLM agents?"
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

  (loads(doc), score)


7

In [13]:
from operator import itemgetter

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion,
     "question": itemgetter("question")}
    | prompt
    | llm 
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Task Decomposition for LLM agents involves breaking down complex tasks into smaller, manageable subgoals or steps to facilitate efficient problem-solving. This process can be achieved through various methods:\n\n1. Using simple prompting techniques with the language model, such as asking "Steps for XYZ" or "What are the subgoals for achieving XYZ?" to guide the model in identifying smaller tasks.\n2. Employing task-specific instructions tailored to the nature of the task, for example, instructing the model to "Write a story outline" for creative writing.\n3. Incorporating human inputs to assist in decomposing tasks when necessary.\n\nAdditionally, advanced methods like the Tree of Thoughts extend this concept by exploring multiple reasoning paths at each step, generating a tree structure of possible solutions, and evaluating them through classifiers or majority voting. Overall, task decomposition helps transform complex problems into simpler components, enabling better planning and re

### Part 7: Decomposition

In [4]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [7]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)

# Chain
generate_queries_decomposition = (
    prompt_decomposition 
    | llm 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question": question})

In [8]:
questions

['1. What are the key architectural components of an autonomous agent system powered by large language models (LLMs)?  ',
 '2. How do perception, reasoning, and action modules integrate within an LLM-based autonomous agent?  ',
 '3. What infrastructure and data components are essential for deploying and maintaining an LLM-driven autonomous agent system?']

In [9]:
# Prompt 
template = """Here is the question you need to answer:
\n -- \n {question} \n -- \n
Here is any available background question + answer pairs:
\n -- \n {q_a_pairs} \n -- \n
Here is additional context relevant to the question:
\n -- \n {context} \n -- \n
Use the above context and any background question + answer pairs to answer the question: \n {question}
"""
decomposition_prompt = ChatPromptTemplate.from_template(template)

In [11]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Formay Q and A pair"""
    
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n"
    return formatted_string.strip()

q_a_pairs = ""
for q in questions:
    rag_chain = (
        {"context": itemgetter("question") | retriever,
         "question": itemgetter("question"),
         "q_a_pairs": itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser())
        
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n--\n" + q_a_pair

In [12]:
answer

"To effectively deploy and maintain an LLM-driven autonomous agent system, several key infrastructure and data components are essential. These components ensure the system's robustness, scalability, responsiveness, and continual improvement. Based on the provided context and architectural insights, the essential components include:\n\n1. **Computational Infrastructure:**  \n   - **High-Performance Servers or Cloud Platforms:** To host large language models (LLMs) and support intensive processing tasks. Cloud providers (e.g., AWS, Azure, GCP) offer scalable resources for model deployment, inference, and training.  \n   - **GPU/TPU Resources:** Specialized hardware accelerators are often necessary for efficient inference and fine-tuning of large models.\n\n2. **Model Hosting and Serving Infrastructure:**  \n   - **Model Deployment Frameworks:** Tools like TensorFlow Serving, TorchServe, or custom APIs facilitate scalable, low-latency access to LLMs.  \n   - **API Gateways and Load Balanc

In [14]:
# Answer each sub-question individually
from langchain import hub

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt_rag, sub_question_generate_chain):
    """RAG on each sub-question"""
    
    # Use our decomposition
    sub_questions = sub_question_generate_chain.invoke({"question": question})
    
    # Initialize a list to hold RAG chain results
    rag_results = []
    
    for sub_question in sub_questions:
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        
        # Use retrieved documents and sub-question in RAG chain
        answer = (prompt_rag | llm | StrOutputParser()).invoke({"context": retrieved_docs, "question": sub_question})
        
        rag_results.append(answer)
        
    return rag_results, sub_questions

# Wrap the retrieval and RAG process in a runnablelambda for integration into a chain
answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)

  retrieved_docs = retriever.get_relevant_documents(sub_question)


In [15]:
def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""
    
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is a set of Q+A pairs:
{context}
Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)
final_rag_chain.invoke({"context": context, "question": question})

'The main components of an LLM-powered autonomous agent system include the large language model (LLM) itself, which acts as the core decision-maker; planning modules responsible for decomposing tasks and enabling reflection; perception modules that gather environmental information; action modules that facilitate interaction with external tools or environments; memory components that store past actions, knowledge, and reflections; and supporting infrastructure such as computational resources and APIs to ensure reliable operation and scalability. Together, these components enable the system to perceive, reason, plan, act, reflect, and learn continuously.'

### Part 8: Step Back

In [16]:
# Few Shot examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel's was born in what country?",
        "output": "what is Jan Sindel's personal history?",
    },
]
# We now transform these to example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [19]:
generate_queries_step_back = prompt | llm | StrOutputParser()
question = "What is task decomposition for LLM agents?"
generate_queries_step_back.invoke({"question": question})

'what is the process of breaking down tasks for language model agents?'

In [21]:
from langchain_core.runnables.base import RunnableLambda

# Response prompt
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}
# 
# Original Question: {question}
# Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

chain = (
    {
        # Retrieve context using the normal question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Retrieve context using the step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # Pass on the question
        "question": lambda x: x["question"],
    }
    | response_prompt
    | llm
    | StrOutputParser()
)

chain.invoke({"question": question})

'Task decomposition for LLM (Large Language Model) agents refers to the process of breaking down complex, large-scale tasks into smaller, more manageable sub-tasks or steps. This approach enables the agent to handle intricate problems efficiently by focusing on simpler components sequentially or in parallel. \n\nThere are several key techniques and concepts associated with task decomposition in this context:\n\n1. **Chain of Thought (CoT):**  \n   - A prompting technique where the model is guided to "think step by step."  \n   - It decomposes a complex problem into a sequence of smaller reasoning steps, allowing the model to process and solve each part incrementally.  \n   - This method enhances performance on difficult tasks by making the reasoning process explicit and interpretable.\n\n2. **Tree of Thoughts (ToT):**  \n   - An extension of CoT that explores multiple reasoning pathways simultaneously.  \n   - It generates multiple thoughts or solutions at each step, forming a tree str

### Part 9: HyDE

In [22]:
from langchain_core.prompts import ChatPromptTemplate

# HyDE document generation
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_docs_for_retrieval = (
    prompt_hyde | llm | StrOutputParser()
)

# Run 
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question": question})

"Task decomposition for large language model (LLM) agents refers to the process of breaking down complex, high-level tasks into smaller, more manageable subtasks that can be individually addressed by the model. This approach enhances the agent's ability to handle intricate problems by enabling systematic reasoning, reducing cognitive load, and improving overall performance. In practice, task decomposition involves identifying the constituent components of a given problem, structuring these components hierarchically or sequentially, and generating intermediate steps or prompts that guide the LLM through each subtask. This methodology leverages the LLM's capacity for contextual understanding and reasoning, allowing it to assemble solutions from simpler, well-defined units. By decomposing tasks, LLM agents can achieve greater accuracy, interpretability, and scalability, particularly in complex domains such as multi-step reasoning, multi-turn dialogues, and problem-solving scenarios. Overa

In [23]:
# Retrieve
retrieval_chain = generate_docs_for_retrieval | retriever
retrieved_docs = retrieval_chain.invoke({"question": question})
retrieved_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Component One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a

In [25]:
# RAG
template = """Answer the following question based on this context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"context": retrieved_docs, "question": question})

'Task decomposition for LLM agents involves breaking down complex, large tasks into smaller, more manageable subgoals or steps. This process enables the agent to handle intricate tasks more efficiently and effectively. There are several methods for task decomposition:\n\n1. Using simple prompts with the LLM, such as "Steps for XYZ" or "What are the subgoals for achieving XYZ?".\n2. Employing task-specific instructions tailored to the particular task, for example, "Write a story outline" for creative writing.\n3. Incorporating human inputs to guide the decomposition process.\n\nAdditionally, techniques like Chain of Thought (CoT) prompt the model to think step-by-step, while Tree of Thoughts (ToT) explores multiple reasoning paths at each step by generating a tree of possible thoughts, which can be searched using algorithms like BFS or DFS. These methods help the agent plan and reason more effectively by systematically breaking down and exploring the problem space.'

### Part 10: Routing (Technical + semantic)

In [27]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# Data model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""
    
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be the most relevant for answering their question",
    )
    
# LLM with function call
llm = ChatOpenAI(model="gpt-4.1-nano-2025-04-14", temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)

# prompt
system = """You are an expert at routing a user question to the appropriate data source.
Based on the programming language the question is referring to, route it to the relevant data source."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Define router
router = prompt | structured_llm



In [28]:
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human', "speak in {language}"])
prompt.invoke("french")
"""

result = router.invoke({"question": question})

In [29]:
result

RouteQuery(datasource='python_docs')

In [30]:
result.datasource

'python_docs'

In [31]:
def choose_route(result):
    if "python_docs" in result.datasource.lower():
        ### Logic here
        return "chain for python_docs"
    elif "js_docs" in result.datasource.lower():
        ### Logic here
        return "chain for js_docs"
    else:
        ### Logic here
        return "golang_docs"

from langchain_core.runnables import RunnableLambda

full_chain = router | RunnableLambda(choose_route)

In [32]:
full_chain.invoke({"question": question})

'chain for python_docs'

In [33]:
# Semantic routing

from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Two prompts 
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

# Embed prompts
embeddings = OpenAIEmbeddings()
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# Route question to prompt
def prompt_router(input):
    # Embed question
    query_embedding = embeddings.embed_query(input["query"])
    # Compute cosine similarity
    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    most_similar = prompt_templates[similarity.argmax()]
    # Chosen prompt
    print("Using MATH" if most_similar == math_template else "Using PHYSICS")
    return ChatPromptTemplate.from_template(most_similar)

chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | llm
    |StrOutputParser()
)

print(chain.invoke("What is a black hole?"))

Using PHYSICS
A black hole is a region in space where gravity is so strong that nothing, not even light, can escape from it. It forms when a massive star collapses under its own gravity at the end of its life. The boundary around a black hole where escape is impossible is called the event horizon. Inside, matter is compressed into a very small point called a singularity, where density becomes extremely high.
