### Prompt

In [1]:
import bs4, tiktoken, numpy as np, os

from langchain import hub
from langchain.load import dumps, loads
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

from operator import itemgetter

from dotenv import load_dotenv
load_dotenv()

learn_api_key = os.environ['LANGCHAIN_API_KEY']
openai_api_key = os.environ['OPENAI_API_KEY']
groq_api_key = os.environ['GROQ_API_KEY']

In [2]:
### INDEXING ###
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

blog_docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)

# Make Splits
splits = text_splitter.split_documents(blog_docs)

# Index
vectorstore = Chroma.from_documents(
    splits,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
)
retriever = vectorstore.as_retriever()

In [3]:
# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [4]:
# LLM
llm = ChatGroq(model='openai/gpt-oss-20b', api_key=groq_api_key, temperature=0)

# Chain
generate_queries_decomposition = (prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split('\n')))

question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})
questions

['1. “LLM‑powered autonomous agent architecture components”  ',
 '2. “Key modules in a large‑language‑model autonomous agent system”  ',
 '3. “Design elements of an autonomous agent powered by a large language model”']

In [5]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question:

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [6]:
def format_qa_pair(question, answer):
    """Format Q and A pair"""

    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

# LLM
llm = ChatGroq(model='openai/gpt-oss-20b', api_key=groq_api_key, temperature=0)


In [7]:
q_a_pairs = ""
for q in questions:
    rag_chain = (
        {"context": itemgetter("question") | retriever,
         "question": itemgetter("question"),
         "q_a_pairs": itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser())

    answer = rag_chain.invoke({"question":q, "q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n"+q_a_pair

In [8]:
answer

'## Design Elements of an Autonomous Agent Powered by a Large Language Model\n\nBelow is a concise, modular blueprint that captures the essential building blocks of a robust LLM‑driven autonomous agent.  \nThe table and accompanying notes synthesize the key modules from the background Q&A, Lilian\u202fWeng’s 2023 article, and the broader literature on LLM agents.\n\n| **Element** | **Primary Responsibility** | **Typical Implementation** | **Key Design Considerations** |\n|------------|---------------------------|---------------------------|------------------------------|\n| **LLM Controller (Brain)** | Generates decisions, plans, and responses. | GPT‑4, Claude‑3, Llama‑2, etc. | Prompt‑engineering, system messages, and token‑budget management. |\n| **Planning / Goal‑Decomposition** | Breaks high‑level goals into actionable sub‑tasks, revises plans on‑the‑fly. | Prompt‑based planning, Tree‑of‑Thoughts, hierarchical task networks. | Robust error handling; plan‑repair loops. |\n| **Short‑

### Answer individually

In [9]:
# Answer each sub-question individually

# RAG prompt
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """RAG on each sub-question"""
    sub_questions = sub_question_generator_chain.invoke({"question":question})

    rag_results = []

    for sub_question in sub_questions:
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        answer = (prompt_rag | llm | StrOutputParser()).invoke({"context":retrieved_docs, "question": sub_question})
        rag_results.append(answer)
    return rag_results, sub_questions

answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)

  retrieved_docs = retriever.get_relevant_documents(sub_question)


In [10]:
def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is the set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (prompt | llm | StrOutputParser())
final_rag_chain.invoke({"context":context, "question":question})

'**Main components of an LLM‑powered autonomous agent system**\n\n| # | Component | Core responsibilities |\n|---|----------|----------------------|\n| 1 | **Core LLM “brain”** | Generates plans, actions, and self‑reflection; serves as the agent’s decision‑making engine. |\n| 2 | **Planning & reasoning** | Decomposes tasks into sub‑goals, refines plans (chain‑of‑thought, Tree‑of‑Thoughts, ReAct, etc.), and guides long‑term execution. |\n| 3 | **Memory & retrieval** | Short‑term in‑context learning + long‑term external vector stores or knowledge bases that extend the LLM’s limited context window. |\n| 4 | **Tool‑execution interface** | Parses the LLM’s natural‑language outputs into API calls or code execution, handling formatting and error mitigation. |\n| 5 | **Self‑reflection & error handling** | Learns from past mistakes, adjusts plans, and incorporates safety or reward signals. |\n\nThese five pillars—brain, planning, memory, tool integration, and reflection—form the core architectu