In [1]:
import os
# Turn on LangSmith tracing for everything executed in this kernel.
os.environ.update(
    {
        "LANGSMITH_TRACING": "true",
        "LANGSMITH_TRACING_V2": "true",
    }
)

## Summarization

Summarization is a relatively easy task for an LLM. However, the map-reduce approach shows how we can build a more complex execution graph.



In [None]:
# Approach one: we just concatenate the documents into the context and ask the LLM to summarize it.
# This is called the "stuff" approach.

# Load the basic dependencies

from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_vertexai import ChatVertexAI
from langchain.chat_models import init_chat_model

# Fetch the blog post; `docs` is reused later when splitting into chunks.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

# Concatenate the text of every loaded document, each one preceded by a blank line.
content = "".join("\n\n" + doc.page_content for doc in docs)


# Chat model shared by this cell and by the map-reduce graph defined further down.
llm = ChatVertexAI(model="gemini-2.0-flash-001")

# The prompt must contain a user message: it does not work if the role is
# changed from "user" to "system".
prompt = ChatPromptTemplate(
    [
        ("user", "Please summarize the following text:\n\n {context}"),
    ]
)
chain = prompt | llm

# "Stuff" approach: the whole document goes into a single LLM call.
summary = chain.invoke({"context": content})

from IPython.display import display, Markdown

display(Markdown(summary.content))


## Map Reduce Approach


1. We need to split the documents into small chunks; each chunk should be smaller than the LLM's max token limit.
2. The edge from START to the first node is conditional: it emits one generate-summary task for each chunk.
3. We then collect the summaries and, if they are too long, collapse them. If the summaries are still too long after a collapse, we continue to collapse.
4. Finally, we generate the final summary from the collected summaries.

In [3]:
# Prepare and split the documents into chunks

from langchain_text_splitters import CharacterTextSplitter

# Splitter whose chunk size is measured with the tiktoken encoder; chunks do not overlap.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=0,
)
splitts = splitter.split_documents(docs)

# Last expression of the cell: shows how many chunks were produced.
len(splitts)



Created a chunk of size 1003, which is longer than the specified 1000


14

In [4]:
# Define the graph
import operator
from typing import Annotated, List, TypedDict, Literal
from langgraph.constants import Send
from langgraph.graph import START, END, StateGraph
from langchain.chains.combine_documents.reduce import (
    acollapse_docs,
    split_list_of_docs,
)

# Token budget: used to group summaries when collapsing and as the threshold
# that decides whether another collapse pass is needed.
token_max = 1000

def length_function(documents: list[Document]):
    """Return the total number of tokens across ``documents``.

    Each document's ``page_content`` is measured with ``llm.get_num_tokens``
    and the counts are added up (0 for an empty list).
    """
    total_tokens = 0
    for document in documents:
        total_tokens += llm.get_num_tokens(document.page_content)
    return total_tokens


class OverallState(TypedDict):
    """State shared by the nodes of the map-reduce summarization graph."""
    splitts: List[str] # original inputs: the text of each chunk to summarize
    summaries: Annotated[list, operator.add] # one summary per split; operator.add is the reducer, so the lists returned by generate_summary are concatenated
    collapsed_summaries: List[Document] # summaries converted to Documents, then collapsed if needed
    final_summary: str # final output



class SummaryContent(TypedDict):
    """Payload sent to one ``generate_summary`` task by ``map_summaries``."""
    text: str # the text of a single split to summarize

# Map step: summarize a single chunk.
async def generate_summary(input: SummaryContent):
    """Summarize one chunk of text with the LLM.

    This function is registered as a node of the overall graph, so its result
    is an update to ``OverallState``: a one-element ``summaries`` list holding
    the text of the model's reply.
    """
    messages = [
        ("system", "please write a concise summary for the text from user. Only output the summary"),
        ("human", input["text"]),
    ]
    reply = await llm.ainvoke(messages)
    return {"summaries": [reply.content]}

# Fan-out: one summary task per split.
def map_summaries(state: OverallState):
    """Build one ``Send`` to the ``generate_summary`` node for every split in the state."""
    sends = []
    for chunk_text in state["splitts"]:
        sends.append(Send("generate_summary", {"text": chunk_text}))
    return sends

# Turn the summary strings into documents.
def collect_summaries(state: OverallState):
    """Wrap every summary in a ``Document`` and store the list as ``collapsed_summaries``."""
    summary_docs = []
    for summary_text in state["summaries"]:
        summary_docs.append(Document(summary_text))
    return {"collapsed_summaries": summary_docs}


# Prompt text for the reduce step; `{docs}` is replaced with the summaries to merge.
reduce_template = (
    "\n"
    "The following is a set of summaries:\n"
    "{docs}\n"
    "Take these and distill it into a final, consolidated summary\n"
    "of the main themes.\n"
)

# Single human message built from the reduce template.
reduce_prompt = ChatPromptTemplate.from_messages([("human", reduce_template)])

# Reduce a set of summary documents into one summary string.
async def _reduce(input: "list[Document] | dict") -> str:
    """Ask the LLM to consolidate a set of summaries into a single summary.

    Args:
        input: The summary documents to merge. Both users of this function in
            this notebook (``acollapse_docs`` and ``generate_final_summary``)
            hand it a list of ``Document`` objects. A ready-made mapping for
            the prompt (``{"docs": ...}``) is still accepted and forwarded
            unchanged.

    Returns:
        The text content of the LLM response.
    """
    if isinstance(input, dict):
        prompt_input = input
    else:
        # Give the template the plain text of the summaries. Passing the list
        # itself would interpolate the Python repr of the Document objects
        # (metadata, escaped newlines, ...) into the prompt.
        prompt_input = {"docs": "\n\n".join(doc.page_content for doc in input)}
    prompt = reduce_prompt.invoke(prompt_input)
    response = await llm.ainvoke(prompt)
    return response.content

async def collapse_summaries(state: OverallState):
    """Collapse the current summaries into fewer, merged documents.

    The documents in ``collapsed_summaries`` are grouped with
    ``split_list_of_docs`` (using ``length_function`` and ``token_max``), and
    each group is reduced to one document via ``acollapse_docs`` and ``_reduce``.
    """
    groups = split_list_of_docs(state["collapsed_summaries"], length_function, token_max)
    # Groups are reduced one after another, in order.
    collapsed = [await acollapse_docs(group, _reduce) for group in groups]
    return {"collapsed_summaries": collapsed}

def should_collapse(state: OverallState) -> Literal["collapse_summaries", "generate_final_summary"]:
    """Pick the next node: collapse again while the summaries exceed ``token_max`` tokens."""
    total_tokens = length_function(state["collapsed_summaries"])
    return "collapse_summaries" if total_tokens > token_max else "generate_final_summary"

async def generate_final_summary(state: OverallState):
    """Reduce the collapsed summaries into the final summary and store it in the state."""
    return {"final_summary": await _reduce(state["collapsed_summaries"])}

builder = StateGraph(OverallState)

# Register the nodes.
for node_name, node_fn in (
    ("generate_summary", generate_summary),
    ("collect_summaries", collect_summaries),
    ("collapse_summaries", collapse_summaries),
    ("generate_final_summary", generate_final_summary),
):
    builder.add_node(node_name, node_fn)

# START fans out to one generate_summary task per split.
builder.add_conditional_edges(START, map_summaries, ["generate_summary"])
builder.add_edge("generate_summary", "collect_summaries")

# After collecting, and after every collapse pass, should_collapse picks the next node.
for routed_node in ("collect_summaries", "collapse_summaries"):
    builder.add_conditional_edges(routed_node, should_collapse)

builder.add_edge("generate_final_summary", END)
graph = builder.compile()


In [5]:
async for step in graph.astream( {'splitts':[split.page_content for split in splitts]}):
    print(step.keys())


dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['generate_summary'])
dict_keys(['collect_summaries'])
dict_keys(['collapse_summaries'])
dict_keys(['generate_final_summary'])


In [7]:
result = await graph.ainvoke( {'splitts':[split.page_content for split in splitts]})

In [10]:
# Render the final summary as Markdown.
final_summary_md = Markdown(result["final_summary"])
display(final_summary_md)

LLM-powered autonomous agents are an emerging field using large language models for planning, memory, and tool use to tackle complex tasks. Planning involves task decomposition and self-reflection to improve reasoning, while memory utilizes both short-term (in-context learning) and long-term (external vector stores). Tool use expands capabilities through external APIs, with architectures like MRKL and Toolformer being developed. Challenges include efficiency, reliance on long context windows, stability, and self-evaluation issues. Applications are being explored in areas like drug discovery and autonomous scientific experiment design, alongside simulations of human behavior. A related system, GPT-Engineer, generates code repositories from natural language, often using MVC architecture. However, challenges remain with LLM agents, including context length limits, long-term planning difficulties, error adaptation, and reliability issues with natural language interfaces. Continuous self-evaluation and learning from feedback are key for optimal performance.
