# Map-reduce in `load_summarize_chain`

In [1]:
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
import tiktoken
from pprint import pprint
from tqdm import tqdm

# Prompt for the map step: one text chunk is substituted for {text} and the
# model is asked to answer with '-' bullet points only.
MAP_TEMPLATE_TXT = """Write a detail summary of this text section in bullet points. 
Use '-' for bullet points and answer only the bullet points.
Text:
{text}

SUMMARY:"""
    
# Prompt for the combine step: the joined per-chunk summaries are substituted
# for {text} and merged into one final bullet list.
COMBINE_TEMPLATE_TXT = """Combine these summaries into a final summary in bullet points.
Use '-' for bullet points and answer only the bullet points.
Text:
{text}

FINAL SUMMARY:"""

# Working copies of the templates; these are the names handed to run() below.
map_prompt_txt = MAP_TEMPLATE_TXT
combine_prompt_txt = COMBINE_TEMPLATE_TXT

  from .autonotebook import tqdm as notebook_tqdm


# Configuration settings

In [2]:
# Load the full text to summarize. The encoding is passed explicitly so the
# read does not depend on the platform's default locale encoding.
# NOTE(review): assumes sample-text.txt is UTF-8 encoded — confirm.
with open("sample-text.txt", "r", encoding="utf-8") as f:
    transcript = f.read()

# Ollama model name and server address.
model = "llama3.2"
base_url = "http://localhost:11434"

chunk_size = 2000  # this is in tokens
overlap_size = 0  # this is in tokens

temperature = 0.5

# Generation budgets for the map and combine calls.
map_num_predict = 512  # number of tokens to predict, Default: 128, -1 = infinite generation, -2 = fill context
combine_num_predict = 2048

# Helper functions

In [3]:
def get_text_splitter(chunk_size: int, overlap_size: int):
    """Build a recursive character splitter whose sizes are measured in tiktoken tokens.

    Args:
        chunk_size: Passed to the splitter as ``chunk_size``.
        overlap_size: Passed to the splitter as ``chunk_overlap``.

    Returns:
        The splitter created by ``RecursiveCharacterTextSplitter.from_tiktoken_encoder``.
    """
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=overlap_size,
    )
    return splitter

def convert_text_to_tokens(text, encoder="gpt-3.5-turbo"):
    """Encode ``text`` with the tiktoken encoding registered for a model name.

    Args:
        text: The string to tokenize.
        encoder: Model name given to ``tiktoken.encoding_for_model``
            (despite the parameter name, this is a model name).

    Returns:
        The result of ``encode(text)`` on that encoding.
    """
    return tiktoken.encoding_for_model(encoder).encode(text)

def get_larger_context_size(token_count: int) -> int:
    """Return the smallest multiple of 1024 that is strictly greater than ``token_count``.

    The result is never below 1024, so zero or negative counts map to 1024.

    Args:
        token_count: Number of tokens that must fit in the context window.

    Returns:
        A context size in tokens, always a positive multiple of 1024.
    """
    step = 1024
    # Computed arithmetically rather than by scanning a fixed candidate list:
    # the scan ran out of candidates (and leaked a bare StopIteration) once
    # token_count reached 99 * 1024.
    multiples = int(token_count // step) + 1
    return max(1, multiples) * step

# Prepare docs

In [4]:
def convert_transcript_to_split_docs(transcript, chunk_size, overlap_size):
    """Wrap a transcript in a single Document and split it into chunks.

    Args:
        transcript: Full text to split.
        chunk_size: Chunk size forwarded to ``get_text_splitter``.
        overlap_size: Chunk overlap forwarded to ``get_text_splitter``.

    Returns:
        The documents produced by the splitter's ``split_documents``.
    """
    splitter = get_text_splitter(chunk_size=chunk_size, overlap_size=overlap_size)
    # The whole transcript goes in as one document; no metadata is attached.
    whole_doc = Document(page_content=transcript)
    return splitter.split_documents([whole_doc])

# Map and combine steps

In [5]:
def run(transcript, chunk_size, overlap_size, map_prompt_txt, combine_prompt_text):
    """Summarize ``transcript`` with a hand-written map-reduce over Ollama chat models.

    The transcript is split into chunks, each chunk is summarized on its own
    (map), and the joined chunk summaries are summarized once more (combine).

    Reads the module-level settings ``model``, ``base_url``, ``temperature``,
    ``map_num_predict`` and ``combine_num_predict``.

    Args:
        transcript: Full text to summarize.
        chunk_size: Chunk size in tokens used for splitting.
        overlap_size: Overlap between consecutive chunks, in tokens.
        map_prompt_txt: Prompt template for the map step; must contain ``{text}``.
        combine_prompt_text: Prompt template for the combine step; must
            contain ``{text}``.

    Returns:
        The ``content`` of the combine model's response.
    """
    split_docs = convert_transcript_to_split_docs(transcript, chunk_size, overlap_size)

    map_prompt = PromptTemplate(
        template=map_prompt_txt,
        input_variables=["text"]
    )

    combine_prompt = PromptTemplate(
        template=combine_prompt_text,
        input_variables=["text"]
    )

    # Context window for the map calls: chunk tokens plus the generation budget,
    # rounded up to the next 1024 multiple.
    # NOTE(review): the prompt template's own tokens are not counted here and
    # only fit in the rounding headroom — confirm this is enough for the
    # chosen chunk_size.
    map_num_ctx = get_larger_context_size(map_num_predict + chunk_size)

    llm_map = ChatOllama(
        model=model,
        base_url=base_url,
        temperature=temperature,
        num_ctx=map_num_ctx,
        num_predict=map_num_predict,
        format='',
        verbose=True
    )

    # Map step: summarize every chunk independently, in document order.
    summaries = []
    for split_doc in tqdm(split_docs, desc="Mapping..."):
        full_prompt = map_prompt.format_prompt(text=split_doc.page_content)
        output = llm_map.invoke(full_prompt.text)
        summaries.append(output.content)

    # Combine step: feed all chunk summaries to the model in a single prompt.
    combined_summaries = "\n".join(summaries)
    full_prompt = combine_prompt.format_prompt(text=combined_summaries)

    # Size the combine context from the actual prompt length. The count comes
    # from convert_text_to_tokens, so it is only an estimate of what the
    # Ollama model will see.
    token_counts = len(convert_text_to_tokens(full_prompt.text))
    combine_num_ctx = get_larger_context_size(token_counts + combine_num_predict)

    llm_combine = ChatOllama(
        model=model,
        base_url=base_url,
        temperature=temperature,
        num_ctx=combine_num_ctx,
        num_predict=combine_num_predict,
        format='',
        verbose=True
    )

    output_comb = llm_combine.invoke(full_prompt.text)

    return output_comb.content

In [6]:
# Run the full map-reduce summarization with the settings defined above.
output = run(transcript, chunk_size, overlap_size, map_prompt_txt, combine_prompt_txt)

Mapping...: 100%|██████████| 22/22 [02:31<00:00,  6.90s/it]


In [7]:
# Collapse any doubled bullet marker ("- -") into a single "-" before printing.
pprint(output.replace('- -','-'))

('- Neural networks are simple mathematical expressions that can produce '
 'surprising emergent behavior despite their simplicity mathematically.\n'
 '\n'
 '- The optimization process used to train neural networks is different from '
 'the optimization process that gave rise to biological neural networks, '
 'making them "alien artifacts" rather than analogies to the brain.\n'
 '\n'
 '- Biological neural networks are part of a multi-agent self-play system that '
 'has evolved over time, allowing for survival and reproduction, whereas '
 'artificial neural networks are primarily used for compression and solving '
 'problems.\n'
 '\n'
 '- The origin of intelligence is an extremely remarkable story with many '
 "interesting events and processes, but it's difficult to pinpoint a single "
 'unique piece that stands out.\n'
 '\n'
 '- Intelligent life may not be unique to humans, and other civilizations '
 'could exist in the universe.\n'
 '\n'
 '- The origin of life on Earth might be more c

In [8]:
# with open("summary-map-reduce.txt", "w") as f:
#     f.write(output.replace("- -", "-"))