In [None]:
%env 

In [5]:
import boto3
boto3_bedrock  = boto3.client("bedrock-runtime", 'us-east-1')

# LLM
from langchain.llms.bedrock import Bedrock
# MapReduce chain
from langchain.chains import ReduceDocumentsChain, StuffDocumentsChain, MapReduceDocumentsChain, LLMChain
# Prompt
from langchain.prompts import PromptTemplate

# loaders
from langchain.document_loaders.pdf import PyPDFLoader

llm = Bedrock(
    model_id="anthropic.claude-v2",
    model_kwargs={
        "temperature": 0,
        "top_k": 250,
        "top_p": 1,
    },
    client=boto3_bedrock,
)

# Map
map_template = """The following is a set of documents
{docs}
Based on this list of docs, please identify the main themes 
Helpful Answer:"""
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(llm=llm, prompt=map_prompt)

# Reduce
reduce_template = """
The following is set of summaries:
{doc_summaries}
Take these and distill it into a final, consolidated summary of the main themes. """
reduce_prompt = PromptTemplate.from_template(reduce_template)

# Run chain
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteravely reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=8000,
    verbose=True
)

# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Return the results of the map steps in the output
    return_intermediate_steps=False,
    verbose=True
)

# Load documents
loader = PyPDFLoader("./cloud/aws-caf-ebook.pdf")
split_docs = loader.load_and_split()



In [6]:
# Execute
result = map_reduce_chain.run(split_docs)
print(result)



[1m> Entering new MapReduceDocumentsChain chain...[0m

[1m> Finished chain.[0m
 Here is a consolidated summary of the main themes across the documents:

The key themes focus on leveraging the cloud and modern technologies to digitally transform organizations. Specific strategies include:

- Adopting the cloud in a structured way using frameworks like the AWS Cloud Adoption Framework (CAF) 

- Modernizing applications through cloud-native architectures, containers, serverless, and microservices

- Building data lakes and pipelines to gain business insights through analytics

- Automating processes and optimizing costs for efficiency 

- Ensuring governance, security, compliance, and IT service delivery

- Transforming teams, culture, and organizational structures to support new ways of working

- Following best practices for cloud operations, architecture, and security

In summary, the documents provide a comprehensive playbook for digitally transforming an organization through cl