In [None]:
%pip install langchain langchain_openai pypdf langchain-community --quiet --upgrade

Note: you may need to restart the kernel to use updated packages.


In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI as genai_chat
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import requests
from dotenv import load_dotenv

In [21]:
# Download the PDF from blob storage:
url = (
    "https://storage.googleapis.com/strapi_cms_assets/principles_of_marketing_book.pdf"
)
response = requests.get(url)
with open("../data/principles_of_marketing_book.pdf", "wb") as f:
    f.write(response.content)

In [22]:
# Create a text splitter, load the PDF and split it:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200)
loader = PyPDFLoader("../data/principles_of_marketing_book.pdf")
pages = loader.load_and_split()

In [23]:
print(len(pages))

499


In [24]:
# Setup a chat_model in LangChain:
load_dotenv()
model_name = "models/gemini-2.0-flash-exp"

llm = genai_chat(model=model_name)

In [25]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Define prompt
prompt = ChatPromptTemplate.from_template("Write a concise summary of the following:\\n\\n{context}")

# Instantiate chain
chain = create_stuff_documents_chain(llm, prompt)

# Invoke chain
result = chain.invoke({"context": pages})
print(result)

Here's a concise summary of the "Principles of Marketing" textbook:

This textbook provides a comprehensive overview of marketing principles, covering both traditional and contemporary approaches. It emphasizes the importance of creating, communicating, delivering, and exchanging value to customers. The book explores strategic planning, consumer and business buying behavior, market segmentation, targeting, and positioning. It also delves into the creation and management of offerings, marketing channels, supply chains, marketing research, integrated marketing communications (including advertising, public relations, social media, and sales), customer satisfaction, pricing strategies, and the development of a marketing plan. The text also discusses ethical considerations and the changing global environment in which businesses operate. Overall, the book aims to equip students with a solid understanding of marketing principles and their practical application in a variety of contexts.


In [11]:
from langchain_core.output_parsers import StrOutputParser

map_prompt = prompt

map_chain = map_prompt | llm | StrOutputParser()

In [13]:
# Invoke chain
result = map_chain.invoke({"context": pages})

print(result)

This document is a textbook called "Principles of Marketing" by the University of Minnesota Libraries Publishing edition in 2015. It's licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. The book covers topics like defining marketing, strategic planning, consumer behavior, business buying behavior, market segmentation, creating and managing offerings, marketing channels, supply chains, marketing research, integrated marketing communications, public relations, social media, professional selling, customer satisfaction and loyalty, pricing, and the marketing plan. It also includes discussion questions and activities for each chapter. The book is adapted from a work produced and distributed under a Creative Commons license in 2010 by a publisher who requested that they and the original author not receive attribution.


In [17]:
# Doing a smaller summarization using the same chain:
smaller_amount_of_pages = pages[:10]
second_result = map_chain.invoke(smaller_amount_of_pages)

In [18]:
second_result

'"Principles of Marketing" is a textbook adapted from a 2010 work under a Creative Commons license by the University of Minnesota Libraries Publishing. It covers core marketing concepts, including defining marketing, strategic planning, consumer and business buying behavior, market segmentation, creating and managing offerings, marketing channels and supply chains, marketing research, integrated marketing communications, public relations, professional selling, customer satisfaction, pricing, and the marketing plan. The book emphasizes the activities involved in marketing and focuses on the role of marketing professionals.'

# Doing a custom `MapReduceChain` to generate a summary in Spanish:


In [19]:
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter


map_template_string = """Give the following pages of a marketing book. Generate a summary in Spanish:
Pages:
{pages}

Summary:
"""

reduce_template_string = """Given the following Spanish summaries of pages of a marketing book, generate a high level description of the book in Spanish:
Summaries:
{summaries}
"""

# Prompt to use in map and reduce stages
MAP_PROMPT = PromptTemplate(input_variables=["pages"], template=map_template_string)
REDUCE_PROMPT = PromptTemplate(
    input_variables=["summaries"], template=reduce_template_string
)

# LLM to use in map and reduce stages
map_llm_chain = LLMChain(llm=llm, prompt=MAP_PROMPT)
reduce_llm_chain = LLMChain(llm=llm, prompt=REDUCE_PROMPT)

# Takes a list of documents and combines them into a single string
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_llm_chain,
    document_variable_name="summaries",
)

# Combining documents by mapping a chain over them, then combining results with reduce chain
combine_documents = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_llm_chain,
    # Reduce chain
    # The variable name in the llm_chain to put the documents in:
    document_variable_name="pages",
    combine_document_chain=combine_documents_chain,
)

map_reduce = MapReduceChain(
    combine_documents_chain=combine_documents,
    text_splitter=CharacterTextSplitter(
        separator="\n##\n", chunk_size=100, chunk_overlap=0
    ),
)

  map_llm_chain = LLMChain(llm=llm, prompt=MAP_PROMPT)
  combine_documents_chain = StuffDocumentsChain(
  combine_documents = MapReduceDocumentsChain(
  map_reduce = MapReduceChain(


In [None]:
map_reduce_result = map_reduce(
    {"input_text": "\n".join([doc.page_content for doc in pages])[0:100]}
)

{'output_text': 'El libro de marketing proporciona una introducción a los principios fundamentales del marketing y su aplicación en el mundo empresarial. El autor destaca la importancia de conocer al cliente, segmentar el mercado y dirigirse a diferentes segmentos de clientes de manera efectiva. También explora conceptos como el posicionamiento y la diferenciación, así como el papel del marketing en la creación de valor y la construcción de relaciones sólidas con los clientes. En resumen, el libro establece las bases para comprender y aplicar con éxito las estrategias de marketing en el mundo empresarial actual.'}