# Example - Parsing Summaries of Business Books and Querying Them with RAG

![](./images/llama-reading-book.png)

In [2]:
%pip install llama-index-readers-web html2text -qU

Note: you may need to restart the kernel to use updated packages.


In [19]:
# Crawl the paginated book listing and collect one summary-page URL per book.
import requests
from bs4 import BeautifulSoup

url = "https://www.mentorist.app/books/"
urls = []  # reset on every run so re-executing this cell does not duplicate entries

n_pages_to_crawl = 3
request_timeout_s = 30  # requests has no default timeout; a stalled server would hang the cell

for page in range(1, n_pages_to_crawl + 1):
    listing_response = requests.get(url, params={"page": page}, timeout=request_timeout_s)
    # Fail loudly on HTTP errors instead of parsing an error page that yields no books.
    listing_response.raise_for_status()
    soup = BeautifulSoup(listing_response.text, "html.parser")

    # The listing is expected to expose each book as an <h4> wrapping a link
    # of the form ".../books/<slug>"; headings that do not match are skipped.
    for book in soup.find_all("h4"):
        link = book.a
        href = link.get("href") if link is not None else None
        if not href:
            continue  # heading without a link (or with an empty href)

        parts = href.split("/books/")
        if len(parts) < 2 or not parts[1]:
            continue  # not a book link, or it points back at the listing itself

        urls.append(url + parts[1])

In [20]:
from llama_index.core import SummaryIndex
from llama_index.readers.web import SimpleWebPageReader
from IPython.display import Markdown, display
import os

# Fetch every collected book page; html_to_text=True asks the reader to
# convert the HTML into plain text before building the documents.
page_reader = SimpleWebPageReader(html_to_text=True)
documents = page_reader.load_data(urls)

In [21]:
# Setting up Bedrock
from llama_index.core import Settings
from llama_index.llms.bedrock import Bedrock
from llama_index.embeddings.bedrock import BedrockEmbedding, Models
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, Settings

# Models configuration: both the embedding model and the LLM are served by
# Amazon Bedrock from the same region.
bedrock_region = "us-west-2"
Settings.embed_model = BedrockEmbedding(
    model=Models.COHERE_EMBED_MULTILINGUAL_V3,
    region_name=bedrock_region,
)
Settings.llm = Bedrock(
    model="anthropic.claude-3-haiku-20240307-v1:0",
    region_name=bedrock_region,
)

# Indexing phase: split the documents into overlapping chunks and embed them.
# Chunks are kept at 400 because the max token size is 512 for this model.
sentence_splitter = SentenceSplitter(chunk_size=400, chunk_overlap=100)
index = VectorStoreIndex.from_documents(
    documents=documents,
    transformations=[sentence_splitter],
)

In [24]:
# Querying Phase
# Build a query engine on top of the vector index and ask a question that
# should be answered from the indexed book summaries.
query_engine = index.as_query_engine()
response = query_engine.query("Which book introduces the concept of Wildly Important Goal (WIG)?")
print(response)

The book "The 4 Disciplines of Execution" by Chris McChesney, Sean Covey, and Jim Huling introduces the concept of the Wildly Important Goal (WIG). According to the context, this book suggests that focusing on one or two critical, wildly important goals can help achieve the best possible results, as opposed to attempting to concentrate on multiple tasks simultaneously.
