In [1]:
# Install required packages
!pip install faiss-cpu futures langchain-community python-dotenv tqdm
!pip install rank_bm25
!pip install PyMuPDF
!pip install deepeval

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting futures
  Downloading futures-3.0.5.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2
Collecting PyMuPDF
  Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF
Successfully installed PyMuPDF-1.26.3
Collecting deepeval
  Downloading deepeval-3.3.5-py3-none-any.whl.metadata (17 kB)
Collecting anthropic (from deepeval)
  Downloading anthropic-0.60.0-py3-none-any.whl.metadata (27 kB)
Collecting ollama (from deepeval)
  Downloading ollama-0.5.1-py3-none-any.whl.metadata (4.3 kB)
Collecting opentelemetry-api<2.0.0,>=1.24.0 (from deepeval)
  Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metada

In [1]:
!rm -rf rag-utils  # or whatever your repo folder is called

In [2]:
#Clone the repository to access helper functions and evaluation modules
!git clone https://github.com/databytobi/RAG_TECHNIQUES.git
import sys
sys.path.append('RAG_TECHNIQUES')
# If you need to run with the latest data
# !cp -r RAG_TECHNIQUES/data .

Cloning into 'RAG_TECHNIQUES'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 68 (delta 19), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (68/68), 10.02 MiB | 16.19 MiB/s, done.
Resolving deltas: 100% (19/19), done.


In [5]:
import os
import sys
import faiss
from tqdm import tqdm
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.docstore.document import Document


# Load environment variables from a .env file
load_dotenv()

# Make sure the Google API key is available (comment out if not using GOOGLE).
# Prompt only when it is not already set; getpass is used instead of input()
# so the key is not echoed into the notebook's saved output.
if not os.getenv('GOOGLE_API_KEY'):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Please enter your GOOGLE API key: ")

# Original path append replaced for Colab compatibility
# NOTE(review): later cells use names that are not imported anywhere else in this
# notebook (e.g. read_pdf_to_string, show_context) — presumably they come from this
# star import; confirm before replacing it with explicit imports.
from helper_functions import *
#from evaluation.evalute_rag import *

In [6]:
#Download required data files
import os
os.makedirs('data', exist_ok=True)

# Download the PDF document used in this notebook
!wget -O data/Agents_v8.pdf https://raw.githubusercontent.com/databytobi/RAG_TECHNIQUES/main/data/Agents_v8.pdf
!wget -O data/Agents_v8.pdf https://raw.githubusercontent.com/databytobi/RAG_TECHNIQUES/main/data/Agents_v8.pdf

--2025-08-05 07:02:46--  https://raw.githubusercontent.com/databytobi/RAG_TECHNIQUES/main/data/Agents_v8.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9305713 (8.9M) [application/octet-stream]
Saving to: ‘data/Agents_v8.pdf’


2025-08-05 07:02:47 (132 MB/s) - ‘data/Agents_v8.pdf’ saved [9305713/9305713]

--2025-08-05 07:02:47--  https://raw.githubusercontent.com/databytobi/RAG_TECHNIQUES/main/data/Agents_v8.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9305713 (8.9M) [application/octet-stream]
Saving to: ‘data/Agents_v

Read PDF to string

In [8]:
# Location of the PDF written by the download cell (wget -O data/Agents_v8.pdf)
path = "data/Agents_v8.pdf"

In [9]:
# Read the PDF at `path` into `content`, which is later split into chunks as a single string.
# NOTE(review): read_pdf_to_string is not defined in this notebook — presumably provided
# by the helper_functions star import; confirm.
content = read_pdf_to_string(path)

Function to split text into chunks, storing each chunk's chronological index in its metadata

In [10]:
def split_text_to_chunks_with_indices(text: str, chunk_size: int, chunk_overlap: int) -> List[Document]:
    """
    Split text into fixed-size character windows that overlap their neighbours.

    Consecutive windows start `chunk_size - chunk_overlap` characters apart, so each
    window shares its first `chunk_overlap` characters with the end of the previous one.
    The final windows may be shorter than `chunk_size`.

    Args:
    text (str): The text to split.
    chunk_size (int): Number of characters per chunk.
    chunk_overlap (int): Number of characters shared by consecutive chunks.

    Returns:
    List[Document]: One Document per window, in document order. Each Document's metadata
    holds "index" (the chunk's position in that order) and "text" (the full input text).

    Raises:
    ValueError: If chunk_overlap is not smaller than chunk_size; the window would never
    advance and the split would not terminate.
    """
    step = chunk_size - chunk_overlap
    if step <= 0:
        raise ValueError(
            f"chunk_overlap ({chunk_overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    chunks = []
    for start in range(0, len(text), step):
        window = text[start:start + chunk_size]
        # NOTE(review): "text" stores the whole input on every chunk; nothing in this
        # notebook reads it — consider dropping it if no other consumer needs it.
        chunks.append(Document(page_content=window, metadata={"index": len(chunks), "text": text}))
    return chunks

Split the document accordingly

In [11]:
# Chunking configuration for the PDF: 400-character chunks, neighbours sharing 200 characters
chunks_size, chunk_overlap = 400, 200
docs = split_text_to_chunks_with_indices(
    text=content,
    chunk_size=chunks_size,
    chunk_overlap=chunk_overlap,
)

Create vector store and retriever

In [12]:
# Embedding model created with its default settings; reused later for the demo document
embeddings = HuggingFaceEmbeddings()
# Embed every chunk and index the vectors in FAISS
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
# Retriever that returns only the single best match per query (k=1)
chunks_query_retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))

  embeddings = HuggingFaceEmbeddings()
  embeddings = HuggingFaceEmbeddings()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Function to fetch the k-th chunk (in original document order) from the vector store

In [13]:
def get_chunk_by_index(vectorstore, target_index: int) -> Document:
    """
    Look up a stored chunk by the 'index' value recorded in its metadata.

    Args:
    vectorstore (VectorStore): The vectorstore holding the chunks.
    target_index (int): The metadata 'index' of the chunk wanted.

    Returns:
    Optional[Document]: The first stored chunk whose metadata 'index' equals
    target_index, or None when no chunk matches.
    """
    # Simplified lookup: an empty-string query with k set to the number of stored
    # vectors pulls back every chunk, which is then scanned linearly. Depending on
    # the vectorstore implementation, a more efficient method may be needed.
    every_chunk = vectorstore.similarity_search("", k=vectorstore.index.ntotal)
    matching = (
        candidate
        for candidate in every_chunk
        if candidate.metadata.get('index') == target_index
    )
    return next(matching, None)

Check the function

In [14]:
# Sanity check: fetch the chunk whose metadata index is 0 and print its text
chunk = get_chunk_by_index(vectorstore, 0)
print(chunk.page_content)

Agents
Authors: Julia Wiesinger, Patrick Marlow  
and Vladimir Vuskovic
Agents
2
February 2025
Acknowledgements
Content contributors
Evan Huang
Emily Xue
Olcan Sercinoglu
Sebastian Riedel
Satinder Baveja
Antonio Gulli
Anant Nawalgaria
Curators and Editors
Antonio Gulli
Anant Nawalgaria
Grace Mollison 
Technical Writer
Joey Haymaker
Designer
Michael Lanning 
Introduction
4
What is an agent?
5
The


Function that retrieves from the vector store based on semantic similarity and then pads each retrieved chunk with its num_neighbors neighbouring chunks before and after, taking the chunk overlap into account to construct a meaningful, wider window around it

In [15]:
def _chunks_by_index(vectorstore) -> dict:
    """Fetch every stored chunk once and map its metadata 'index' to the chunk."""
    lookup = {}
    total = vectorstore.index.ntotal
    if total:
        # An empty-string query with k == number of stored vectors returns every chunk.
        for doc in vectorstore.similarity_search("", k=total):
            # setdefault keeps the first document returned for a given index.
            lookup.setdefault(doc.metadata.get('index'), doc)
    return lookup


def retrieve_with_context_overlap(vectorstore, retriever, query: str, num_neighbors: int = 1, chunk_size: int = 200, chunk_overlap: int = 20) -> List[str]:
    """
    Retrieve chunks based on a query, then fetch neighboring chunks and concatenate them,
    accounting for overlap and correct indexing.

    Args:
    vectorstore (VectorStore): The vectorstore containing the chunks.
    retriever: The retriever object to get relevant documents.
    query (str): The query to search for relevant chunks.
    num_neighbors (int): The number of chunks to retrieve before and after each relevant chunk.
    chunk_size (int): The size of each chunk when originally split. Kept for interface
        compatibility; the concatenation only needs chunk_overlap.
    chunk_overlap (int): The overlap between chunks when originally split.

    Returns:
    List[str]: List of concatenated chunk sequences, each centered on a relevant chunk.
    Retrieved chunks without an 'index' in their metadata are skipped. If none of the
    chunks in a window can be found in the vectorstore, the retrieved chunk's own text
    is returned for that entry.
    """
    # Prefer `invoke`; fall back to the legacy method for retrievers that lack it.
    fetch = getattr(retriever, "invoke", None) or retriever.get_relevant_documents
    relevant_chunks = fetch(query)
    result_sequences = []
    chunk_lookup = None  # built lazily, at most once per call

    for chunk in relevant_chunks:
        current_index = chunk.metadata.get('index')
        if current_index is None:
            continue

        if chunk_lookup is None:
            chunk_lookup = _chunks_by_index(vectorstore)

        # Determine the range of chunks to retrieve
        start_index = max(0, current_index - num_neighbors)
        end_index = current_index + num_neighbors + 1  # +1 because range is exclusive at the end

        # Collect the texts of the chunks that exist in the range, already in index order
        window = [
            chunk_lookup[i].page_content
            for i in range(start_index, end_index)
            if i in chunk_lookup
        ]
        if not window:
            # Nothing in the window was found in the store: keep at least the hit itself.
            window = [chunk.page_content]

        # Concatenate chunks, accounting for overlap
        concatenated_text = window[0]
        for piece in window[1:]:
            # The shared region cannot be longer than the incoming chunk: near the end of
            # a document a chunk can be shorter than chunk_overlap and wholly contained in
            # its predecessor, so trimming the full overlap would drop text.
            trim = max(0, min(chunk_overlap, len(piece), len(concatenated_text)))
            concatenated_text = concatenated_text[:len(concatenated_text) - trim] + piece

        result_sequences.append(concatenated_text)

    return result_sequences

Comparing regular retrieval and retrieval with a context window

In [16]:
# Baseline approach: the single best-matching chunk on its own.
# The retriever was created with search_kwargs={"k": 1}, so no extra `k` is passed here;
# `invoke` replaces the deprecated `get_relevant_documents`.
query = "what is an agent ."
baseline_chunk = chunks_query_retriever.invoke(query)

# Focused context enrichment approach: the same hit padded with one neighbouring
# chunk on each side, using the same size/overlap the PDF was split with.
enriched_chunks = retrieve_with_context_overlap(
    vectorstore,
    chunks_query_retriever,
    query,
    num_neighbors=1,
    chunk_size=400,
    chunk_overlap=200
)

print("Baseline Chunk:")
print(baseline_chunk[0].page_content)
print("\nEnriched Chunks:")
print(enriched_chunks[0])

  baseline_chunk = chunks_query_retriever.get_relevant_documents(query


Baseline Chunk:
Agents
Authors: Julia Wiesinger, Patrick Marlow  
and Vladimir Vuskovic
Agents
2
February 2025
Acknowledgements
Content contributors
Evan Huang
Emily Xue
Olcan Sercinoglu
Sebastian Riedel
Satinder Baveja
Antonio Gulli
Anant Nawalgaria
Curators and Editors
Antonio Gulli
Anant Nawalgaria
Grace Mollison 
Technical Writer
Joey Haymaker
Designer
Michael Lanning 
Introduction
4
What is an agent?
5
The

Enriched Chunks:
Agents
Authors: Julia Wiesinger, Patrick Marlow  
and Vladimir Vuskovic
Agents
2
February 2025
Acknowledgements
Content contributors
Evan Huang
Emily Xue
Olcan Sercinoglu
Sebastian Riedel
Satinder Baveja
Antonio Gulli
Anant Nawalgaria
Curators and Editors
Antonio Gulli
Anant Nawalgaria
Grace Mollison 
Technical Writer
Joey Haymaker
Designer
Michael Lanning 
Introduction
4
What is an agent?
5
The model
6
The tools
7
The orchestration layer
7
Agents vs. models
8
Cognitive architectures: How agents operate 
8
Tools: Our keys to the outside world
12
Exten

In [17]:
# Short sample text (one paragraph per era of machine-learning history) used below
# to compare regular retrieval with context-enriched retrieval.
document_content = """
 Machine learning (ML) has roots in early computing and neuroscience, with foundational ideas emerging in the 1940s, such as McCulloch and Pitts' neural networks and Hebbian learning.


 In the 1950s, ML became part of artificial intelligence, with Alan Turing proposing the Turing Test and Arthur Samuel creating one of the first self-learning programs.


 The 1960s and 1970s saw the development of algorithms like nearest neighbor and decision trees, though progress was limited by computational constraints.


 In the 1980s, despite the AI winter, Geoffrey Hinton revived neural networks with backpropagation, and expert systems gained traction.


 The 1990s marked a shift to statistical learning, with support vector machines, ensemble methods, and increased ML use in pattern recognition and games like IBM’s Deep Blue.


 In the 2000s, the explosion of internet data enabled large-scale ML applications in search engines, recommendation systems, and online advertising.


 The 2010s brought a deep learning revolution, driven by neural networks like CNNs and RNNs, and frameworks such as TensorFlow and PyTorch made ML more accessible.


 A major breakthrough came in 2012 with AlexNet winning the ImageNet competition, showing the power of deep convolutional networks.


 The 2020s introduced large language models (LLMs) like GPT-3 and ChatGPT, ushering in an era of generative AI and Retrieval-Augmented Generation (RAG).


 Today, machine learning powers intelligent agents, multimodal models, and real-time applications, with ongoing focus on scalability, fairness, and responsible AI development.
 """
# Chunking parameters for the sample document (note: these reassign the notebook-level
# chunks_size / chunk_overlap that were set earlier for the PDF).
chunks_size = 250
chunk_overlap = 20
document_chunks = split_text_to_chunks_with_indices(document_content, chunks_size, chunk_overlap)
document_vectorstore = FAISS.from_documents(document_chunks, embeddings)
document_retriever = document_vectorstore.as_retriever(search_kwargs={"k": 1})

query = "When was large language models introduced?"
# `invoke` replaces the deprecated `get_relevant_documents`
context = document_retriever.invoke(query)
context_pages_content = [doc.page_content for doc in context]

print("Regular retrieval:\n")
show_context(context_pages_content)

# Pass the parameters the chunks were actually built with, instead of relying on the
# function's defaults (chunk_size=200, chunk_overlap=20), which do not match this split.
sequences = retrieve_with_context_overlap(
    document_vectorstore,
    document_retriever,
    query,
    num_neighbors=1,
    chunk_size=chunks_size,
    chunk_overlap=chunk_overlap,
)
print("\nRetrieval with context enrichment:\n")
show_context(sequences)

Regular retrieval:

Context 1:
major breakthrough came in 2012 with AlexNet winning the ImageNet competition, showing the power of deep convolutional networks.


 The 2020s introduced large language models (LLMs) like GPT-3 and ChatGPT, ushering in an era of generative AI and Retr



Retrieval with context enrichment:

Context 1:
h engines, recommendation systems, and online advertising.


 The 2010s brought a deep learning revolution, driven by neural networks like CNNs and RNNs, and frameworks such as TensorFlow and PyTorch made ML more accessible.


 A major breakthrough came in 2012 with AlexNet winning the ImageNet competition, showing the power of deep convolutional networks.


 The 2020s introduced large language models (LLMs) like GPT-3 and ChatGPT, ushering in an era of generative AI and Retrieval-Augmented Generation (RAG).


 Today, machine learning powers intelligent agents, multimodal models, and real-time applications, with ongoing focus on scalability, fairness, and resp