In [19]:
import os
from llama_index.core import VectorStoreIndex
from llama_index.postprocessor.colbert_rerank import ColbertRerank
from llama_index.readers.file import PyMuPDFReader

# Optional: Set up debug logging to see what llamaindex is doing
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [20]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co
Resetting dropped connection: huggingface.co
Resetting dropped connection: huggingface.co
Resetting dropped connection: huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1

In [22]:
# Load the document using PyMuPDFReader
file = "ColbertV2_2112.01488v3.pdf"  # Replace with your actual file path
reader = PyMuPDFReader()
docs = reader.load(file)

# Create the index
index = VectorStoreIndex.from_documents(documents=docs)


DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: ColBERTv2:
Effective and Efﬁcient Retrieval via...
> Adding chunk: ColBERTv2:
Effective and Efﬁcient Retrieval via...
> Adding chunk: ColBERTv2:
Effective and Efﬁcient Retrieval via...
> Adding chunk: ColBERTv2:
Effective and Efﬁcient Retrieval via...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: a cross-encoder and hard-negative mining (§3.2)...
> Adding chunk: a cross-encoder and hard-negative mining (§3.2)...
> Adding chunk: a cross-encoder and hard-negative mining (§3.2)...
> Adding chunk: a cross-encoder and hard-negative mining (§3.2)...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: COIL (Gao et al., 2021) also generates token-le...
> Adding chunk: COIL (Gao et al., 2021) also generates token-le...
> Adding chunk: COIL (Gao et al., 2021) also generates token-le...
> Adding chunk: COIL (Gao et al., 2021) also generates token-le...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chu

Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.43s/it]
Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.56s/it]
Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.52s/it]
Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.62s/it]


In [23]:
import os
from openaikey import key

os.environ["OPENAI_API_KEY"] = str(key())

In [24]:
colbert_reranker = ColbertRerank(
    top_n=5,
    model="colbert-ir/colbertv2.0",
    tokenizer="colbert-ir/colbertv2.0",
    keep_retrieval_score=True,
    
)

query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[colbert_reranker],
)



DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/tokenizer_config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/tokenizer_config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/tokenizer_config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/tokenizer_config.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/config.json HTTP/11" 200 0
https://huggingface.co:443 "HEAD /colbert-ir/colbertv2.0/resolve/main/config.json HTTP/11" 200 0


In [25]:
response = query_engine.query(
    "How is ColbertV2 better than Colbert ?",
)

# Print the reranked results
for node in response.source_nodes:
    print(node.id_)
    print(node.node.get_content())
    print("reranking score: ", node.score)
    print("retrieval score: ", node.node.metadata.get("retrieval_score", "N/A"))
    print("=====================================")


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 20.57it/s]

DEBUG:llama_index.core.indices.utils:> Top 10 nodes:
> [Node 667e6482-bade-419a-b454-f250400357ec] [Similarity score:             0.745617] Q: what is xerror in rpart?
Q: is sub question one word?
Q: how to open a garage door without mak...
> [Node 7285a6da-978c-4ef2-a006-7ff228f57c53] [Similarity score:             0.738183] Similarly, on the HoVer (Jiang et al., 2020) dev
set, Baleen’s retrieval R@100 dropped from 92.2%...
> [Node 61ac8c3b-6fde-47f6-b04c-1391368ac740] [Similarity score:             0.734178] Sub-table (a) reports results on BEIR and sub-table (b) reports results on
the Wikipedia Open QA ...
> [Node 07adbdc9-f86e-44a6-a677-b9a5a9b1b585] [Similarity score:             0.71913] In particular, when ap-
plied to a vanilla ColBERT model on MS MARCO
whose MRR@10 is 36.2% and Re...
> [Node acccae6d-cd0c-4d14-a15a-007c1e6009b0] [Similarity score:             0.709823] sages. This is known to lead to artiﬁcial lexical
bias (Lee et al., 2019), where crowdworkers cop...
> [Node 




DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "You are an expert Q&A system that is trusted around the world.\nAlways answer the query using the provided context information, and not prior knowledge.\nSome rules to follow:\n1. Never directly reference the given context in your answer.\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines."}, {'role': 'user', 'content': 'Context information is below.\n---------------------\ntotal_pages: 20\nfile_path: ColbertV2_2112.01488v3.pdf\nsource: 7\nretrieval_score: 0.705386269098783\n\nDev-set results for baseline systems are from their re-\nspective papers: Zhan et al. (2020b), Xiong et al. (2020)\nfor DPR and ANCE, Zhan et al. (2020a), Khattab and\nZaharia (2020), Hofstätter et al. (2021), Gao and Callan\n(2021), Ren et al. (2021a), Formal et al. (2021a), and\nRen et 

In [26]:
print(response)

ColbertV2 achieves higher quality than vanilla Colbert by combining denoised supervision and residual compression, leading to improved robustness with a reduced space footprint. Additionally, ColbertV2 exhibits state-of-the-art retrieval quality both within and outside its training domain, outperforming other retrievers on various out-of-domain benchmarks.
