In [1]:
%%writefile requirements.txt
# Vector store, retrieval framework and embedding backend.
chromadb
langchain
langchain_community
sentence_transformers
# PDF parsing backend needed by PyMuPDFLoader.
pymupdf
# Client for the Cohere rerank API used in the re-ranking cells.
cohere

Overwriting requirements.txt


In [2]:
import os
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.document_transformers import (
    EmbeddingsRedundantFilter,
    EmbeddingsClusteringFilter,
)
# from langchain.retrievers.document_compressors import DocumentCompressorPipeline
# from langchain.retrievers import ContextualCompressionRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyMuPDFLoader
from langchain.docstore.document import Document
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


# Embedding Model

In [3]:
import time

# Start the wall-clock timer before the embedding model is constructed.
s = time.time()

# Model id and the keyword arguments handed to HuggingFaceBgeEmbeddings.
model_name = "BAAI/bge-large-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

# `hf` is the embedding function passed to the Chroma vector stores below.
hf = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)
print(f"Embedding Model Loaded in {time.time() - s} seconds")



Embedding Model Loaded in 10.363645553588867 seconds


# Data Pre-processing

In [4]:
# Relative path of the PDF that is loaded and indexed in the cells below.
path = "data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf"

In [5]:
# Load the PDF at `path` into a list of Documents.
loader = PyMuPDFLoader(path)
documents = loader.load()

# Join the text of every loaded Document with newlines and wrap the result in
# one Document whose only metadata is the source path.
combined_text = "\n".join([part.page_content for part in documents])
document = Document(metadata=dict(source=path), page_content=combined_text)
document

Document(page_content='                                                               July/2011 \n \nACEA Translation                                                             1 \nFor Reference Purposes Only \n \nICS 43.040.20 \nT 38  \n \nNational Standard of the People’s Republic of China \n GB 25991-2010 \n \n \n \nAutomotive Headlamps with LED Light Sources \nand/or LED Modules  \n \n \n \n \n \n \n \nIssued on 2011-01-10                           Effective from 2012-01-01  \nPromulgated by \nState General Administration of Quality Supervision, Inspection and \nQuarantine of the People\'s Republic of China \nAnd \nStandardization Administration of China \n\n                                                               July/2011 \n \nACEA Translation                                                             2 \nFor Reference Purposes Only \n \nForeword \n \nParagraphs 5 and 6 (except for Paragraph 5.10 and 6.8) are compulsory, and the rest is \nfor recommend.  \nAll the Annexes

In [6]:
# Rebind `documents` to a one-element list holding the merged Document, so the
# retriever cells below ingest the whole PDF as a single document.
documents = [document]

In [7]:
# from langchain_community.document_loaders import TextLoader
# from langchain_community.document_loaders import DirectoryLoader
# # Load and process the text files
# loader = DirectoryLoader('data/', glob="./*.txt", loader_cls=TextLoader)
# documents = loader.load()  # text file to [document]

In [8]:
# loader = PyPDFLoader(path)
# documents = loader.load()
# len(documents)  # List of documents for each page

# Parent Document Retriever
Version 1 - Smaller chunks

In [23]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

# Version 1 splitters: parents use chunk_size=500, children use chunk_size=120,
# both with an overlap of 20.
parent_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=500)
child_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=120)

# `store` is the docstore handed to the retriever; `vectorstore` is the Chroma
# collection that embeds text with `hf`.
store = InMemoryStore()
vectorstore = Chroma(embedding_function=hf, collection_name="full_documents")

retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=vectorstore,
)

In [24]:
# Measure how long the retriever takes to ingest `documents`.
s = time.time()
retriever.add_documents(documents, ids=None)
print(f"Documents added in {time.time() - s} seconds")

Documents added in 225.83366322517395 seconds


In [11]:
# list(store.yield_keys())

In [12]:
# Sample question used to inspect what the vector store returns on its own.
query="The color of the light emitted by LED headlamps shall be?"
# Search the vector store directly with the question and display the hits.
sub_docs = vectorstore.similarity_search(query)
sub_docs

[Document(page_content='\uf0b3 6 \n \n5.4  \nLight color  \n \nThe color of the light emitted by LED headlamps shall be white. Expressed in CIE', metadata={'doc_id': 'df826b2c-562a-4d23-96d9-3e3fe9e539af', 'source': 'data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf'}),
 Document(page_content='Color rendering  \n \nThe color rendering of LED headlamps or LED modules shall conform to the', metadata={'doc_id': '66f0e4c7-3a30-44cd-85de-5a0bb4adb62f', 'source': 'data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf'}),
 Document(page_content='Light sources  \n \nLED headlamps shall use the LED light source/LED module(s) in compliance with', metadata={'doc_id': '4a98b493-7944-44f0-a681-f526ba476a09', 'source': 'data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf'}),
 Document(page_content='The low-UV-radiation type LED headlamp or LED module shall be such that:', metada

In [13]:
# Run the same question through the parent-document retriever and display the
# result. `invoke` is used because `get_relevant_documents` is deprecated and
# triggers a deprecation warning when called.
retrieved_docs = retriever.invoke(query)
retrieved_docs

  warn_deprecated(


[Document(page_content='Luminance \nEmax \n\uf0b3 48 and \uf0a3 240 \nPoint ‘HV’ \n\uf0b3 0.80Emax \nPoint ‘HV’ to 1125L and R \n\uf0b3 24 \nPoint ‘HV’ to 2250L and R \n\uf0b3 6 \n \n5.4  \nLight color  \n \nThe color of the light emitted by LED headlamps shall be white. Expressed in CIE \nchromatic coordinates, the chromatic characteristics shall conform to the provisions \nof GB 4785.  \n5.5  \nLight sources', metadata={'source': 'data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf'}),
 Document(page_content='Color rendering  \n \nThe color rendering of LED headlamps or LED modules shall conform to the \nrequirements of the minimum red content of the light:', metadata={'source': 'data/GB 25991-2010 Automotive Headlamps with LED Light Sources andor LED Modules (1) (2).pdf'}),
 Document(page_content='Light sources  \n \nLED headlamps shall use the LED light source/LED module(s) in compliance with \nthis Standard; any other light source contained

In [30]:
def get_relevant_chunks(query):
    """Print every chunk the notebook-level ``retriever`` returns for ``query``.

    Each chunk is preceded by a dashed separator line that ends with its
    1-based position in the result list. Nothing is returned.

    Args:
        query: Question text forwarded unchanged to ``retriever.invoke``.
    """
    # The direct vector-store search that used to run here was never read, so
    # it only cost an extra query embedding; it has been removed.
    # `invoke` replaces the deprecated `get_relevant_documents`.
    retrieved_docs = retriever.invoke(query)
    for position, doc in enumerate(retrieved_docs, start=1):
        print("---------------------------------------------", position)
        print(doc.page_content)

In [31]:
# Question about the burn-in time of the LED module. The run-together word
# "toambient" in the question text was corrected to "to ambient" so the query
# matches the wording of the source document.
query1 = "The LED module of headlamp shall be operated for how many hours before cooled down to ambient temperature before starting the tests?"
get_relevant_chunks(query1)

--------------------------------------------- 1
luminometer being Class 1 as specified in the national calibration procedures 
(errors of indicated values not exceeding ±4%). 
6.2  
The luminous and electrical performance shall be measured at the ambient 
temperature of 23C±5C and the relative humidity of 20% ~ 80%. 
6.3  
Photometric measurements  
6.3.1  
The LED module of headlamp shall be operated for 15 h and cooled down to 
ambient temperature before starting the tests.   
6.3.2
--------------------------------------------- 2
carried out with the filament lamp whose wattage is the highest that can be used. 
A.1.1.2  
Test result  
A.1.1.2.1  Visual inspection  
 
Once the headlamp has been cooled down to the ambient temperature, the 
headlamp lens shall be cleaned with a clean, damp cotton cloth. It shall then be 
inspected visually; no distortion, deformation, cracking or change in color shall be 
noticeable. 
A.1.1.2.2  Photometric test


In [32]:
# Ask about the photometric-stability test and print the chunks the retriever returns.
query2 = "Tell me about Test of stability of photometric performance."
get_relevant_chunks(query2)

--------------------------------------------- 1
July/2011 
 
ACEA Translation                                                             16 
For Reference Purposes Only 
 
Annex A 
(Normative) 
Tests for Stability of Photometric Performance of LED Headlamps 
A.1  
Test of stability of photometric performance  
 
The tests shall be carried out in a dry and still atmosphere at an ambient 
temperature of 23±5°C, the complete headlamp being mounted on a base
--------------------------------------------- 2
B.1.5  
The photometric values, measured after one minute and after photometric stability 
has occurred, shall comply with the minimum and maximum requirements. 
B.2  
 Light color  
The color of the light emitted measured after one minute and measured after 
photometric stability has been obtained, as described in Paragraph B.1.2 of this 
Annex, shall both be within the required color boundaries. 
B.3  
 
The measurement of the objective luminous flux of LED module(s) producing


In [35]:
# Question about the cut-off line heat test. The run-together word "ofheat" in
# the question text was corrected to "of heat" so the query matches the wording
# of the source document.
query4 = "What are environmental conditions for test for change in vertical position of the cut-off line under the influence of heat?"
get_relevant_chunks(query4)

--------------------------------------------- 1
illuminating value has dropped to 15 ~ 20% of the driving beam value initially 
measured at point Emax and 50R and 50V for a passing lamp.  
A.1.2.1.4  Measuring equipment:  
 
The measuring equipment shall be equivalent to that used during headlamp 
approval tests. A standard filament lamp shall be used for the photometric 
verification of semi-enclosed headlamps.  
A.2  
 
Test for change in vertical position of the cut-off line under the influence of 
heat:
--------------------------------------------- 2
heat:  
 
This test is intended for checking whether the vertical drift of the cut-off line under 
the influence of heat exceeds a specified value.  
 
The headlamp tested in accordance with Paragraph A.1 shall be subjected to the 
test described in Paragraph A.2.1 below, without being removed from or readjusted 
in relation to its test fixture. 
A.2.1  
Test  
 
The test shall be carried out in a dry and still atmosphere at an ambient

In [34]:
# Ask for the composition of the lens test mixture and print the chunks the retriever returns.
query3 = "For headlamp with the outside lens in glass, a mixture to be applied to the headlamp lens shall consist of which elements"
get_relevant_chunks(query3)


--------------------------------------------- 1
A mixture to be applied to the headlamp lens shall consist of (by weight):  
 
- 9 parts of silica sand with a particle size of 0 ~ 100 m;  
 
- 1 part of vegetable carbon dust with a particle size of 0 ~ 100 m; 
 
- 0.2 part of NaCMC 4) and an appropriate quantity of distilled water (with a 
conductivity < 1mS/m). 
 
The mixture shall be valid only within 14 days.
--------------------------------------------- 2
July/2011 
 
ACEA Translation                                                             18 
For Reference Purposes Only 
 
A.1.2.2.2 For headlamp with outside lens in plastic material 
 
A mixture to be applied to the headlamp lens shall consist of (by weight):  
 
- 9 parts of silica sand with a particle size of 0 ~ 100 m;  
 
- 1 part of vegetable carbon dust with a particle size of 0 ~ 100 m; 
 
- 0.2 part of NaCMC;


In [18]:
# Install the Cohere SDK with the explicit %pip magic so the command does not
# depend on IPython's automagic setting.
%pip install cohere

Collecting cohere
  Obtaining dependency information for cohere from https://files.pythonhosted.org/packages/9c/69/4822001b6cbb23afb8b008eda54415e99d3adecd6faf07f28538bf447264/cohere-5.5.6-py3-none-any.whl.metadata
  Downloading cohere-5.5.6-py3-none-any.whl.metadata (3.6 kB)
Collecting boto3<2.0.0,>=1.34.0 (from cohere)
  Obtaining dependency information for boto3<2.0.0,>=1.34.0 from https://files.pythonhosted.org/packages/8f/7b/fcee692d939f8b4dc5406d4daf15c4d98b8cfa6908a77f224092d445928f/boto3-1.34.122-py3-none-any.whl.metadata
  Downloading boto3-1.34.122-py3-none-any.whl.metadata (6.6 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Obtaining dependency information for fastavro<2.0.0,>=1.9.4 from https://files.pythonhosted.org/packages/2b/7e/18d9b5c3e1621f58c1f4bee67ec1e4dd8ce8287e70c97262088a5673145c/fastavro-1.9.4-cp311-cp311-win_amd64.whl.metadata
  Downloading fastavro-1.9.4-cp311-cp311-win_amd64.whl.metadata (5.7 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from cohere)
  Obta

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\VISHAL MAURYA\\Desktop\\Document Comparision\\pdr_venv\\Lib\\site-packages\\~okenizers\\tokenizers.cp311-win_amd64.pyd'
Check the permissions.


[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [19]:
import getpass
import os

import cohere

# Take the Cohere API key from the COHERE_API_KEY environment variable, or
# prompt for it, so the secret is never stored in the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
co = cohere.Client(os.environ.get("COHERE_API_KEY") or getpass.getpass("Cohere API key: "))

ModuleNotFoundError: No module named 'cohere'

In [None]:
# Plain-text list of the Query 1 candidates, in retrieval order.
# NOTE: `retrieved_docs_1` is only assigned in the "Query 1" cells of the
# Version 2 section further down, so those cells must have run first.
retrieved_docs_query1 = [doc.page_content for doc in retrieved_docs_1]

In [None]:
# Send the Query 1 question and its candidate texts to Cohere's rerank endpoint.
results = co.rerank(query=query_1, documents=retrieved_docs_query1, top_n=3, model='rerank-english-v2.0') # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.

In [None]:
# Display the raw rerank response.
results

In [None]:
# Echo the question text of Query 1.
print(query_1)

In [None]:
# Rerank the Query 1 candidates. top_n sets how many results are returned;
# if it is not passed, all results are returned.
results = co.rerank(
    query=query_1,
    documents=retrieved_docs_query1,
    top_n=3,
    model='rerank-english-v2.0',
)
# Report each hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_1[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query1[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

In [None]:
# Plain-text list of the Query 2 candidates, in retrieval order.
# NOTE: `retrieved_docs_2` is only assigned in the "Query 2" cell of the
# Version 2 section further down, so that cell must have run first.
retrieved_docs_query2 = [doc.page_content for doc in retrieved_docs_2]

In [None]:
# Echo the question text of Query 2.
print(query_2)

In [None]:
# Rerank the Query 2 candidates. top_n sets how many results are returned;
# if it is not passed, all results are returned.
results = co.rerank(
    query=query_2,
    documents=retrieved_docs_query2,
    top_n=3,
    model='rerank-english-v2.0',
)
# Report each hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_2[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query2[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

In [None]:
# Plain-text list of the Query 3 candidates, in retrieval order.
# NOTE: `retrieved_docs_3` and `query_3` are only assigned in the "Query 3"
# cell of the Version 2 section further down.
retrieved_docs_query3 = [doc.page_content for doc in retrieved_docs_3]

print(query_3,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_3,
    documents=retrieved_docs_query3,
    top_n=3,
    model='rerank-english-v2.0',
)
# Report each hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_3[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query3[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

In [None]:
# Plain-text list of the Query 4 candidates, in retrieval order.
# NOTE: `retrieved_docs_4` and `query_4` are only assigned in the "Query 4"
# cell of the Version 2 section further down.
retrieved_docs_query4 = [doc.page_content for doc in retrieved_docs_4]

print(query_4,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_4,
    documents=retrieved_docs_query4,
    top_n=3,
    model='rerank-english-v2.0',
)
# Report each hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_4[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query4[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

In [None]:
# Plain-text list of the Query 5 candidates, in retrieval order.
# NOTE: `retrieved_docs_5` and `query_5` are only assigned in the "Query 5"
# cell of the Version 2 section further down.
retrieved_docs_query5 = [doc.page_content for doc in retrieved_docs_5]

print(query_5,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_5,
    documents=retrieved_docs_query5,
    top_n=3,
    model='rerank-english-v2.0',
)
# Report each hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_5[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query5[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

# Parent Document Retriever
Version 2 - Larger chunks

In [None]:
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever

# Version 2 splitters: larger children (chunk_size=700) and parents (chunk_size=2000).
child_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
# Use a collection name of its own. The Version 1 section already created a
# Chroma collection called "full_documents" in this process; reusing that name
# would attach to the same collection, so the Version 1 child chunks (whose
# parent ids are not in the fresh InMemoryStore below) would compete with the
# new chunks in every similarity search.
vectorstore = Chroma(
    collection_name="full_documents_large_chunks", embedding_function=hf
)
# Fresh docstore for the Version 2 parent chunks.
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter
)

In [None]:
# Pass the original, unsplit documents: the retriever was configured with its
# own parent and child splitters, so no manual chunking is done here.
retriever.add_documents(documents, ids=None)

In [None]:
# List the keys currently held in the in-memory docstore.
list(store.yield_keys())

## Query 1

In [None]:
# First evaluation question, searched directly against the vector store.
# NOTE(review): this question does not look related to the headlamp PDF loaded
# at the top of the notebook - confirm which corpus is intended here.
query_1=" What are some challenges associated with data collection?"
sub_docs_1 = vectorstore.similarity_search(query_1)

In [None]:
# Print the text of the first vector-store hit for Query 1.
print(sub_docs_1[0].page_content)

In [None]:
# Display all vector-store hits for Query 1.
sub_docs_1

In [None]:
# Resolve Query 1 through the parent-document retriever. `invoke` is used
# because `get_relevant_documents` is deprecated and triggers a warning.
retrieved_docs_1 = retriever.invoke(query_1)

In [None]:
# Character count of the first document returned by the retriever for Query 1.
len(retrieved_docs_1[0].page_content)

In [None]:
# Display every document the retriever returned for Query 1.
retrieved_docs_1

## Query 2

In [None]:
query_2=" What are the advantages of the Apriori algorithm?"

# Direct vector-store hits for the question.
sub_docs_2 = vectorstore.similarity_search(query_2)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_2 = retriever.invoke(query_2)
retrieved_docs_2

In [None]:
# Display the vector-store hits for Query 2.
sub_docs_2

## Query 3

In [None]:
query_3=" What distinguishes training data in supervised learning, and what is its purpose? "

# Direct vector-store hits for the question.
sub_docs_3 = vectorstore.similarity_search(query_3)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_3 = retriever.invoke(query_3)
retrieved_docs_3

## Query 4

In [None]:
query_4=" What is cross-validation?"

# Direct vector-store hits for the question.
sub_docs_4 = vectorstore.similarity_search(query_4)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_4 = retriever.invoke(query_4)
retrieved_docs_4

In [None]:
# Display the vector-store hits for Query 4.
sub_docs_4

## Query 5

In [None]:
query_5="What is the process of building the decision tree classifier, and how is it trained on the dataset?"

# Direct vector-store hits for the question.
sub_docs_5 = vectorstore.similarity_search(query_5)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_5 = retriever.invoke(query_5)
retrieved_docs_5

In [None]:
# Display the vector-store hits for Query 5.
sub_docs_5

## Query 6

In [None]:
query_6="What is the challenge associated with supervised learning?"

# Direct vector-store hits for the question.
sub_docs_6 = vectorstore.similarity_search(query_6)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_6 = retriever.invoke(query_6)
retrieved_docs_6

## Query 7

In [None]:
query_7="What considerations should businesses keep in mind when applying market basket analysis to understand customer behavior?"

# Direct vector-store hits for the question.
sub_docs_7 = vectorstore.similarity_search(query_7)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_7 = retriever.invoke(query_7)
retrieved_docs_7

## Query 8

In [None]:
query_8="How does the genie index contribute to decision tree construction, and why is it favored in practice, particularly in Python libraries?"

# Direct vector-store hits for the question.
sub_docs_8 = vectorstore.similarity_search(query_8)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_8 = retriever.invoke(query_8)
retrieved_docs_8

## Query 9

In [None]:
query_9="What is the harmonic mean, and how does it enable the combination of precision and recall into a single metric?"

# Direct vector-store hits for the question.
sub_docs_9 = vectorstore.similarity_search(query_9)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_9 = retriever.invoke(query_9)
retrieved_docs_9

## Query 10

In [None]:
query_10="What are the implications of choosing a large or small learning rate?"

# Direct vector-store hits for the question.
sub_docs_10 = vectorstore.similarity_search(query_10)
# Documents via the parent-document retriever; `invoke` replaces the
# deprecated `get_relevant_documents`.
retrieved_docs_10 = retriever.invoke(query_10)
retrieved_docs_10

In [None]:
# Install the Cohere SDK with the explicit %pip magic so the command does not
# depend on IPython's automagic setting.
%pip install cohere

# Cohere Re-ranking

In [None]:
import getpass
import os

import cohere

# Take the Cohere API key from the COHERE_API_KEY environment variable, or
# prompt for it, so the secret is never stored in the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
co = cohere.Client(os.environ.get("COHERE_API_KEY") or getpass.getpass("Cohere API key: "))

## Query 1

In [None]:
# Plain-text list of the Query 1 candidates, in retrieval order.
retrieved_docs_query1 = [doc.page_content for doc in retrieved_docs_1]

In [None]:
# Send the Query 1 question and its candidate texts to Cohere's rerank endpoint.
results = co.rerank(query=query_1, documents=retrieved_docs_query1, top_n=3, model='rerank-english-v2.0') # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.

In [None]:
# Display the raw rerank response.
results

In [None]:
# Echo the question text of Query 1.
print(query_1)

In [None]:
# Report on `results`, which this cell expects to have been set by the
# co.rerank call for Query 1 in an earlier cell; the rerank is not repeated here.
print(query_1,"\n")
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_1[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query1[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 2

In [None]:
# Plain-text list of the Query 2 candidates, in retrieval order.
retrieved_docs_query2 = [doc.page_content for doc in retrieved_docs_2]

In [None]:
# Echo the question text of Query 2.
print(query_2)

In [None]:
# Rerank the Query 2 candidates. top_n sets how many results are returned;
# if it is not passed, all results are returned.
results = co.rerank(
    query=query_2,
    documents=retrieved_docs_query2,
    top_n=3,
    model='rerank-english-v2.0',
)
print(query_2,"\n")
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_2[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query2[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 3

In [None]:
# Plain-text list of the Query 3 candidates, in retrieval order.
retrieved_docs_query3 = [doc.page_content for doc in retrieved_docs_3]

print(query_3,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_3,
    documents=retrieved_docs_query3,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_3[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query3[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 4

In [None]:
# Plain-text list of the Query 4 candidates, in retrieval order.
retrieved_docs_query4 = [doc.page_content for doc in retrieved_docs_4]

print(query_4,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_4,
    documents=retrieved_docs_query4,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_4[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query4[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 5

In [None]:
# Plain-text list of the Query 5 candidates, in retrieval order.
retrieved_docs_query5 = [doc.page_content for doc in retrieved_docs_5]

print(query_5,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_5,
    documents=retrieved_docs_query5,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_5[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query5[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 6

In [None]:
# Plain-text list of the Query 6 candidates, in retrieval order.
retrieved_docs_query6 = [doc.page_content for doc in retrieved_docs_6]

print(query_6,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_6,
    documents=retrieved_docs_query6,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_6[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query6[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 7

In [None]:
# Plain-text list of the Query 7 candidates, in retrieval order.
retrieved_docs_query7 = [doc.page_content for doc in retrieved_docs_7]

print(query_7,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_7,
    documents=retrieved_docs_query7,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_7[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query7[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 8

In [None]:
# Plain-text list of the Query 8 candidates, in retrieval order.
retrieved_docs_query8 = [doc.page_content for doc in retrieved_docs_8]

print(query_8,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_8,
    documents=retrieved_docs_query8,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_8[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query8[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 9

In [None]:
# Plain-text list of the Query 9 candidates, in retrieval order.
retrieved_docs_query9 = [doc.page_content for doc in retrieved_docs_9]

print(query_9,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_9,
    documents=retrieved_docs_query9,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_9[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query9[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")

## Query 10

In [None]:
# Plain-text list of the Query 10 candidates, in retrieval order.
retrieved_docs_query10 = [doc.page_content for doc in retrieved_docs_10]

print(query_10,"\n")
# top_n sets how many results are returned; if it is not passed, all results
# are returned.
results = co.rerank(
    query=query_10,
    documents=retrieved_docs_query10,
    top_n=3,
    model='rerank-english-v2.0',
)
# One report per hit: rank, index into the candidate list, metadata, text, score.
for rank, hit in enumerate(results.results, start=1):
    print(f"Document Rank: {rank}, Document Index: {hit.index}")
    print(f"Source Document: {retrieved_docs_10[hit.index].metadata}")
    print(f"Document: {retrieved_docs_query10[hit.index]}")
    print(f"Relevance Score: {hit.relevance_score:.2f}")
    print("\n")