In [26]:
# Import necessary libraries
from flask import request, jsonify
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from langchain_community.llms import Ollama
from langchain.vectorstores import Qdrant, Mem
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document


In [31]:
# Download the PDF
!wget -O world_geo.pdf https://www.iipa.org.in/upload/world_geo.pdf

# Load the PDF
loader = PyPDFLoader('world_geo.pdf')
documents = loader.load()

# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
texts = text_splitter.split_documents(documents)

# Initialize the embeddings
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Connect to the Qdrant server
url = "http://localhost:6333"
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=url,
    prefer_grpc=False,
    collection_name="rag_search",
    force_recreate=True,
)



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


--2024-02-22 17:30:29--  https://www.iipa.org.in/upload/world_geo.pdf
Resolving www.iipa.org.in (www.iipa.org.in)... 192.124.249.162
Connecting to www.iipa.org.in (www.iipa.org.in)|192.124.249.162|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1900956 (1.8M) [application/pdf]
Saving to: ‘world_geo.pdf’


2024-02-22 17:30:29 (5.08 MB/s) - ‘world_geo.pdf’ saved [1900956/1900956]



In [35]:
# Purpose: answer `search_query` from the "rag_search" Qdrant collection by
# embedding the question, retrieving the closest stored chunks, and handing
# them to a local Ollama model through a "stuff" question-answering chain.

# Define the search query
search_query = "what is the diameter of the sun"

# Encode the query with all-MiniLM-L6-v2, the model the indexing cell names
# for the stored vectors, so query and documents share one embedding space.
encoder = SentenceTransformer('all-MiniLM-L6-v2')
query_embedding = encoder.encode(search_query)

# Connect to the Qdrant server
client = QdrantClient("localhost", port=6333)

# Retrieve up to 10 nearest hits. Note that `docs` holds raw search hits
# (each exposing a `.payload`), not LangChain documents.
docs = client.search(
    collection_name="rag_search",
    query_vector=query_embedding.tolist(),
    limit=10,
)

# Rebuild LangChain documents from the stored payloads. The two fields are
# read explicitly (instead of splatting the whole payload into Document) so
# that an unexpected extra payload key, or a missing metadata entry, cannot
# break the constructor. Hits without any payload are skipped.
payloads = [
    Document(
        page_content=hit.payload["page_content"],
        metadata=hit.payload.get("metadata") or {},
    )
    for hit in docs
    if hit.payload
]

# Load the question answering chain
llm = Ollama(model="llama2")
chain = load_qa_chain(llm, chain_type="stuff")

# Invoke the chain. All inputs travel in a single mapping; the previous extra
# `question=` keyword duplicated a key already present in that mapping.
# NOTE(review): get_openai_callback is an OpenAI usage tracker; with a local
# Ollama model `cb` is not expected to carry meaningful token counts. It is
# kept only so `cb` stays defined for any later cell -- confirm and drop it.
with get_openai_callback() as cb:
    response = chain.invoke(
        {"question": search_query, "input_documents": payloads}
    )

print(response["output_text"])

Everything works as expected up to this point.
{'question': 'what is the diameter of the sun', 'input_documents': [Document(page_content="8 \n 2. Stars, Sun & Planets  \nThe Sun is at the centre of our solar system and also the largest objects of our solar system. \nHowever, it is an average star. It is roughly 109 times the size of Earth. The Sun has a \ndiameter of 1,392,000 kilometres. It comprises 99.8% of the mass of the solar system. It isn't \nthe hottest, it isn't the coolest, and it isn't the oldest. Nor is it brightest, biggest, etc. The \nsurface temperature of sun is 60000 degrees Celsius. It is lar gely made up of hydrogen gas, \nwith a minor quantity of helium thrown in for good measure. The Sun accounts for 99.85% of \nall the matter of the solar system.  \nThe Sun is the solar system's closest star. It belongs to the Milky Way galaxy. It's thought to \nbe more than 4 billion years old. The Sun is a yellow dwarf, a medium -sized star. As it rotates \naround the galaxy, the Sun spins gen