In [17]:
# Import necessary libraries
from flask import request, jsonify
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

from langchain import VectorDBQA
from langchain.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_community.llms import Ollama

In [None]:
# Download the PDF
!wget -O world_geo.pdf https://www.iipa.org.in/upload/world_geo.pdf


In [19]:

# Load the PDF fetched by the download cell into LangChain documents.
loader = PyPDFLoader('world_geo.pdf')
documents = loader.load()

# Split the documents into chunks (chunk_size=1000, chunk_overlap=10) so each
# piece is small enough to embed and to fit into the LLM prompt later on.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
texts = text_splitter.split_documents(documents)

# Embedding model used to vectorise every chunk.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Embed the chunks and store them in the local Qdrant instance.
# force_recreate=True rebuilds the collection from scratch on every run.
# Without it, from_documents reuses an existing collection and appends to it,
# so re-running this cell would store a second copy of every chunk and
# retrieval would start returning duplicates.
doc_store = Qdrant.from_documents(
    texts,
    embeddings,
    url="http://localhost:6333",
    collection_name="simple_rag_search",
    force_recreate=True,
)



In [None]:

# Re-create the embedding model so this cell can run on its own against an
# already-populated collection.
# query_instruction="": HuggingFaceBgeEmbeddings prepends a BGE-specific
# instruction to every query by default. all-mpnet-base-v2 is not a BGE model,
# so the prefix is disabled and questions are embedded the same way as the
# indexed chunks.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    query_instruction="",
)

# Connect to the existing collection in the local Qdrant instance.
client = QdrantClient("localhost", port=6333)
collection_name = "simple_rag_search"
doc_store = Qdrant(client, collection_name, embeddings)

# Build the question-answering chain.
# RetrievalQA is the replacement LangChain names for the deprecated VectorDBQA;
# the vector store is exposed to it through as_retriever(). The "stuff" chain
# type puts all retrieved chunks into a single prompt for the LLM.
llm = Ollama(model="gemma:2b")
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_store.as_retriever(),
    return_source_documents=False,
)

# invoke() supersedes the deprecated run(); it takes the question under the
# "query" key and returns a dict whose "result" entry is the answer text.
response = qa.invoke({"query": "tell me about sun?"})["result"]

print(response)