# RAG with an LLM (llama3.2) + Qdrant

PDF Upload

    ↓

Extract Text from PDF (PyPDF2)

    ↓

Chunk Text (500 characters, 50-character overlap) (RecursiveCharacterTextSplitter)

    ↓

Embed Chunks (MiniLM → 384-d vectors) (all-MiniLM-L6-v2)

    ↓

Store Embeddings in Qdrant (rag_collection)
__________________________________________________________________________________________________________
User Query

    ↓

Convert Query to Embedding (same model: all-MiniLM-L6-v2)

    ↓

Similarity Search in Qdrant (top-k = 5, score_threshold = 0.5)

    ↓

Retrieve Relevant Chunks (as context)

    ↓

Insert into Custom Prompt (context + question)

    ↓

LLM Inference (llama3.2)

    ↓

Generate Final Answer

    ↓

Return Answer + Retrieved Chunks


In [1]:
pip install langchain qdrant-client sentence-transformers ollama gradio pypdf2 tiktoken -U langchain-ollama langchain-huggingface langchain-qdrant


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from langchain_ollama import OllamaLLM
# LLM handle for the "llama3.2" model, created through the langchain-ollama
# OllamaLLM wrapper; it is handed to the RetrievalQA chain built further down.
llm = OllamaLLM(model="llama3.2")

In [3]:
from qdrant_client import QdrantClient
from langchain_qdrant import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings

# Client for the Qdrant server this notebook talks to.
qdrant_client = QdrantClient(host="localhost", port=6333)

# Embedding model used by the vector store; the collection below is created
# with vector size 384 to match the model's output dimension.
embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create the collection only when it is missing. An explicit existence check
# replaces the former bare `except: pass`, which hid every failure (server
# unreachable, invalid configuration, even KeyboardInterrupt) instead of just
# the "collection already exists" case it was meant to tolerate.
if not qdrant_client.collection_exists("rag_collection"):
    qdrant_client.create_collection(
        collection_name="rag_collection",
        vectors_config={
            "size": 384,
            "distance": "Cosine"
        },
        optimizers_config={"default_segment_number": 1},
        on_disk_payload=True
    )

# LangChain vector-store wrapper around the collection.
# NOTE(review): the captured cell output echoes this statement, presumably as
# part of a deprecation warning for the `Qdrant` class — confirm and consider
# migrating to the replacement class recommended by langchain-qdrant.
vectorstore = Qdrant(
    client=qdrant_client,
    collection_name="rag_collection",
    embeddings=embeddings_model
)

# Sanity check: list the collections known to the server.
collections = qdrant_client.get_collections()
print(collections)


collections=[CollectionDescription(name='rag_collection')]


  vectorstore = Qdrant(


In [4]:
import gradio as gr
from PyPDF2 import PdfReader

def read_pdf(file):
    """Return the text of a PDF as one string, pages separated by newlines.

    Args:
        file: The PDF source; handed unchanged to ``PdfReader``.

    Returns:
        str: The result of ``extract_text()`` for each page, in page order,
        joined with a newline between consecutive pages.
    """
    pdf_pages = PdfReader(file).pages
    page_texts = (pdf_page.extract_text() for pdf_page in pdf_pages)
    return "\n".join(page_texts)


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_text(text, chunk_size=500, chunk_overlap=50):
    """Split text into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The text to split.
        chunk_size: Value passed as the splitter's ``chunk_size``. Defaults to
            500, the value that used to be hard-coded here.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
            Defaults to 50, the value that used to be hard-coded here.

    Returns:
        The chunks produced by ``splitter.split_text(text)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

def store_embeddings(text_chunks, file_id):
    """Add text chunks to the module-level ``vectorstore``, tagged by file.

    Args:
        text_chunks: The chunk strings to add.
        file_id: Identifier stored under the ``"file_id"`` metadata key of
            every chunk.
    """
    # Build one metadata dict per chunk. The previous `[{...}] * n` form
    # repeated a reference to a single dict, so any in-place modification of
    # one entry would have silently changed the metadata of every chunk.
    metadatas = [{"file_id": file_id} for _ in text_chunks]
    vectorstore.add_texts(
        texts=text_chunks,
        metadatas=metadatas,
    )

def delete_collection(collection_name="rag_collection"):
    """Delete a Qdrant collection and print the outcome.

    Failures are not propagated: any exception raised by the delete call is
    caught and reported on stdout instead.

    Args:
        collection_name: Name of the collection to delete.
    """
    try:
        qdrant_client.delete_collection(collection_name=collection_name)
    except Exception as err:
        print(f"Error deleting collection: {err}")
    else:
        print(f"Collection '{collection_name}' deleted successfully.")


#def delete_by_file_id(file_id):
#   qdrant_client.delete(
#       collection_name="rag_collection",
#        filter={
#            "must": [
#                {"key": "file_id", "match": {"value": file_id}}
#            ]
#        }
#    )


# from qdrant_client.http import models as rest
# # Function to delete existing chunks for a given file_id
# def delete_all_vectors():
#     try:
#         qdrant_client.delete(
#             collection_name="rag_collection",
#             points_selector=rest.PointsSelector(
#                 filter=rest.Filter(must=[])  # Match all filter: deletes everything
#             )
#         )
#         print("All vectors deleted from the collection.")
#     except Exception as e:
#         print(f"Error deleting all vectors: {e}")

# def delete_existing_chunks(file_id):
#     try:
#         # Construct filter to match the file_id
#         filter_criteria = rest.Filter(
#             must=[
#                 rest.FieldCondition(
#                     key="file_id",
#                     match=rest.MatchValue(value=file_id)
#                 )
#             ]
#         )
        
#         # Delete chunks based on the filter criteria
#         delete_response = qdrant_client.delete(
#             collection_name="rag_collection",
#             filter=filter_criteria
#         )
        
#         # Print how many chunks were deleted
#         print(f"Deleted {delete_response.result.deleted_count} points for file_id '{file_id}'")
        
#     except Exception as e:
#         print(f"Error deleting chunks: {e}")

# def delete_existing_chunks(file_id):
#     try:
#         qdrant_client.delete(
#             collection_name="rag_collection",
#             points_selector=rest.FilterSelector(
#                 filter=rest.Filter(
#                     must=[
#                         rest.FieldCondition(
#                             key="file_id",
#                             match=rest.MatchValue(value=file_id)
#                         )
#                     ]
#                 )
#             )
#         )
#         print(f"Deleted old chunks for file_id '{file_id}'")
#     except Exception as e:
#         print(f"Error deleting chunks: {e}")

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Prompt handed to the LLM: the retrieved chunks fill {context} and the user's
# query fills {question}.
custom_prompt = PromptTemplate(
    template="""
You are an intelligent assistant. Use the following context to answer the question accurately.Don’t hallucinate and generate precise answers.

Context:
{context}

Question:
{question}

Answer:
""".strip(),
    input_variables=["context", "question"],
)

# Retrieval-augmented QA chain. The retriever asks for up to 5 chunks with a
# score threshold of 0.5, the "stuff" chain type is used with the custom
# prompt, and the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 5, "score_threshold": 0.5},
    ),
    chain_type_kwargs={"prompt": custom_prompt},
    return_source_documents=True,
)

Collection 'rag_collection' deleted successfully.
Chunk 1:networks for supervised or discriminative learning but also 
the deep networks for unsupervised or generative learning, 
and hybrid learning that can be used to solve a variety of 
real-world issues according to the nature of problems.
Deep learning, unlike traditional machine learning and 
data mining algorithms, can produce extremely high-level 
data representations from enormous amounts of raw data. As 
a result, it has provided an excellent solution to a variety of

Chunk 2:and comprehensive view on DL techniques considering the 
variations in real-world problems and tasks. To achieve 
our goal, we briefly discuss various DL techniques and 
present a taxonomy by taking into account three major 
categories: (i) deep networks for supervised or discrimi-
native learning that is utilized to provide a discrimina-
tive function in supervised deep learning or classifica-
tion applications; (ii) deep networks for unsupervised

Chunk

In [7]:
def process_and_store(file):
    """Rebuild "rag_collection" from an uploaded PDF and report what happened.

    The existing collection is dropped and recreated, then the PDF text is
    extracted, split into chunks and stored with the file name as ``file_id``.

    Args:
        file: The uploaded file as delivered by the UI, or ``None`` when the
            button was pressed without an upload.

    Returns:
        str: A human-readable, multi-line status message.
    """
    # Clicking "Process File" without an upload passes None; report it
    # instead of failing on `file.name`.
    if file is None:
        return "No file provided. Please upload a PDF first."

    # Use the file name as identifier; fall back to the value itself when it
    # has no `.name` attribute (e.g. a plain path string).
    file_id = getattr(file, "name", file)
    status_message = f"Processing file: {file_id}\n"

    # Only one document is kept indexed at a time: wipe the old collection.
    delete_collection(collection_name="rag_collection")
    status_message += "All previous chunks deleted from the collection.\n"
    try:
        qdrant_client.create_collection(
            collection_name="rag_collection",
            vectors_config={
                "size": 384,
                "distance": "Cosine"
            },
            optimizers_config={"default_segment_number": 1},
            on_disk_payload=True
        )
    except Exception as exc:
        # Still best-effort (e.g. the collection survived a failed delete),
        # but the problem is now reported rather than silently discarded.
        status_message += f"Warning: could not recreate the collection: {exc}\n"

    text = read_pdf(file)
    chunks = split_text(text)
    status_message += f"Text extracted and split into {len(chunks)} chunks.\n"

    # Avoid claiming a successful index when there was nothing to store.
    if not chunks:
        status_message += "No extractable text was found; nothing was indexed."
        return status_message

    store_embeddings(chunks, file_id)
    status_message += f"Successfully indexed {len(chunks)} chunks into the vectorstore."
    return status_message


def ask_question(query):
    """Answer a question with the RetrievalQA chain and show the chunks used.

    Args:
        query: The user's question.

    Returns:
        str: The generated answer followed by the retrieved chunks, or a short
        prompt to enter a question when ``query`` is empty or whitespace.
    """
    # Do not run retrieval and the LLM for an empty submission.
    if not query or not query.strip():
        return "Please enter a question."

    # `.invoke` replaces calling the chain object directly, which the captured
    # notebook output flags with a warning.
    response = qa_chain.invoke({"query": query})
    answer = response["result"]
    sources = response["source_documents"]

    chunks_text = "\n\n".join(
        f"Chunk {i}:{doc.page_content}" for i, doc in enumerate(sources, start=1)
    )

    # Combine answer + chunks to display
    full_output = f" Output:\n{answer}\n\n Retrieved Chunks:\n{chunks_text}"
    print(chunks_text)
    return full_output


In [None]:
# Gradio UI: one row to upload and index a PDF, a status box, one row to ask a
# question, and a box for the answer.
with gr.Blocks() as app:
    with gr.Row():
        file_input = gr.File(label="Upload PDF")
        upload_btn = gr.Button("Process File")

    # No trailing comma: the previous `gr.Textbox(...),` bound a one-element
    # tuple to `file_outputs` instead of the Textbox component itself.
    file_outputs = gr.Textbox(label="Status")

    with gr.Row():
        question_input = gr.Textbox(label="Ask a question")
        ask_btn = gr.Button("Submit")

    answer_output = gr.Textbox(label="Answer")

    # Wire the buttons to the processing and question-answering functions.
    upload_btn.click(fn=process_and_store, inputs=file_input, outputs=file_outputs)
    ask_btn.click(fn=ask_question, inputs=question_input, outputs=answer_output)

app.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Collection 'rag_collection' deleted successfully.


  response = qa_chain({"query": query})


Chunk 1:India is a land of diverse cultures, rich heritage, and vibrant traditions. Here's a detailed 
overview of India's culture and heritage: 
India: A Tapestry of Culture and Heritage 
Introduction 
India, known as the cradle of civilization, boasts a history that spans thousands of years. Its 
culture is a blend of various customs, traditions, and practices that have evolved over 
centuries. The country's heritage is reﬂected in its architecture, festivals, music, dance,

Chunk 2:country's traditions, festivals, music, dance, cuisine, languages, and art reﬂect the 
harmonious coexistence of various inﬂuences and the vibrant spirit of its people. Exploring 
India is like embarking on a journey through time, where every corner has a story to tell and 
every experience is a celebration of life.

Chunk 3:landscape. 
Architecture 
Indian architecture is renowned for its grandeur and diversity. The country is home to 
numerous UNESCO World Heritage Sites, including the Taj Mahal, Qutub 