Install required packages for the RAG system.

1. LangGraph
2. LangChain
3. Qdrant
4. Gradio

In [2]:
import os

# --- Configuration -----------------------------------------------------------
# Folder holding the source PDF files
DOCS_DIR = "docs"
# Folder holding the markdown conversions of those PDFs
MARKDOWN_DIR = "markdown"
# Folder holding the parent chunk JSON files
PARENT_STORE_PATH = "parent_store"
# Collection name for child chunks (presumably the Qdrant collection; confirm at use site)
CHILD_COLLECTION = "document_child_chunks"

# Make sure every working folder is present; exist_ok makes re-running the cell a no-op
for _working_dir in (DOCS_DIR, MARKDOWN_DIR, PARENT_STORE_PATH):
    os.makedirs(_working_dir, exist_ok=True)

In [3]:
# Initialize LLM Setup

from langchain_ollama import ChatOllama
llm = ChatOllama(
    model="gpt-oss:20b-cloud",
    temperature=0,  # zero temperature to keep responses as repeatable as possible
)


In [4]:
# Embeddings setup 

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant.fastembed_sparse import FastEmbedSparse

# Dense vectors: capture semantic similarity between texts
dense_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Sparse vectors: BM25-style keyword matching to complement the dense model
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

Fetching 18 files: 100%|██████████| 18/18 [00:01<00:00, 13.42it/s]


In [None]:
# Vector Database Setup 
from qdrant_client import QdrantClient
from qdrant_client.http import models as qdrant_models
from langchain_qdrant import QdrantVectorStore
from langchain_qdrant.qdrant import RetrievalMode


# Initialize Client
# The guard must test for a live client object, not for the "qdrant_db" folder:
# the folder persists across sessions, so on a fresh kernel it already exists
# while `client` does not, and skipping the constructor would leave `client`
# undefined for every later cell.
# Reusing a client left in the namespace by an earlier run of this cell also
# avoids opening the same on-disk store twice (qdrant-client's local mode
# locks the storage folder for the client that holds it).
if isinstance(globals().get("client"), QdrantClient):
    print("✓ Qdrant client already exists")
else:
    client = QdrantClient(path="qdrant_db")

# Get Embedding Dimension
# Embed a throwaway query and measure the vector length, so the collection's
# vector size always matches whatever dense model is configured.
embedding_dimension = len(dense_embeddings.embed_query("test"))

def ensure_collections(collection_name):
    """Create the Qdrant collection `collection_name` unless it already exists.

    Uses the module-level `client` and `embedding_dimension`. A new collection
    gets one unnamed dense vector space (cosine distance, sized to
    `embedding_dimension`) plus a sparse vector space registered under the
    name "sparse". A status line is printed in either case.

    Args:
        collection_name: Name of the collection to check for / create.
    """
    if client.collection_exists(collection_name):
        print(f"✓ Collection {collection_name} already exists")
    else:
        # Dense vector space: size must match the embedding model's output length
        dense_config = qdrant_models.VectorParams(
            size=embedding_dimension,
            distance=qdrant_models.Distance.COSINE,
        )
        # Sparse vector space, keyed by the name it is stored under
        sparse_config = {"sparse": qdrant_models.SparseVectorParams()}

        client.create_collection(
            collection_name=collection_name,
            vectors_config=dense_config,
            sparse_vectors_config=sparse_config,
        )
        print(f"✓ Created collection: {collection_name}")