# In [None]:
import ollama
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel
import os

# Configuration
# The connection string is read from the MONGODB_URI environment variable.
# When that variable is unset, the placeholder below is used as-is; it still
# contains the literal "<username>:<password>" markers.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://<username>:<password>@cluster0.mongodb.net/?retryWrites=true&w=majority",
)

# Database and collection that hold the ingested documents.
DB_NAME = "rag_db"
COLLECTION_NAME = "documents"

# Model names handed to Ollama for embedding and chat calls respectively.
# NOTE(review): the embedding model name looks like a Hugging Face repository
# id -- confirm the local Ollama install serves a model under this exact name.
EMBEDDING_MODEL = "Alibaba-NLP/gte-multilingual-base"
LLM_MODEL = "gemma3:12b-it-qat"

EMBEDDING_DIM = 768  # Dimension for gte-multilingual-base

# Connect to MongoDB
# A single module-level client; the database and collection handles derived
# from it are the ones the functions in this file read from and write to.
client = MongoClient(MONGODB_URI)
db = client.get_database(DB_NAME)
collection = db.get_collection(COLLECTION_NAME)

# Function to create vector search index (run once)
def create_vector_search_index():
    """Request creation of the ``vector_index`` vector search index.

    The index covers the ``embedding`` field with ``EMBEDDING_DIM``
    dimensions and cosine similarity. It is declared as a ``vectorSearch``
    index with a ``vector`` field, which is the index type the
    ``$vectorSearch`` aggregation stage is documented to query; the
    ``knnVector`` mapping inside a regular search index is the deprecated
    form.

    Intended to be run once per collection.

    NOTE(review): the ``type`` argument of ``SearchIndexModel`` needs a
    reasonably recent PyMongo (believed to be 4.7+) -- confirm against the
    pinned driver version.
    """
    index_model = SearchIndexModel(
        definition={
            "fields": [
                {
                    "type": "vector",
                    "path": "embedding",
                    "numDimensions": EMBEDDING_DIM,
                    "similarity": "cosine",
                }
            ]
        },
        name="vector_index",
        type="vectorSearch",
    )
    collection.create_search_index(index_model)
    # NOTE(review): the server presumably builds the index in the background,
    # so it may not be queryable the moment this message prints -- confirm.
    print("Vector search index created.")

# Function to generate embeddings using Ollama
def generate_embedding(text):
    """Return the embedding Ollama produces for ``text``.

    ``text`` is sent as the prompt to ``ollama.embeddings`` with the model
    named by ``EMBEDDING_MODEL``; the value stored under the ``'embedding'``
    key of the reply is returned unchanged.
    """
    return ollama.embeddings(prompt=text, model=EMBEDDING_MODEL)['embedding']

# Function to ingest documents
def ingest_documents(documents):
    """Embed each document's ``content`` and store the documents.

    Args:
        documents: Iterable of dicts, each with a ``'content'`` key holding
            the text to embed. Any other keys are stored alongside it.

    Raises:
        KeyError: If a document has no ``'content'`` key.

    The caller's dicts are left untouched: each stored record is a shallow
    copy with an added ``'embedding'`` key. Inserting the original objects
    would let the driver write an ``_id`` into them, which makes a second
    ingest of the same list fail with a duplicate key.
    """
    # All embeddings are computed before anything is written, so a failure
    # while embedding leaves the collection unchanged.
    records = [
        {**doc, "embedding": generate_embedding(doc["content"])}
        for doc in documents
    ]

    # insert_many rejects an empty batch, so only call it when there is data.
    if records:
        collection.insert_many(records)
    print(f"Ingested {len(records)} documents.")

# Function to perform vector search
def vector_search(query, top_k=5, num_candidates=None):
    """Return the stored documents closest to ``query``.

    Args:
        query: Text to embed and search for.
        top_k: Maximum number of results, passed as the stage's ``limit``.
        num_candidates: Value for the stage's ``numCandidates``. When
            ``None`` (the default) it is ``max(100, top_k)``: the previous
            fixed value of 100 for small ``top_k``, raised just enough for
            larger ones.

    Returns:
        A list of dicts with the document's ``content`` and a ``score``
        taken from the ``vectorSearchScore`` metadata; ``_id`` is excluded.
    """
    if num_candidates is None:
        # $vectorSearch is documented to reject a limit larger than
        # numCandidates, so a fixed 100 would break every top_k above 100.
        num_candidates = max(100, top_k)

    query_embedding = generate_embedding(query)
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": top_k,
            }
        },
        {
            "$project": {
                "_id": 0,
                "content": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
    return list(collection.aggregate(pipeline))

# Function to generate RAG response
def rag_query(query):
    """Answer ``query`` with the chat model, using retrieved text as context.

    The documents returned by ``vector_search(query)`` have their
    ``content`` values joined with blank lines and placed in a prompt ahead
    of the question. That prompt is sent as a single user message to
    ``ollama.chat`` with ``LLM_MODEL``, and the reply's message content is
    returned.
    """
    # Retrieval: gather the text of every hit into one context string.
    hits = vector_search(query)
    context = "\n\n".join(hit['content'] for hit in hits)

    # The template's leading indentation is part of the text sent to the model.
    prompt = f"""
    Context:
    {context}

    Question: {query}

    Answer the question based on the context provided.
    """

    # Generation: one user turn carrying the whole prompt.
    user_message = {"role": "user", "content": prompt}
    reply = ollama.chat(model=LLM_MODEL, messages=[user_message])
    return reply['message']['content']

# Example usage
if __name__ == "__main__":
    # One-time setup: uncomment to create the search index.
    # create_vector_search_index()

    # Sample documents, each wrapped as {"content": <text>}.
    sample_docs = [
        {"content": text}
        for text in (
            "MongoDB is a NoSQL database that supports vector search for AI applications.",
            "Ollama is a tool for running large language models locally.",
            "RAG stands for Retrieval Augmented Generation, enhancing LLMs with external knowledge.",
        )
    ]

    # One-time setup: uncomment to ingest the sample documents.
    # ingest_documents(sample_docs)

    # Query example: print a header line, then the model's answer.
    query = "What is RAG?"
    response = rag_query(query)
    print("RAG Response:", response, sep="\n")