# MongoDB Atlas + Voyage AI + OpenAI RAG Solution

This notebook demonstrates a complete RAG (Retrieval-Augmented Generation) pipeline that:
1. Ingests datasets (manuals, interviews, and work orders) into MongoDB Atlas
2. Generates embeddings using a Voyage AI 3 embedding model
3. Creates vector indexes for similarity search
4. Implements a RAG solution using OpenAI API for answering questions with context

## Prerequisites
- MongoDB Atlas cluster with Atlas Vector Search enabled
- Voyage AI API key
- OpenAI API key
- Python packages: pymongo, voyageai, openai, pandas, python-dotenv, numpy (needed by the in-memory similarity fallback)

## Section 1: Import Required Libraries and Configure Environment

## Setup: Configure Virtual Environment and Install Dependencies

In [14]:
# Install the required libraries directly in the notebook
%pip install dotenv pymongo voyageai openai

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import json
import sys
from pathlib import Path
from typing import Optional, List, Dict, Any
from pymongo import MongoClient
from pymongo.errors import ServerSelectionTimeoutError
from pymongo.operations import SearchIndexModel
from dotenv import load_dotenv

import voyageai
#import openai
from openai import OpenAI


# Load environment variables (python-dotenv reads a local .env file if one exists)
load_dotenv()

# MongoDB connection settings
MONGODB_URI = os.getenv("MONGODB_URI")
DATABASE_NAME = os.getenv("DATABASE_NAME")

# LLM endpoint and API key
LLM_API_ENDPOINT = os.getenv("LLM_API_ENDPOINT")
LLM_API_KEY = os.getenv("LLM_API_KEY")

# Voyage AI endpoint, API key and embedding model name
VOYAGE_API_ENDPOINT = os.getenv("VOYAGE_API_ENDPOINT")
VOYAGE_API_KEY = os.getenv("VOYAGE_API_KEY")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")

# Report which settings were found (the boolean shows presence, not validity)
print("Configuration Status:")
print(f"✓ MongoDB URI configured: {bool(MONGODB_URI)}")
print(f"✓ Voyage AI API key configured: {bool(VOYAGE_API_KEY)}")
print(f"✓ Voyage AI Embedding model configured: {bool(EMBEDDING_MODEL)}")
print(f"✓ OpenAI API key configured: {bool(LLM_API_KEY)}")

# Initialize API clients.
# Bind both names up front so they always exist, even when a key is missing;
# otherwise a later reference fails with a NameError unrelated to the real
# cause (the missing key).
voyage_client = None
llm_client = None

# NOTE(review): VOYAGE_API_ENDPOINT and LLM_API_ENDPOINT are read above but
# are not applied to either client here - confirm whether custom base URLs
# are still required.
if VOYAGE_API_KEY:
    voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)

if LLM_API_KEY:
    llm_client = OpenAI(api_key=LLM_API_KEY)

Configuration Status:
✓ MongoDB URI configured: True
✓ Voyage AI API key configured: True
✓ OpenAI API key configured: True


## Section 2: Connect to MongoDB Atlas

In [22]:
def connect_to_mongodb(uri: str, db_name: str = DATABASE_NAME) -> tuple:
    """
    Connect to a MongoDB Atlas cluster and select a database.

    The connection is verified with a ``ping`` admin command before the
    database handle is returned. If anything fails after the client has been
    created, the client is closed before the exception is re-raised so its
    resources are not leaked.

    Args:
        uri: MongoDB connection string
        db_name: Database name to use (defaults to DATABASE_NAME as it was
            when this function was defined)

    Returns:
        Tuple of (client, database)

    Raises:
        ServerSelectionTimeoutError: No server could be reached within the
            5 second selection timeout.
        Exception: Any other error raised while connecting is re-raised.
    """
    client = None
    try:
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
        # Verify connection
        client.admin.command('ping')
        db = client[db_name]
        print("✓ Successfully connected to MongoDB Atlas")
        print(f"✓ Database: {db_name}")
        return client, db
    except ServerSelectionTimeoutError:
        print("✗ Failed to connect to MongoDB Atlas. Check your connection string.")
        if client is not None:
            client.close()
        raise
    except Exception as e:
        print(f"✗ Connection error: {e}")
        # client is still None when MongoClient(...) itself raised
        if client is not None:
            client.close()
        raise

# Open the connection only when a connection string was provided
if not MONGODB_URI:
    print("✗ MONGODB_URI not configured. Please set the environment variable.")
else:
    mongo_client, db = connect_to_mongodb(MONGODB_URI)
    print(f"✓ Available collections: {db.list_collection_names()}")

✓ Successfully connected to MongoDB Atlas
✓ Database: pmd
✓ Available collections: ['manuals', 'workorders', 'interviews']


## Section 3: Load and Ingest Datasets from Data Folder

In [23]:
def load_json_dataset(file_path: str) -> List[Dict[str, Any]]:
    """
    Load a JSON dataset from file

    A top-level JSON array is returned as-is; any other top-level value
    (object, scalar) is wrapped in a single-element list. The reported
    document count is taken after that wrapping, so a single object is
    reported as one document rather than by its number of keys.

    Args:
        file_path: Path to the JSON file

    Returns:
        List of documents; an empty list if the file is missing or is not
        valid JSON.
    """
    try:
        # JSON is read as UTF-8 explicitly instead of the platform default
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"✗ File not found: {file_path}")
        return []
    except json.JSONDecodeError as e:
        print(f"✗ Error decoding JSON from {file_path}: {e}")
        return []

    documents = data if isinstance(data, list) else [data]
    print(f"✓ Loaded {file_path}: {len(documents)} documents")
    return documents

# Resolve the data folder: prefer ./data, otherwise fall back to ../data
data_folder = Path("./data")
if not data_folder.exists():
    print(f"✗ Data folder not found at {data_folder.absolute()}")
    # Second candidate, one directory up
    alt_path = Path("../data")
    if not alt_path.exists():
        print("Please ensure the data folder exists in the workspace root")
    else:
        data_folder = alt_path

print(f"\nLoading datasets from: {data_folder.absolute()}")
print(f"Available files: {list(data_folder.glob('*'))}\n")

# Read the three datasets used by the rest of the notebook
manuals_data = load_json_dataset(str(data_folder.joinpath("manuals.json")))
interviews_data = load_json_dataset(str(data_folder.joinpath("interviews.json")))
workorders_data = load_json_dataset(str(data_folder.joinpath("workorders.json")))

print("\nDataset Summary:")
print(f"  - Manuals: {len(manuals_data)} documents")
print(f"  - Interviews: {len(interviews_data)} documents")
print(f"  - Work Orders: {len(workorders_data)} documents")


Loading datasets from: /Users/mancilla/sw/demo-rag-pm/data
Available files: [PosixPath('data/manuals.json'), PosixPath('data/workorders.json'), PosixPath('data/maintenance_staff.json'), PosixPath('data/interviews.json'), PosixPath('data/inventory.json')]

✓ Loaded data/manuals.json: 6 documents
✓ Loaded data/interviews.json: 5 documents
✓ Loaded data/workorders.json: 10 documents

Dataset Summary:
  - Manuals: 6 documents
  - Interviews: 5 documents
  - Work Orders: 10 documents


In [24]:
def ingest_data_to_mongodb(db, collection_name: str, documents: List[Dict]) -> bool:
    """
    Replace the contents of a MongoDB collection with the given documents.

    The collection is dropped first and the documents are then inserted in a
    single insert_many call.

    Args:
        db: MongoDB database object
        collection_name: Name of the collection
        documents: List of documents to insert

    Returns:
        True if the documents were inserted; False if there was nothing to
        insert or any error occurred.
    """
    if documents:
        try:
            target = db[collection_name]
            # Start from an empty collection
            target.drop()

            inserted_ids = target.insert_many(documents).inserted_ids
            print(f"✓ Ingested {len(inserted_ids)} documents into '{collection_name}'")
        except Exception as exc:
            print(f"✗ Error ingesting data into {collection_name}: {exc}")
            return False
        else:
            return True

    print(f"✗ No documents to ingest into {collection_name}")
    return False

# Push every non-empty dataset into its collection (requires a configured connection)
if MONGODB_URI:
    if manuals_data:
        ingest_data_to_mongodb(db, "manuals", manuals_data)

    if interviews_data:
        ingest_data_to_mongodb(db, "interviews", interviews_data)

    if workorders_data:
        ingest_data_to_mongodb(db, "workorders", workorders_data)

print("\n✓ Data ingestion complete")

✓ Ingested 6 documents into 'manuals'
✓ Ingested 5 documents into 'interviews'
✓ Ingested 10 documents into 'workorders'

✓ Data ingestion complete


## Section 4: Generate Embeddings Using Voyage AI 3

In [25]:
def extract_text_for_embedding(document: Dict[str, Any], text_fields: List[str] = None) -> str:
    """
    Build the text to embed for a document.

    Fields are visited in the order given. String values are used as-is;
    list values contribute the str() of each truthy item; values of any
    other type, and missing or falsy fields, are skipped.

    Args:
        document: The document to extract text from
        text_fields: Field names to read (defaults to 'text', 'title',
            'observations' when None)

    Returns:
        The collected pieces joined with single spaces
    """
    fields = ['text', 'title', 'observations'] if text_fields is None else text_fields

    def _pieces(value):
        # Turn one field value into zero or more text fragments
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            return [str(item) for item in value if item]
        return []

    collected = []
    for name in fields:
        if name not in document:
            continue
        value = document[name]
        if not value:
            continue
        collected += _pieces(value)

    return " ".join(collected)

def generate_embeddings_batch(
    texts: List[str],
    model: str = EMBEDDING_MODEL,
    input_type: str = "document",
) -> List[List[float]]:
    """
    Generate embeddings for a batch of texts using Voyage AI

    Args:
        texts: List of texts to embed
        model: Voyage AI model to use (defaults to EMBEDDING_MODEL as it was
            when this function was defined)
        input_type: Value forwarded to the Voyage AI ``input_type`` option.
            Defaults to "document", which is what this function always sent
            before the parameter existed; callers embedding a search query
            can pass "query" instead.

    Returns:
        List of embedding vectors, or an empty list when ``texts`` is empty
        or the API call fails (the error is printed, not raised).
    """
    if not texts:
        return []

    try:
        # Create embeddings using Voyage AI
        response = voyage_client.embed(
            texts=texts,
            model=model,
            input_type=input_type
        )
        embeddings = list(response.embeddings)
        print(f"✓ Generated {len(embeddings)} embeddings using {model}")
        return embeddings
    except Exception as e:
        print(f"✗ Error generating embeddings: {e}")
        return []

# Smoke-test the embedding call; later cells use test_embeddings as their go/no-go flag
print("Testing Voyage AI embedding generation...")
test_texts = ["This is a test document", "Another test text for embeddings"]
test_embeddings = generate_embeddings_batch(test_texts)
if not test_embeddings:
    print("✗ Embedding generation failed. Check API key and connectivity.")
else:
    print(f"✓ Sample embedding dimension: {len(test_embeddings[0])}")

Testing Voyage AI embedding generation...
✗ Error generating embeddings: Model voyageai-3-large is not supported. Supported models are ['voyage-4-large', 'voyage-4', 'voyage-4-lite', 'voyage-large-2-instruct', 'voyage-law-2', 'voyage-code-2', 'voyage-02', 'voyage-2', 'voyage-01', 'voyage-lite-01', 'voyage-lite-01-instruct', 'voyage-lite-02-instruct', 'voyage-multilingual-2', 'voyage-large-2'].
✗ Embedding generation failed. Check API key and connectivity.


## Section 5: Update Collections with Embeddings Field

In [10]:
def add_embeddings_to_collection(db, collection_name: str, batch_size: int = 10) -> bool:
    """
    Generate embeddings for all documents in a collection and update them

    Documents are read once, embedded batch by batch, and each document is
    updated with an ``embeddings`` field. A batch whose embedding call fails
    (or returns the wrong number of vectors) is skipped so the remaining
    batches are still processed, but the function then reports failure.

    Args:
        db: MongoDB database object
        collection_name: Name of the collection to process
        batch_size: Number of documents to process per batch (must be >= 1)

    Returns:
        True only if every batch was embedded and written; False if the
        collection is empty, batch_size is invalid, any batch failed, or an
        unexpected error occurred.
    """
    if batch_size < 1:
        # A zero step makes range() raise and a negative step yields no
        # batches at all, which would otherwise look like success.
        print(f"✗ Invalid batch_size {batch_size} for '{collection_name}' (must be >= 1)")
        return False

    try:
        collection = db[collection_name]
        documents = list(collection.find({}))

        if not documents:
            print(f"✗ No documents found in collection '{collection_name}'")
            return False

        print(f"\nProcessing {len(documents)} documents in '{collection_name}'...")

        total_batches = (len(documents) + batch_size - 1) // batch_size
        failed_batches = 0

        # Process documents in batches
        for batch_number, start in enumerate(range(0, len(documents), batch_size), 1):
            batch = documents[start:start + batch_size]

            # Extract text from documents
            texts = [extract_text_for_embedding(doc) for doc in batch]

            # Generate embeddings for the batch
            embeddings = generate_embeddings_batch(texts)

            if not embeddings or len(embeddings) != len(batch):
                print(f"✗ Embedding generation failed for batch {batch_number}")
                failed_batches += 1
                continue

            # Update documents with embeddings
            for doc, embedding in zip(batch, embeddings):
                collection.update_one(
                    {"_id": doc["_id"]},
                    {"$set": {"embeddings": embedding}}
                )

            print(f"  ✓ Processed batch {batch_number}/{total_batches}")

        # Verify embeddings were added
        docs_with_embeddings = collection.count_documents({"embeddings": {"$exists": True}})
        print(f"✓ Updated {docs_with_embeddings} documents with embeddings in '{collection_name}'")

        if failed_batches:
            print(f"✗ {failed_batches} of {total_batches} batches failed in '{collection_name}'")
            return False
        return True

    except Exception as e:
        print(f"✗ Error adding embeddings to {collection_name}: {e}")
        return False

# Add embeddings to all three collections, but only if the smoke test produced embeddings
if not (MONGODB_URI and test_embeddings):
    print("✗ Skipping embedding generation - API not configured or failed")
else:
    print("=" * 60)
    print("GENERATING AND ADDING EMBEDDINGS TO COLLECTIONS")
    print("=" * 60)

    add_embeddings_to_collection(db, "manuals", batch_size=5)
    add_embeddings_to_collection(db, "interviews", batch_size=5)
    add_embeddings_to_collection(db, "workorders", batch_size=10)

GENERATING AND ADDING EMBEDDINGS TO COLLECTIONS

Processing 6 documents in 'manuals'...
✓ Generated 5 embeddings using voyage-3-large
  ✓ Processed batch 1/2
✓ Generated 1 embeddings using voyage-3-large
  ✓ Processed batch 2/2
✓ Updated 6 documents with embeddings in 'manuals'

Processing 5 documents in 'interviews'...
✓ Generated 5 embeddings using voyage-3-large
  ✓ Processed batch 1/1
✓ Updated 5 documents with embeddings in 'interviews'

Processing 10 documents in 'workorders'...
✓ Generated 10 embeddings using voyage-3-large
  ✓ Processed batch 1/1
✓ Updated 10 documents with embeddings in 'workorders'


## Section 6: Create Vector Indexes in MongoDB

In [11]:
def create_vector_search_index(db, collection_name: str, embedding_dim: int = 1024) -> bool:
    """
    Create a vector search index on the embeddings field

    The index is named "vector_index", covers the "embeddings" path and uses
    cosine similarity. If an index with that name is already listed for the
    collection, nothing is created.

    Note: This requires MongoDB Atlas with Atlas Vector Search enabled

    Args:
        db: MongoDB database object
        collection_name: Name of the collection
        embedding_dim: Dimension of the embeddings

    Returns:
        True if the index exists or was requested (or the manual-creation
        instructions were printed for a driver without the search-index
        API); False if any other error occurred.
    """
    try:
        collection = db[collection_name]

        # Vector search index definition for Atlas Vector Search. Kept in a
        # named dict so the same definition can be printed in the fallback.
        index_definition = {
            "fields": [
                {
                    "type": "vector",
                    "path": "embeddings",
                    "numDimensions": embedding_dim,
                    "similarity": "cosine"
                }
            ]
        }

        search_index_model = SearchIndexModel(
            definition=index_definition,
            name="vector_index",
            type="vectorSearch"
        )

        try:
            # Search-index helpers exist only on newer PyMongo collections
            existing_indexes = [idx.get('name') for idx in collection.list_search_indexes()]

            if 'vector_index' not in existing_indexes:
                collection.create_search_index(model=search_index_model)
                print(f"✓ Created vector search index for '{collection_name}'")
            else:
                print(f"✓ Vector search index already exists for '{collection_name}'")

        except AttributeError:
            # Fallback for older driver versions
            print("⚠ Vector search index creation requires MongoDB Atlas with Vector Search enabled")
            print("  Manually create the index in MongoDB Atlas UI with this definition:")
            print(f"  {json.dumps(index_definition, indent=2)}")

        return True

    except Exception as e:
        print(f"⚠ Note: {e}")
        print("  Vector indexes should be created in MongoDB Atlas UI")
        return False

# Create a vector index on each of the three collections
if not (MONGODB_URI and test_embeddings):
    print("✗ Skipping index creation - prerequisites not met")
else:
    print("\n" + "=" * 60)
    print("CREATING VECTOR SEARCH INDEXES")
    print("=" * 60)

    # test_embeddings is non-empty in this branch, so its first vector gives the dimension
    embedding_dim = len(test_embeddings[0])

    create_vector_search_index(db, "manuals", embedding_dim)
    create_vector_search_index(db, "interviews", embedding_dim)
    create_vector_search_index(db, "workorders", embedding_dim)


CREATING VECTOR SEARCH INDEXES
✓ Created vector search index for 'manuals'
✓ Created vector search index for 'interviews'
✓ Created vector search index for 'workorders'


## Section 7: Implement RAG Solution with OpenAI

In [12]:
def vector_search_mongodb(db, collection_name: str, query_vector: List[float], num_results: int = 5) -> List[Dict]:
    """
    Perform vector similarity search on MongoDB collection

    The search first runs an Atlas ``$vectorSearch`` aggregation against the
    "vector_index" index on the "embeddings" field. If that aggregation
    raises, the function falls back to computing cosine similarity in memory
    over every document that has an ``embeddings`` field.

    Both paths return items shaped as
    ``{"similarityScore": float, "document": {...}}`` (the aggregation path
    also carries the document's ``_id`` at the top level).

    Args:
        db: MongoDB database object
        collection_name: Name of the collection to search
        query_vector: Query embedding vector
        num_results: Number of results to return

    Returns:
        List of matching documents with similarity scores, best first; an
        empty list if nothing matched or an unexpected error occurred.
    """
    try:
        collection = db[collection_name]

        # Use aggregation pipeline with vector search
        pipeline = [
            {
                "$vectorSearch": {
                    "index": "vector_index",
                    # Field holding the stored vectors; the stage requires it
                    "path": "embeddings",
                    "queryVector": query_vector,
                    # Never let the candidate pool be smaller than the limit
                    "numCandidates": max(10, num_results),
                    "limit": num_results
                }
            },
            {
                "$project": {
                    # Same key the in-memory fallback uses, so callers can
                    # read the score the same way on both paths
                    "similarityScore": {"$meta": "vectorSearchScore"},
                    "document": "$$ROOT"
                }
            },
            {
                "$limit": num_results
            }
        ]

        # Try standard vector search first
        try:
            return list(collection.aggregate(pipeline))
        except Exception as search_error:
            # Report the reason instead of hiding it, then fall back
            print(f"⚠ $vectorSearch failed ({search_error}); falling back to in-memory cosine similarity")

        # Fallback: brute-force cosine similarity over documents with embeddings
        documents = list(collection.find({"embeddings": {"$exists": True}}))

        if not documents:
            return []

        import numpy as np
        query_vec = np.array(query_vector)
        query_norm = np.linalg.norm(query_vec)

        results = []
        for doc in documents:
            if 'embeddings' not in doc:
                continue
            doc_vec = np.array(doc['embeddings'])
            denominator = query_norm * np.linalg.norm(doc_vec)
            if denominator == 0:
                # Cosine similarity is undefined for a zero-length vector
                continue
            similarity = np.dot(query_vec, doc_vec) / denominator
            results.append({
                'similarityScore': float(similarity),
                'document': doc
            })

        # Sort by similarity score and return top results
        results.sort(key=lambda x: x['similarityScore'], reverse=True)
        return results[:num_results]

    except Exception as e:
        print(f"✗ Error performing vector search: {e}")
        return []

def _format_context_section(heading: str, results) -> str:
    """Render one collection's search hits as a headed, numbered text section."""
    section = f"=== From {heading} ===\n"
    for i, result in enumerate(results, 1):
        # Hits may wrap the stored document under 'document' or be the document itself
        doc = result.get('document', result)
        score = result.get('similarityScore', 0)
        text = extract_text_for_embedding(doc)[:500]  # Limit text length
        section += f"{i}. (Score: {score:.2f}) {text}...\n\n"
    return section


def retrieve_context(db, query: str, num_results: int = 3) -> str:
    """
    Retrieve relevant context from the manuals, interviews and workorders
    collections using vector search

    The query is embedded once via generate_embeddings_batch (with that
    helper's default settings) and the same vector is used to search each
    collection. Collections with no hits are omitted from the output.

    Args:
        db: MongoDB database object
        query: User query
        num_results: Number of results per collection

    Returns:
        Formatted context string for RAG, or "No context available" when the
        query could not be embedded.
    """
    # Generate query embedding
    query_embedding = generate_embeddings_batch([query])

    if not query_embedding:
        return "No context available"

    query_vector = query_embedding[0]

    # (section heading, collection name), in output order
    sources = [
        ("Manuals", "manuals"),
        ("Interviews", "interviews"),
        ("Work Orders", "workorders"),
    ]

    # Run every search before formatting anything
    searched = [
        (heading, vector_search_mongodb(db, collection_name, query_vector, num_results))
        for heading, collection_name in sources
    ]

    # Format context
    context = "Retrieved Context:\n\n"
    for heading, results in searched:
        if results:
            context += _format_context_section(heading, results)

    return context

# RAG system definition (it is instantiated after the class)
class MongoDBOpenAIRAG:
    """RAG system using MongoDB Atlas and OpenAI"""

    def __init__(self, db, model: str = "gpt-3.5-turbo", temperature: float = 0.7, max_tokens: int = 500):
        """
        Store the settings used for every question.

        Args:
            db: MongoDB database object used for context retrieval
            model: Chat model name sent to the LLM client
            temperature: Sampling temperature sent to the LLM client
            max_tokens: Upper bound on generated tokens per answer. The
                default of 500 matches the previously hard-coded value;
                raise it if answers are cut off mid-sentence.
        """
        self.db = db
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens

    def answer_question(self, query: str, num_context_docs: int = 3) -> Dict[str, Any]:
        """
        Answer a question using RAG approach

        Args:
            query: User question
            num_context_docs: Number of context documents to retrieve per
                collection

        Returns:
            On success: dict with 'success' (True), 'query', 'answer',
            'context' and 'model'.
            On failure of the LLM call: dict with 'success' (False),
            'query', 'error' and 'context'.
        """
        # Retrieve context
        context = retrieve_context(self.db, query, num_context_docs)

        # Create prompt for OpenAI
        system_prompt = """You are a helpful assistant answering questions about maintenance systems and procedures.
Use the provided context to answer the question accurately. If the context doesn't contain relevant information, say so.
Always cite your sources from the context."""

        user_message = f"""Context Information:
{context}

Question: {query}

Please provide a helpful answer based on the context above."""

        try:
            # Call OpenAI API
            response = llm_client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_message}
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )

            answer = response.choices[0].message.content

            return {
                'success': True,
                'query': query,
                'answer': answer,
                'context': context,
                'model': self.model
            }
        except Exception as e:
            # Report the failure as data so callers can still show the context
            return {
                'success': False,
                'query': query,
                'error': str(e),
                'context': context
            }

# Instantiate the RAG system only when both the database and the LLM key are configured
if not (MONGODB_URI and LLM_API_KEY):
    print("✗ RAG system initialization failed - missing API keys")
else:
    rag_system = MongoDBOpenAIRAG(db)
    print("✓ RAG system initialized successfully")

✓ RAG system initialized successfully


## Section 8: Query and Retrieve Results

In [13]:
def format_rag_response(response: Dict[str, Any]) -> str:
    """
    Render a RAG response dictionary as a printable report.

    Args:
        response: Response dictionary from RAG system

    Returns:
        Multi-line string: a QUERY header, then either the answer with its
        context (when 'success' is truthy) or the error with the retrieved
        context, closed by a rule line.
    """
    heavy_rule = "=" * 70 + "\n"

    parts = [
        "\n",
        heavy_rule,
        f"QUERY: {response.get('query', 'N/A')}\n",
        heavy_rule,
        "\n",
    ]

    if not response.get('success'):
        parts.append(f"ERROR: {response.get('error', 'Unknown error')}\n")
        parts.append(f"RETRIEVED CONTEXT:\n{response.get('context', 'No context')}\n")
    else:
        parts.append(f"ANSWER:\n{response.get('answer', 'No answer generated')}\n\n")
        parts.append("-" * 70 + "\n")
        parts.append(f"CONTEXT SOURCES:\n{response.get('context', 'No context retrieved')}\n")

    parts.append(heavy_rule)
    return "".join(parts)

# Example queries to test the RAG system
example_queries = [
    "What are the maintenance procedures for critical equipment?",
    "How do you troubleshoot sensor failures?",
    "What is the recommended maintenance schedule?"
]

print("\n" + "=" * 70)
print("RAG SYSTEM DEMONSTRATION")
print("=" * 70)

# At cell (module) level locals() is the notebook namespace, so this checks
# whether the initialization cell actually created rag_system.
if 'rag_system' in locals() and MONGODB_URI:
    import time  # imported once, not on every loop iteration

    print("\nRunning example queries...\n")

    for i, query in enumerate(example_queries, 1):
        print(f"\n--- Query {i} ---")
        response = rag_system.answer_question(query, num_context_docs=2)
        formatted = format_rag_response(response)
        print(formatted)

        # Add a small delay between API calls to avoid rate limiting
        if i < len(example_queries):
            time.sleep(2)
else:
    print("\n✗ RAG system not available for querying")
    # The key is read from LLM_API_KEY in the configuration cell
    print("  Ensure MONGODB_URI and LLM_API_KEY are configured")


RAG SYSTEM DEMONSTRATION

Running example queries...


--- Query 1 ---
✓ Generated 1 embeddings using voyage-3-large

QUERY: What are the maintenance procedures for critical equipment?

ANSWER:
Based on the information provided in the context, the maintenance procedures for critical equipment should include the following steps:

1. Weekly lubrication of bearings.
2. Weekly inspection of coolant lines for blockages or low levels.
3. Monthly full system alignment check.
4. Bi-monthly replacement of wear-prone tools.
5. Visual inspection of impeller condition for wear or degradation in case of fault code E12 (High Temperature).
6. Checking coolant system for blockages or low levels in case of high-temp alerts.
7. Verifying proper functioning of the lubrication system.
8. Monitoring vibration levels and investigating any alerts that indicate bearing misalignment.

These maintenance procedures are crucial for preventing issues such as high temperatures, tool wear, coolant blockages, and be

In [None]:
# Interactive query interface
def interactive_rag_query():
    """
    Interactive interface for querying the RAG system

    Reads questions from standard input in a loop, sends each one to the
    notebook-level ``rag_system`` and prints the formatted response. Blank
    input is ignored. The loop ends on 'exit', Ctrl+C, or end of input.

    Usage:
        Call this function and enter your questions when prompted.
        Type 'exit' to quit.
    """
    import time

    # Inside a function locals() only holds this function's own variables,
    # so the notebook-level rag_system has to be looked up in globals().
    if 'rag_system' not in globals():
        print("✗ RAG system not initialized")
        return

    print("\n" + "=" * 70)
    print("INTERACTIVE RAG QUERY INTERFACE")
    print("=" * 70)
    print("Enter your questions about the maintenance data.")
    print("Type 'exit' to quit.\n")

    while True:
        try:
            query = input("Your question: ").strip()

            if query.lower() == 'exit':
                print("Exiting RAG system...")
                break

            if not query:
                print("Please enter a question.\n")
                continue

            print("\nProcessing your query...\n")
            response = rag_system.answer_question(query, num_context_docs=3)
            formatted = format_rag_response(response)
            print(formatted)

            # Add delay to avoid rate limiting
            time.sleep(1)

        except (KeyboardInterrupt, EOFError):
            # EOFError: input is exhausted (e.g. a non-interactive run).
            # Without this the generic handler below would loop forever.
            print("\n\nExiting RAG system...")
            break
        except Exception as e:
            print(f"Error processing query: {e}\n")

# Uncomment the line below to run the interactive interface
# interactive_rag_query()

# Closing banner, then a static recap of the pipeline stages
print("\n" + "=" * 70, "NOTEBOOK COMPLETE", "=" * 70, sep="\n")
print("""
Summary of RAG Pipeline:
✓ Data loaded from data folder
✓ Data ingested into MongoDB Atlas
✓ Embeddings generated using Voyage AI 3
✓ Collections updated with embeddings
✓ Vector indexes created
✓ RAG system implemented with OpenAI

To use the interactive interface, uncomment and run the interactive_rag_query() cell.

To ask custom questions, use:
  result = rag_system.answer_question("Your question here")
  print(result)
""")

✗ RAG system not initialized

NOTEBOOK COMPLETE

Summary of RAG Pipeline:
✓ Data loaded from data folder
✓ Data ingested into MongoDB Atlas
✓ Embeddings generated using Voyage AI 3
✓ Collections updated with embeddings
✓ Vector indexes created
✓ RAG system implemented with OpenAI

To use the interactive interface, uncomment and run the interactive_rag_query() cell.

To ask custom questions, use:
  result = rag_system.answer_question("Your question here")
  print(result)

