In [1]:
import getpass, os, pymongo, pprint
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.core.settings import Settings
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.vector_stores import MetadataFilter, MetadataFilters, ExactMatchFilter, FilterOperator
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch

In [2]:
import yaml
import pymongo
import json
from sentence_transformers import SentenceTransformer
import numpy as np

In [3]:
%env OPENAI_API_KEY="openai_key"

env: OPENAI_API_KEY="sk-proj-VcTbrBz9RO6RP8znSxqZT3BlbkFJmx4Q3HCyxyduxKh1HPub"


In [4]:
os.environ["OPENAI_API_KEY"] = 'open_ai_key'

In [5]:
# os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
# ATLAS_CONNECTION_STRING = getpass.getpass("MongoDB Atlas SRV Connection String:")


In [6]:
with open('../config.yaml', 'r') as stream:
    config = yaml.safe_load(stream)

# Load the configuration from the config.yaml file
mongo_uri = config['mongodb']['mongo_uri']
db_name = config['mongodb']['llama_db_name']
collection_name = config['mongodb']['llama_collection_name']

In [8]:
Settings.llm = OpenAI()
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.chunk_size = 100
Settings.chunk_overlap = 10

In [9]:
sample_data = SimpleDirectoryReader(input_files=["../data/mongodb_atlas_best_practices.pdf"]).load_data()
# Print the first document
sample_data[0]

ValueError: File ../data/mongodb_atlas_best_practices.pdf does not exist.

In [16]:
# Connect to your Atlas cluster
mongodb_client = pymongo.MongoClient(mongo_uri)

# Instantiate the vector store
atlas_vector_search = MongoDBAtlasVectorSearch(
    mongodb_client,
    db_name = "llamaindex_db",
    collection_name = "test",
    index_name = "llama_vector_index"
)
vector_store_context = StorageContext.from_defaults(vector_store=atlas_vector_search)

In [18]:
vector_store_index = VectorStoreIndex.from_documents(
   sample_data, storage_context=vector_store_context, show_progress=True
)

Parsing nodes:   0%|          | 0/21 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/262 [00:00<?, ?it/s]

In [19]:
retriever = vector_store_index.as_retriever(similarity_top_k=3)
nodes = retriever.retrieve("MongoDB Atlas security")

for node in nodes:
    print(node)

Node ID: 4935fc2e-eba3-4124-af8f-ce2f2f83f97a
Text: MongoD B Atlas provides: •Security f eatures to protect access
to your data •Built in replication for always-on availability ,
tolerating complete data center failure •Backups and point in time
recovery to protect against data corruption •Fine-grained monitoring
to let you know when to scale.
Score:  0.933

Node ID: 0756f0f7-b022-44f8-b3fe-f5ddb217b152
Text: Protect data in motion over the network and at rest in
persistent storage To ensure a secure system right out of the b ox,
authentication and I P Address whitelisting are automatically enabled.
Review the security section of the MongoD B Atlas documentation to
learn more ab out eac h of the security features discussed below .
Score:  0.932

Node ID: 595e8a42-3b21-4ed3-9b82-61a9c3227d01
Text: MongoD B Atlas f eatures e xtensive capabilities to def end,
detect, and control access to MongoD B, off ering among the most
complete security controls of any modern database: •User Rights
Ma

### Basic RAG

In [20]:
# Instantiate Atlas Vector Search as a retriever
vector_store_retriever = VectorIndexRetriever(index=vector_store_index, similarity_top_k=5)

# Pass the retriever into the query engine
query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)

# Prompt the LLM
response = query_engine.query('How can I secure my MongoDB Atlas cluster?')

print(response)
print("\nSource documents: ")
pprint.pprint(response.source_nodes)

You can secure your MongoDB Atlas cluster by enabling authentication and IP Address whitelisting, maintaining separate cluster security configurations, creating different alert settings for different environments, deploying into different regions or cloud platforms, and ensuring that data volumes are always encrypted when deploying on AWS, Azure, and GCP.

Source documents: 
[NodeWithScore(node=TextNode(id_='0756f0f7-b022-44f8-b3fe-f5ddb217b152', embedding=None, metadata={'page_label': '18', 'file_name': 'mongodb_atlas_best_practices.pdf', 'file_path': '../data/mongodb_atlas_best_practices.pdf', 'file_type': 'application/pdf', 'file_size': 512653, 'creation_date': '2024-05-22', 'last_modified_date': '2024-05-22'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<

### RAG with Filtering

In [21]:
# Specify metadata filters
metadata_filters = MetadataFilters(
   filters=[ExactMatchFilter(key="metadata.page_label", value="17")]
)

# Instantiate Atlas Vector Search as a retriever
vector_store_retriever = VectorIndexRetriever(index=vector_store_index, filters=metadata_filters, similarity_top_k=5)

# Pass the retriever into the query engine
query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)

# Prompt the LLM
response = query_engine.query('How can I secure my MongoDB Atlas cluster?')

print(response)
print("\nSource documents: ")
pprint.pprint(response.source_nodes)

You can secure your MongoDB Atlas cluster by following best practices such as enabling authentication, configuring network access controls, enabling encryption at rest and in transit, implementing role-based access control, regularly updating and patching your MongoDB version, and monitoring your cluster for any suspicious activity.

Source documents: 
[NodeWithScore(node=TextNode(id_='a2e9cd35-e1a6-4bfd-968b-3766454822ae', embedding=None, metadata={'page_label': '17', 'file_name': 'mongodb_atlas_best_practices.pdf', 'file_path': '../data/mongodb_atlas_best_practices.pdf', 'file_type': 'application/pdf', 'file_size': 512653, 'creation_date': '2024-05-22', 'last_modified_date': '2024-05-22'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE