Tutorial link:
https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/

In [2]:
import getpass, os, pymongo, pprint
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pymongo import MongoClient

In [7]:
# Reuse credentials that are already exported in the environment so the
# notebook can be re-run (Restart & Run All) without prompting again;
# otherwise fall back to an interactive getpass prompt.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# The connection string is read from the ATLAS_CONNECTION_STRING environment
# variable when it is set, and prompted for via getpass otherwise, so it is
# never written into the notebook source.
ATLAS_CONNECTION_STRING = (
    os.environ.get("ATLAS_CONNECTION_STRING")
    or getpass.getpass("MongoDB Atlas SRV Connection String:")
)

In [8]:
# Names of the database, collection and vector search index used by this notebook
db_name = "langchain_db"
collection_name = "test"
vector_search_index = "vector_index"

# Open a client for the Atlas cluster and get a handle on the target collection
client = MongoClient(ATLAS_CONNECTION_STRING)
atlas_collection = client.get_database(db_name).get_collection(collection_name)

In [9]:
# Splitter configured with chunk_size=200 and chunk_overlap=20
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=200)

# Load the PDF from its URL
loader = PyPDFLoader("https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP")
data = loader.load()

# Split the loaded PDF into smaller documents
docs = text_splitter.split_documents(data)

# Show the first split document as the cell output
docs[0]

Document(page_content='Mong oDB Atlas Best P racticesJanuary 20 19A MongoD B White P aper', metadata={'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 0})

In [10]:
# Build the Atlas vector store from the split documents, using OpenAI embeddings
# and the collection / index name defined earlier.
# NOTE(review): re-running this cell presumably inserts the same chunks again —
# confirm before re-running against an already populated collection.
vector_search = MongoDBAtlasVectorSearch.from_documents(
    docs,
    OpenAIEmbeddings(disallowed_special=()),
    collection=atlas_collection,
    index_name=vector_search_index,
)

In [11]:
# Run a similarity search for a sample query and pretty-print the matches
query = "MongoDB Atlas security"
results = vector_search.similarity_search(query=query)
pprint.pprint(results)

[Document(page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba4e'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}),
 Document(page_content='MongoD B Atlas team are also monitoring the underlying\ninfrastructure, ensuring that it is always in a healthy state.\nApplication L ogs And Database L ogs', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba19'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 15}),
 Document(page_content='MongoD B.\nMongoD B Atlas incorporates best practices to help keep\nmanaged databases healthy and optimized. T hey ensure\noperational continuity by converting comple x manual tasks', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980b9fc'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/

# Example 1
Basic RAG

In [12]:
# Expose the vector store as a retriever for the RAG chain
# NOTE(review): "score_threshold" may only take effect with
# search_type="similarity_score_threshold" — confirm against the retriever docs.
retriever = vector_search.as_retriever(
    search_kwargs={"k": 10, "score_threshold": 0.75},
    search_type="similarity",
)

# Chat model used to generate the answer (constructed with default arguments)
llm = ChatOpenAI()

# Prompt with two placeholders: {context} for the retrieved text and
# {question} for the user's question
template = """

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
"""
custom_rag_prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    """Join the ``page_content`` of each document in ``docs`` with blank lines.

    Returns a single string; an empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# RAG chain: the retriever output is formatted into "context", the input is
# passed through unchanged as "question", then prompt -> model -> string parser
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Ask a question through the chain
question = "How can I secure my MongoDB Atlas cluster?"
answer = rag_chain.invoke(question)

print("Question: " + question)
print("Answer: " + answer)

# Fetch the source documents for the same question.
# retriever.invoke() replaces the deprecated get_relevant_documents(), which
# emitted the warn_deprecated notice seen in this cell's output.
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)

Question: How can I secure my MongoDB Atlas cluster?
Answer: To secure your MongoDB Atlas cluster, you can enable authentication and IP address whitelisting, review the security section of the MongoDB Atlas documentation, monitor the underlying infrastructure, set up global clusters on cloud platforms, select appropriate instance size, storage size, and storage speed options, encrypt data at rest with encrypted storage volumes, configure additional encryption on data at rest, and consider setting up a larger number of replica nodes for increased protection against downtime.


  warn_deprecated(



Source documents:
[Document(page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba4e'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}),
 Document(page_content='MongoD B Atlas team are also monitoring the underlying\ninfrastructure, ensuring that it is always in a healthy state.\nApplication L ogs And Database L ogs', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba19'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 15}),
 Document(page_content='All the user needs to do in order for MongoD B Atlas to\nautomatically deploy the cluster is to select a handful of\noptions:\n•Instance size\n•Storage size (optional)\n•Storage speed (optional)', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba07'}, 'source': 'https://query.pr

# Example 2
RAG with filtering

In [13]:
# Retriever restricted by a pre-filter to documents whose "page" equals 17
# NOTE(review): "score_threshold" may only take effect with
# search_type="similarity_score_threshold" — confirm against the retriever docs.
retriever = vector_search.as_retriever(
    search_kwargs={
        "k": 10,
        "score_threshold": 0.75,
        "pre_filter": {"page": {"$eq": 17}},
    },
    search_type="similarity",
)

# Chat model used to generate the answer (constructed with default arguments)
llm = ChatOpenAI()

# Prompt with two placeholders: {context} for the retrieved text and
# {question} for the user's question
template = """

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
"""
custom_rag_prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    """Join the ``page_content`` of each document in ``docs`` with blank lines.

    Returns a single string; an empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# RAG chain: the filtered retriever output is formatted into "context", the
# input is passed through unchanged as "question", then prompt -> model -> parser
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Ask the same question through the filtered chain
question = "How can I secure my MongoDB Atlas cluster?"
answer = rag_chain.invoke(question)

print("Question: " + question)
print("Answer: " + answer)

# Fetch the source documents for the same question.
# retriever.invoke() is used in place of the deprecated get_relevant_documents().
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)

Question: How can I secure my MongoDB Atlas cluster?
Answer: To secure your MongoDB Atlas cluster, you can enable authentication and IP Address whitelisting, utilize roles and permissions to control access, implement a Defense in Depth approach, consider VPC Peering for secure connectivity, and consider integrating with LDAP for centralized authorization management. Additionally, regularly review the security section of MongoDB Atlas for best practices and updates on security measures.

Source documents:
[Document(page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas', metadata={'_id': {'$oid': '665b847e0e1fd5f2b980ba4e'}, 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}),
 Document(page_content='Security\nAs with all software, MongoD B administrators must\nconsider security and risk e xposure for a MongoD B\ndepl