# MongoDB Atlas
- Jupyter notebook for adding documents to a MongoDB Atlas collection and querying them with vector search

In [1]:
import getpass
import os
from urllib.parse import quote_plus
from uuid import uuid4

from dotenv import load_dotenv, find_dotenv
from langchain_core.documents import Document
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import AzureOpenAIEmbeddings
from pymongo import MongoClient

In [2]:
# Locate the .env file, then load its variables into the process environment.
# The call is left as the last expression so the cell shows its return value.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

## Credentials

In [3]:

# Atlas connection settings are read from the environment.
# Fail here, naming the variables that are absent, instead of building a URI
# that contains the literal text "None" and failing later with an opaque
# connection error.
required_env_vars = ("MONGODB_USER", "MONGODB_PASSWORD", "MONGODB_CLUSTER")
missing_env_vars = [name for name in required_env_vars if not os.environ.get(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_env_vars)
    )

mongodb_user = os.environ["MONGODB_USER"]
mongodb_password = os.environ["MONGODB_PASSWORD"]
mongodb_cluster = os.environ["MONGODB_CLUSTER"]

# Percent-escape the credentials: characters such as "@", ":" or "/" in a
# username or password would otherwise be parsed as URI delimiters.
# The environment must therefore hold the raw (unescaped) values.
MONGODB_CLUSTER_URI = (
    f"mongodb+srv://{quote_plus(mongodb_user)}:{quote_plus(mongodb_password)}"
    f"@{mongodb_cluster}"
)

## Initialization

In [4]:
# Embedding model shared by the vector store for documents and queries.
# NOTE(review): no endpoint or API key is passed here, so they are presumably
# picked up from the environment — confirm.
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-ada-002",
    chunk_size=16,
)

In [5]:
# Database, collection and search-index names are configured via the environment.
DB_NAME = os.environ.get("DB_NAME")
VECTOR_COLLECTION = os.environ.get("VECTOR_COLLECTION")
VECTOR_SEARCH_INDEX = os.environ.get("VECTOR_SEARCH_INDEX")

# Fail fast, naming the unset variables: otherwise None would be used as a
# database/collection key or passed on as the index name.
missing_settings = [
    name
    for name, value in (
        ("DB_NAME", DB_NAME),
        ("VECTOR_COLLECTION", VECTOR_COLLECTION),
        ("VECTOR_SEARCH_INDEX", VECTOR_SEARCH_INDEX),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_settings)
    )

# Initialize the MongoDB Python client and select the target collection
client = MongoClient(MONGODB_CLUSTER_URI)
MONGODB_COLLECTION = client[DB_NAME][VECTOR_COLLECTION]

# Vector store backed by that collection, using the embeddings model and the
# named search index.
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=VECTOR_SEARCH_INDEX,
    relevance_score_fn="cosine",
)

## Manage vector store
### Add items to vector store

In [16]:
# Sample corpus as (text, source) pairs; each pair becomes one Document whose
# metadata records the source of the text.
sample_texts = [
    ("I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in sample_texts
]

# Keep the individual document_N names bound to the same objects.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

# One freshly generated random UUID string per document, passed as its id.
uuids = [str(uuid4()) for _ in documents]

vector_store.add_documents(documents=documents, ids=uuids)

['d1246b01-4709-4510-81e5-8143693834cd',
 'b17e9efb-d86b-429d-97a9-7a0b98fbd540',
 '33ae656a-245c-4fdd-8a82-d801c3f090e2',
 '8bbb326c-3728-4674-aba2-c559e32ceffc',
 '6d609618-19c9-46e2-8b68-af33e50d1807',
 '859c323a-64dd-4da2-aae9-eb4ed749a54d',
 '5e9c5feb-7a46-4d9d-adb2-04351312fd82',
 '5efba569-0148-4bcd-a3e6-1e41e8094b87',
 '7ba7dd5e-932b-4e08-bc71-77f5ff4cd7ce',
 'cba1b88f-46a2-49f8-a4b4-89bc02bf67b9']

### Delete items from vector store

In [17]:
# Delete the document whose id is the last entry of the generated id list
last_added_id = uuids[-1]
vector_store.delete(ids=[last_added_id])

True

## Query vector store
### Query directly

In [6]:
# Similarity search: ask for the 2 closest documents to the query text
query_text = "Building an exciting new Project with LangChain"
results = vector_store.similarity_search(query_text, k=2)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'_id': '33ae656a-245c-4fdd-8a82-d801c3f090e2', 'source': 'tweet'}]
* LangGraph is the best framework for building stateful, agentic applications! [{'_id': '5efba569-0148-4bcd-a3e6-1e41e8094b87', 'source': 'tweet'}]


In [7]:
# Similarity search that also returns a score for each hit (k=1: single best hit)
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?",
    k=1,
)
# NOTE: the "3f" spec sets a minimum field width of 3, not a precision, so the
# score is printed with the default six decimal places.
for doc, similarity in results:
    print(f"* [SIM={similarity:3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.921679] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'_id': 'b17e9efb-d86b-429d-97a9-7a0b98fbd540', 'source': 'news'}]


### Pre-filtering with Similarity Search
- Atlas Vector Search supports pre-filtering with MQL operators
- Below is an example index definition and a query, on the same data loaded above, that let you filter on the "source" metadata field

```
{
  "fields":[
    {
      "type": "vector",
      "path": "embedding",
      "numDimensions": 1536,
      "similarity": "cosine"
    },
    {
      "type": "filter",
      "path": "source"
    }
  ]
}
```

In [17]:
# Query with a pre-filter: only documents whose "source" equals "tweet"
# are passed as candidates to the similarity search.
source_filter = {"source": {"$eq": "tweet"}}
results = vector_store.similarity_search(
    query="foo",
    k=1,
    pre_filter=source_filter,
)
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")

* I had chocalate chip pancakes and scrambled eggs for breakfast this morning. [{'_id': 'd1246b01-4709-4510-81e5-8143693834cd', 'source': 'tweet'}]


### Query by turning into retriever
- You can also transform the vector store into a retriever for easier usage in your chains

In [21]:
# Wrap the vector store as a retriever configured for k=2 and a relevance
# score threshold of 0.2.
retriever_settings = {"k": 2, "score_threshold": 0.2}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_settings,
)
# Invoke the retriever with a plain-text query (no metadata filter is applied).
# NOTE(review): the recorded run of this cell retrieved no documents at this
# threshold; possibly the relevance-score conversion for "cosine" is the cause —
# verify against the installed langchain_mongodb version.
retriever.invoke("Stealing from the bank is a crime.")

No relevant docs were retrieved using the relevance score threshold 0.2


[]