<img src = "https://github.com/VeryFatBoy/notebooks/blob/main/common/images/img_github_singlestore-jupyter_featured_2.png?raw=true">

<div id="singlestore-header" style="display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;">
    <div id="icon-image" style="width: 90px; height: 90px;">
        <img width="100%" height="100%" src="https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/browser.png" />
    </div>
    <div id="text" style="padding: 5px; margin-left: 10px;">
        <div id="badge" style="display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%">SingleStore Notebooks</div>
        <h1 style="font-weight: 500; margin: 8px 0 0 4px;">Replace MongoDB® Atlas with SingleStore Kai in LangChain</h1>
    </div>
</div>

In [6]:
!pip cache purge --quiet

In [7]:
!pip install langchain_openai langchain-mongodb --quiet

In [8]:
import os
from uuid import uuid4

from langchain_core.documents import Document
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings
from pymongo import MongoClient
from singlestoredb.management import get_secret

In [9]:
# Copy the OpenAI key from the SingleStore secret store into the process
# environment (requires `import os` in the imports cell).
# NOTE(review): presumably OpenAIEmbeddings reads OPENAI_API_KEY from the
# environment - confirm against the langchain_openai documentation.
os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY")

# Embedding model used for both stored documents and queries.
embeddings = OpenAIEmbeddings(
    model = "text-embedding-3-small"
)

In [10]:
# Names of the database, collection and vector index used throughout the notebook.
DB_NAME = "langchain_test_db"
COLLECTION_NAME = "langchain_test_vectorstores"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "langchain_test_index_vectorstores"

# NOTE(review): `connection_url_kai` is not defined in this notebook; presumably
# it is injected by the SingleStore notebook environment - confirm.
client = MongoClient(connection_url_kai)
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# LangChain vector store backed by the collection above, scored by dot product.
vector_store = MongoDBAtlasVectorSearch(
    collection = MONGODB_COLLECTION,
    embedding = embeddings,
    index_name = ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn = "dotProduct",
)

In [12]:
# Create the vector index on the "embedding" field under the same name that
# `vector_store` was configured with (ATLAS_VECTOR_SEARCH_INDEX_NAME).
MONGODB_COLLECTION.create_index(
    [("embedding", "vector")],
    name = ATLAS_VECTOR_SEARCH_INDEX_NAME,
    kaiIndexOptions = {
        "index_type": "AUTO",
        # Same metric as relevance_score_fn = "dotProduct" on the vector store.
        "metric_type": "DOT_PRODUCT",
        # NOTE(review): presumably the vector length produced by
        # text-embedding-3-small - confirm if the embedding model changes.
        "dimensions": 1536
    }
)

'langchain_test_index_vectorstores'

In [13]:
# Sample corpus: ten short texts, each tagged with the kind of source it came from.
document_1 = Document(page_content = "I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", metadata = {"source": "tweet"})
document_2 = Document(page_content = "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", metadata = {"source": "news"})
document_3 = Document(page_content = "Building an exciting new project with LangChain - come check it out!", metadata = {"source": "tweet"})
document_4 = Document(page_content = "Robbers broke into the city bank and stole $1 million in cash.", metadata = {"source": "news"})
document_5 = Document(page_content = "Wow! That was an amazing movie. I can't wait to see it again.", metadata = {"source": "tweet"})
document_6 = Document(page_content = "Is the new iPhone worth the price? Read this review to find out.", metadata = {"source": "website"})
document_7 = Document(page_content = "The top 10 soccer players in the world right now.", metadata = {"source": "website"})
document_8 = Document(page_content = "LangGraph is the best framework for building stateful, agentic applications!", metadata = {"source": "tweet"})
document_9 = Document(page_content = "The stock market is down 500 points today due to fears of a recession.", metadata = {"source": "news"})
document_10 = Document(page_content = "I have a bad feeling I am going to get deleted :(", metadata = {"source": "tweet"})

documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

# One freshly generated UUID string per document, in the same order as `documents`.
uuids = [str(uuid4()) for _ in documents]

# Left as the last expression so the cell displays the value returned by add_documents.
vector_store.add_documents(documents = documents, ids = uuids)

['74beaaeb-897f-417a-aa09-f0b171859275',
 'fb22674d-85bc-454e-a95a-3cca20cd4b5d',
 'c474e923-a4ee-4258-890c-95882571dd8c',
 'd1d19d5c-518b-4d60-98e7-c6b0d2621efa',
 '895e61dd-4262-4f11-b174-8f04ed9fe443',
 '6ce2cae1-9877-4fc1-a1cf-2df3dc7910d5',
 '2ee33b04-c161-4b0f-9a87-fb1c803e028d',
 'e476495d-6812-48cb-92aa-381efc23f76c',
 '6bd53c68-e97c-4dbd-a0c2-7bb20221a16b',
 '3dc32b0d-417c-45fd-82ce-85e0aba15c5e']

In [14]:
# Delete the entry stored under the last generated id
# (presumably the one paired with document_10 in the add_documents call).
vector_store.delete(ids = [uuids[-1]])

True

In [15]:
# Fetch the two stored documents closest to the query text and list them.
query = "LangChain provides abstractions to make working with LLMs easy"
results = vector_store.similarity_search(query, k = 2)

for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'_id': 'c474e923-a4ee-4258-890c-95882571dd8c', 'source': 'tweet'}]
* LangGraph is the best framework for building stateful, agentic applications! [{'_id': 'e476495d-6812-48cb-92aa-381efc23f76c', 'source': 'tweet'}]


In [16]:
# Same kind of search, but each hit comes back paired with its similarity score.
results = vector_store.similarity_search_with_score("Will it be hot tomorrow?", k = 1)
for res, score in results:
    # `.3f` prints three decimals; the previous `3f` was a minimum field width,
    # which left the score at the default six decimals.
    print(f"* [SIM = {score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM = 0.569169] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'_id': 'fb22674d-85bc-454e-a95a-3cca20cd4b5d', 'source': 'news'}]


In [17]:
# Wrap the store as a retriever that returns at most one document, and only
# when its score passes the 0.2 threshold.
retriever = vector_store.as_retriever(
    search_type = "similarity_score_threshold",
    search_kwargs = dict(k = 1, score_threshold = 0.2),
)

# Left as the last expression so the cell displays the retrieved documents.
retriever.invoke("Stealing from the bank is a crime")

[Document(metadata={'_id': 'd1d19d5c-518b-4d60-98e7-c6b0d2621efa', 'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

## Cleanup

In [20]:
# Drop the collection that backed the vector store.
MONGODB_COLLECTION.drop()

In [21]:
# Drop the demo database named by DB_NAME.
client.drop_database(DB_NAME)

In [22]:
# Close the client connection as the final cleanup step.
client.close()

Reference: [MongoDB Atlas](https://github.com/langchain-ai/langchain/blob/master/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb)

## MIT License

Copyright (c) LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.