### ASTRADB VectorStore
Go from app idea to production with the AI Platform with Astra DB, the ultra-low latency database made for AI and Langflow, the low-code RAG IDE
https://www.datastax.com/

In [17]:
import os

ASTRA_DB_API_ENDPOINT = os.getenv("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain_astradb import AstraDBVectorStore

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    dimensions=1024
)

embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x76ff55b038f0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x76fe20f9efc0>, model='text-embedding-3-small', dimensions=1024, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [7]:
vector_stores = AstraDBVectorStore(
    embedding=embeddings,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    collection_name="astra_vector_langchain",
    token=ASTRA_DB_APPLICATION_TOKEN,
    namespace=None,
)

vector_stores

<langchain_astradb.vectorstores.AstraDBVectorStore at 0x76fdfa254290>

In [8]:
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [9]:
vector_stores.add_documents(documents=documents)

['83fd3e6b8e6f4aa59dd058ec9862a168',
 'a8d9db595a4d419fa1273272f41b0cd3',
 'f453d2b16f054cc494e5a4d00dd9cd6e',
 '9f124828b0f7461890ab3d44127f739a',
 '1a36161f08774358b73cc9b6a277374e',
 '9cce08a76c21403c9e4f788f1506222a',
 'c53374b02d3b457083db44a78b75bb3f',
 '99f04b0edd55499fad63c25daa258a3e',
 '1afb5239d3c64be5a0ed54070fc54cae',
 '1f617c8d65de4e719f98372d14ee4e4a']

In [10]:
vector_stores.similarity_search("What is weather today?")

[Document(id='a8d9db595a4d419fa1273272f41b0cd3', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='83fd3e6b8e6f4aa59dd058ec9862a168', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(id='1afb5239d3c64be5a0ed54070fc54cae', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='c53374b02d3b457083db44a78b75bb3f', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]

In [11]:
result = vector_stores.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": "tweet"}
)

for res in result:
    print(f"-{res.page_content}, metadata={res.metadata}")

-Building an exciting new project with LangChain - come check it out!, metadata={'source': 'tweet'}
-LangGraph is the best framework for building stateful, agentic applications!, metadata={'source': 'tweet'}
-Wow! That was an amazing movie. I can't wait to see it again., metadata={'source': 'tweet'}


In [12]:
result = vector_stores.similarity_search_with_score(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": "tweet"},
)

for res, score in result:
    print(f'* [SIM={score:.2f}] "{res.page_content}", metadata={res.metadata}')

* [SIM=0.72] "Building an exciting new project with LangChain - come check it out!", metadata={'source': 'tweet'}
* [SIM=0.71] "LangGraph is the best framework for building stateful, agentic applications!", metadata={'source': 'tweet'}
* [SIM=0.53] "Wow! That was an amazing movie. I can't wait to see it again.", metadata={'source': 'tweet'}


In [16]:
### Retriever
retriever = vector_stores.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.7}, 
)

retriever.invoke(
    "Stealing from the bank is a crime", 
    filter={"source": "news"}
)

[Document(id='9f124828b0f7461890ab3d44127f739a', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]