### ASTRADB VectorStore
Go from app idea to production with the AI Platform with Astra DB, the ultra-low latency database made for AI and Langflow, the low-code RAG IDE
https://www.datastax.com/

In [1]:
### ASTRADB VectorStore

In [2]:
!pip install \
    "langchain>=0.3.23,<0.4" \
    "langchain-core>=0.3.52,<0.4" \
    "langchain-astradb>=0.6,<0.7"

Collecting langchain-core<0.4,>=0.3.52
  Downloading langchain_core-0.3.79-py3-none-any.whl.metadata (3.2 kB)
Downloading langchain_core-0.3.79-py3-none-any.whl (449 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.8/449.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-core
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 1.0.0
    Uninstalling langchain-core-1.0.0:
      Successfully uninstalled langchain-core-1.0.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-openai 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.[0m[31m
[0mSuccessfully installed langchain-core-0.3.79


In [3]:
!pip install langchain_openai

Collecting langchain-core<2.0.0,>=1.0.0 (from langchain_openai)
  Using cached langchain_core-1.0.0-py3-none-any.whl.metadata (3.4 kB)
Using cached langchain_core-1.0.0-py3-none-any.whl (467 kB)
Installing collected packages: langchain-core
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.79
    Uninstalling langchain-core-0.3.79:
      Successfully uninstalled langchain-core-0.3.79
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-astradb 0.6.1 requires langchain-core<1.0.0,>=0.3.74, but you have langchain-core 1.0.0 which is incompatible.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you have langchain-core 1.0.0 which is incompatible.[0m[31m
[0mSuccessfully installed langchain-core-1.0.0


In [4]:
import os
from google.colab import userdata

# Access the environment variables from Colab Secrets Manager
openai_api_key = userdata.get('OPENAI_API_KEY')
astra_db_application_token = userdata.get('ASTRA_DB_APPLICATION_TOKEN')
astra_db_api_endpoint = userdata.get('ASTRA_DB_API_ENDPOINT')

if openai_api_key:
    os.environ['OPENAI_API_KEY'] = openai_api_key
    print("OPENAI_API_KEY loaded successfully from Secrets Manager.")
else:
    print("OPENAI_API_KEY not found in Secrets Manager. Please add it.")

if astra_db_application_token:
    os.environ['ASTRA_DB_APPLICATION_TOKEN'] = astra_db_application_token
    print("ASTRA_DB_APPLICATION_TOKEN loaded successfully from Secrets Manager.")
else:
    print("ASTRA_DB_APPLICATION_TOKEN not found in Secrets Manager. Please add it.")

if astra_db_api_endpoint:
    os.environ['ASTRA_DB_API_ENDPOINT'] = astra_db_api_endpoint
    print("ASTRA_DB_API_ENDPOINT loaded successfully from Secrets Manager.")
else:
    print("ASTRA_DB_API_ENDPOINT not found in Secrets Manager. Please add it.")

OPENAI_API_KEY loaded successfully from Secrets Manager.
ASTRA_DB_APPLICATION_TOKEN loaded successfully from Secrets Manager.
ASTRA_DB_API_ENDPOINT loaded successfully from Secrets Manager.


In [5]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings(model="text-embedding-3-small",dimensions=1024,api_key=openai_api_key)

In [6]:
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x7edd3c992210>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x7edc46005160>, model='text-embedding-3-small', dimensions=1024, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [7]:
from langchain_astradb import AstraDBVectorStore
vector_store=AstraDBVectorStore(
    embedding=embeddings,
    api_endpoint=astra_db_api_endpoint,
    collection_name="astra_vector_langchain",
    token=astra_db_application_token,
    namespace=None,

)
vector_store

<langchain_astradb.vectorstores.AstraDBVectorStore at 0x7edc45cfbfb0>

In [8]:
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [9]:
vector_store.add_documents(documents=documents)

['d5feee52a3314892b8a05574c3599e00',
 'b97f9bb0f39845d2954016477cde51bf',
 'dfd39ade35244ebba4e84fa4dbc011d8',
 '7b984511e8324ab788c6ae0463161b64',
 '1264aa1a540e444cb7e7cc9891af74a5',
 'f8ba7869bdcb48118d1ec77ed99af435',
 'ec2c34343fff43d787fc6048daadf227',
 '7a8cc5babf0249b5bcc0600de28fcc27',
 '6544594d1db34861bcf70fc3c22f9993',
 'ade01c25c68f4165a8accba2a0ae3cf2']

In [10]:
### Search from Vector Store DB

vector_store.similarity_search("What is the weather")

[Document(id='b97f9bb0f39845d2954016477cde51bf', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='6544594d1db34861bcf70fc3c22f9993', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='dfd39ade35244ebba4e84fa4dbc011d8', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='ade01c25c68f4165a8accba2a0ae3cf2', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [11]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": "tweet"},
)
for res in results:
    print(f'* "{res.page_content}", metadata={res.metadata}')

* "Building an exciting new project with LangChain - come check it out!", metadata={'source': 'tweet'}
* "LangGraph is the best framework for building stateful, agentic applications!", metadata={'source': 'tweet'}
* "Wow! That was an amazing movie. I can't wait to see it again.", metadata={'source': 'tweet'}


In [12]:
results = vector_store.similarity_search_with_score(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": "tweet"},
)
for res, score in results:
    print(f'* [SIM={score:.2f}] "{res.page_content}", metadata={res.metadata}')

* [SIM=0.72] "Building an exciting new project with LangChain - come check it out!", metadata={'source': 'tweet'}
* [SIM=0.71] "LangGraph is the best framework for building stateful, agentic applications!", metadata={'source': 'tweet'}
* [SIM=0.53] "Wow! That was an amazing movie. I can't wait to see it again.", metadata={'source': 'tweet'}


In [13]:
### Retriever
retriever=vector_store.as_retriever(
  search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)
retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})

[Document(id='7b984511e8324ab788c6ae0463161b64', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

In [14]:
### Retriever
retriever=vector_store.as_retriever(
  search_type="mmr",
    search_kwargs={"k": 1},
)
retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})

[Document(id='7b984511e8324ab788c6ae0463161b64', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]