# Vector Store

In [None]:
from devtools import debug

import sys

# A `!export ...` line runs in a short-lived subshell, so the variable never
# reaches this kernel. Extend the kernel's own import path instead, mirroring
# the intended PYTHONPATH value ":./python" (current directory + ./python).
for _extra_path in ("", "./python"):
    if _extra_path not in sys.path:  # keep the cell idempotent on re-run
        sys.path.append(_extra_path)

### Split the text into chunks

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from python.ai_core.embeddings import EmbeddingsFactory
from python.ai_core.vector_store import VectorStoreFactory

# Read the speech explicitly as UTF-8: without an encoding, TextLoader falls back
# to the platform default, which can fail on non-ASCII characters (e.g. on Windows).
loader = TextLoader("use_case_data/other/state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# Split the loaded document into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
debug(texts)

### Calculate embeddings and put them in a vector database

This is an adaptation of https://python.langchain.com/v0.1/docs/modules/data_connection/vectorstores/ <br><br>
We have a factory to facilitate Vector Store configuration and selection. <br>
One benefit is that it's easy to change the vector store type and the embedding model, just through configuration.

In [None]:
# Build the embeddings factory first, then hand it to the vector-store factory.
embeddings_factory = EmbeddingsFactory()
vs_factory = VectorStoreFactory(
    id="Chroma_in_memory",
    collection_name="maintenance_procedure",
    embeddings_factory=embeddings_factory,
)
debug(vs_factory)

# The factory exposes the configured store; add the chunks held in `texts` to it.
db = vs_factory.vector_store
db.add_documents(texts)

### Test a query

In [None]:
# Natural-language question; the retriever cells further down reuse it.
query = "What did the president say about Ketanji Brown Jackson"

# Ask the vector store for the 3 best-matching chunks and inspect them.
docs = db.similarity_search(query=query, k=3)
debug(docs)

### Vector Store as Runnable

In [None]:
# Wrap the vector store as a retriever so it can be invoked like any Runnable.
retriever = db.as_retriever()

# The bare last expression displays the retrieved documents for `query`.
retriever.invoke(input=query)

### Configurable Retriever Runnable 

Making a retriever configurable at call time is a little bit tricky, so there is a small wrapper in the Factory:

In [None]:
# Retriever built by the factory wrapper, created with default_k=20.
retriever = vs_factory.get_configurable_retriever(default_k=20)

# Pass k=1 through the run config to override the search arguments for this call.
config = {
    "configurable": {
        "search_kwargs": {"k": 1},
    },
}
result = retriever.invoke(query, config=config)
result  # return 1 result