Chroma

###### Chroma is a AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0

In [4]:
from langchain_community.document_loaders import TextLoader
from langchain_chroma.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

loaders = TextLoader("speech.txt")
documents = loaders.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)
docs = text_splitter.split_documents(documents)

In [6]:
embeddings = OllamaEmbeddings(model="llama3")
db = Chroma.from_documents(docs, embeddings)
db

<langchain_chroma.vectorstores.Chroma at 0x13f2cd750>

In [8]:
# query
query = "What are the two aspects of the obesity spread?"
query_result = db.similarity_search(query)
docs

[Document(metadata={'source': 'speech.txt'}, page_content='Ladies and Gentlemen,'),
 Document(metadata={'source': 'speech.txt'}, page_content='It is a pleasure for me to be here tonight and address such a great audience. The issue I would like to bring up threatens the prosperity and welfare of the whole nation, however, the majority of the'),
 Document(metadata={'source': 'speech.txt'}, page_content='however, the majority of the population tends to ignore it and pretend as if it is not a problem at all. Namely, I would like to talk about the risks of obesity.'),
 Document(metadata={'source': 'speech.txt'}, page_content='First of all, it would be reasonable to present the statistics that some of you might find shocking. To be more precise, in accordance with the data provided by the Office of Disease Prevention and'),
 Document(metadata={'source': 'speech.txt'}, page_content='of Disease Prevention and Health Promotion, the number of people suffering from obesity has already reached the

In [9]:
# save to disk
vectordb = Chroma.from_documents(docs, embedding=embeddings, persist_directory="./chroma_db")

In [10]:
db2 = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
docs = db2.similarity_search(query)

In [11]:
### Retriever option
retriever = vectordb.as_retriever()
retriever.invoke(query)[0].page_content

'Ladies and Gentlemen, let me once again thank you for your patience. Being aware and responsible, we are capable of overcoming the threats of obesity spread. Thank you!'