# Chroma

In [12]:
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

loader = TextLoader("../resources/speech.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000, chunk_overlap=30)
docs = text_splitter.split_documents(documents=documents)

In [13]:
docs

[Document(metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='The modern versi

In [14]:
embeddings = OllamaEmbeddings(model="embeddinggemma:latest")
db = Chroma.from_documents(docs, embeddings)
db

<langchain_chroma.vectorstores.Chroma at 0x11a93fe80>

In [15]:
### Querying
query = "What is a transformer?"
docs = db.similarity_search(query)
docs

[Document(id='2d56ebf6-75e5-4a09-961a-dd78a0aa26ed', metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
 Document(id='e5cb5964-faca-4100-9b45-21efa0d

In [16]:
docs[0].page_content

'In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'

In [17]:
### Retrieval
retriever = db.as_retriever()
retriever.invoke(query)

[Document(id='2d56ebf6-75e5-4a09-961a-dd78a0aa26ed', metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
 Document(id='e5cb5964-faca-4100-9b45-21efa0d

In [18]:
### Similarity Search with Score
### Distance based on Manhatthan distance

docs_and_score = db.similarity_search_with_score(query)
docs_and_score

[(Document(id='2d56ebf6-75e5-4a09-961a-dd78a0aa26ed', metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
  218089.375),
 (Document(id='e5cb5964-faca-

In [19]:
### Saving to the disk
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./chroma_db")

In [21]:
db2 = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
db2.similarity_search("Head")

[Document(id='2d56ebf6-75e5-4a09-961a-dd78a0aa26ed', metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
 Document(id='e5cb5964-faca-4100-9b45-21efa0d