## Chroma

Chroma is an AI-native, open-source vector database focused on developer productivity and happiness.

In [5]:
from langchain_chroma import Chroma

from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [6]:
# Read speech.txt from the working directory into LangChain Document objects.
loader = TextLoader('speech.txt')
data = loader.load()

# Bare expression: lets the notebook display the loaded documents.
data

[Document(metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [7]:
# Build a character-based splitter with a 50-character chunk size and a
# 20-character overlap, then apply it to the loaded documents.
text_splitter = CharacterTextSplitter(
    chunk_size=50,
    chunk_overlap=20,
)
splits = text_splitter.split_documents(data)

# NOTE(review): the recorded output of this cell is a single Document that
# still holds the entire text, i.e. no splitting actually happened. This is
# presumably because CharacterTextSplitter only cuts on its separator (a blank
# line by default) and speech.txt contains single newlines only -- confirm,
# and consider passing an explicit separator if real chunking is wanted.
splits

[Document(metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [8]:
# Embedding function backed by the Ollama 'llama3' model.
embedding = OllamaEmbeddings(model='llama3')

# Embed the split documents and index them in a Chroma vector store
# (no persist_directory is given for this store).
vectordb = Chroma.from_documents(splits, embedding)

# Bare expression: lets the notebook display the store object.
vectordb

  embedding=OllamaEmbeddings(model='llama3')


<langchain_chroma.vectorstores.Chroma at 0x11462d9a0>

In [10]:
# Natural-language question used for every lookup in the cells that follow.
query = "Who is Head coach of the Indian men's cricket team"

# Fetch the stored documents most similar to the question.
docs = vectordb.similarity_search(query)

# Bare expression: lets the notebook display the matches.
docs

[Document(id='32870921-7e3d-4c22-95bf-80c923e1be32', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [11]:
# Saving and Loading
#
# Rebuild the vector store from the same documents, this time pointing Chroma
# at ./chroma_db. Note that this rebinds `vectordb` to the new store.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory="./chroma_db",
)

In [12]:
# Open the store saved under ./chroma_db, supplying the same embedding
# function so that queries are embedded the same way as the stored documents.
db2 = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embedding,
)

In [13]:
# Run the same question against the store reopened from ./chroma_db; the bare
# expression makes the notebook display the matches.
db2.similarity_search(query)

[Document(id='c961dbb9-ba16-4cb1-a984-508bd5887e95', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [14]:
# Wrap the vector store in a retriever and ask it the same question.
# `vectordb` is whichever store was assigned most recently (the ./chroma_db
# one, if the cells were run in order).
retriever = vectordb.as_retriever()
docs = retriever.invoke(query)

# Bare expression: lets the notebook display the retrieved documents.
docs

[Document(id='c961dbb9-ba16-4cb1-a984-508bd5887e95', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]