In [3]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings

In [4]:
loader = TextLoader("speech.txt")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 20)
final_document = text_splitter.split_documents(docs)
final_document

[Document(metadata={'source': 'speech.txt'}, page_content='Good morning, everyone.'),
 Document(metadata={'source': 'speech.txt'}, page_content="Life often presents us with the same circumstances, but it's our perspective that determines the"),
 Document(metadata={'source': 'speech.txt'}, page_content='that determines the outcome. Think about three common items: a potato, an egg, and a coffee bean.'),
 Document(metadata={'source': 'speech.txt'}, page_content='and a coffee bean. All are subjected to the same boiling water, an intense challenge or adversity.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The potato starts off strong and hard, but the boiling water makes it soft and weak. It loses its'),
 Document(metadata={'source': 'speech.txt'}, page_content='weak. It loses its form and substance.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The egg is fragile on the outside, but inside, the liquid interior hardens under the pressure and'),
 Document(met

In [6]:

embedding = OllamaEmbeddings(model = "gemma:2b")
vector = Chroma.from_documents(documents = final_document, embedding = embedding)
vector

<langchain_chroma.vectorstores.Chroma at 0x11df86fd0>

In [7]:
query = " Life often presents us with the same circumstances"
docs = vector.similarity_search(query)
docs

[Document(id='4b09a821-9fe1-48aa-ac46-b07302183321', metadata={'source': 'speech.txt'}, page_content='the pressure and heat. It changes internally, becoming rigid and unyielding.'),
 Document(id='9f49694a-cd7c-4af1-bdf3-2376dbf8718a', metadata={'source': 'speech.txt'}, page_content='and a coffee bean. All are subjected to the same boiling water, an intense challenge or adversity.'),
 Document(id='481d7190-82a6-48d1-a6e7-925c270f09f8', metadata={'source': 'speech.txt'}, page_content='weak. It loses its form and substance.'),
 Document(id='d5358874-48f7-406b-9d36-e61349896486', metadata={'source': 'speech.txt'}, page_content='the water itself, creating a rich, new brew.')]

In [8]:
ctordb = Chroma.from_documents(documents = final_document, embedding = embedding, persist_directory = "./chroma-db")


In [10]:
db = Chroma(persist_directory="./chroma-db/", embedding_function=embedding)
docs1 = db.similarity_search(query)
print(docs1)

[Document(id='31e884ab-468e-4d9f-a9ce-1e515ef0829a', metadata={'source': 'speech.txt'}, page_content='the pressure and heat. It changes internally, becoming rigid and unyielding.'), Document(id='a0da1be6-e017-47d1-97f2-4cd2494be871', metadata={'source': 'speech.txt'}, page_content='and a coffee bean. All are subjected to the same boiling water, an intense challenge or adversity.'), Document(id='f440c2c4-052c-44b2-9620-18553b13b324', metadata={'source': 'speech.txt'}, page_content='weak. It loses its form and substance.'), Document(id='bef82a5e-6ada-4f3b-a631-c40f9b2a8ed3', metadata={'source': 'speech.txt'}, page_content='the water itself, creating a rich, new brew.')]
