In [2]:
from langchain_chroma import Chroma

from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# loading splitting embedding data
txt_loader = TextLoader('aitrading.txt')
text_data = txt_loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=50)
splitted_docs = splitter.split_documents(text_data)

embedding  = OllamaEmbeddings(model='gemma:2b')
vectors = embedding.embed_documents(splitted_docs)
vectors

[[0.0996420755982399,
  -2.7930102348327637,
  -1.3544838428497314,
  0.1321248710155487,
  -0.7876529693603516,
  -1.5258777141571045,
  -0.28081977367401123,
  -0.8861866593360901,
  1.6933008432388306,
  -0.45332595705986023,
  0.14283521473407745,
  -1.1582690477371216,
  -0.3037075698375702,
  1.484845519065857,
  -1.2946580648422241,
  0.5558221340179443,
  1.9853477478027344,
  0.6229881644248962,
  -1.007188320159912,
  0.046096328645944595,
  0.1681913137435913,
  -0.06816165149211884,
  0.5498296022415161,
  1.6526904106140137,
  2.2413697242736816,
  1.1931183338165283,
  -0.5702166557312012,
  0.16624869406223297,
  0.653521716594696,
  -0.7089496850967407,
  0.020702378824353218,
  -1.102590799331665,
  -0.30798470973968506,
  -0.010753517970442772,
  -1.0837476253509521,
  -0.5053603649139404,
  -1.2511087656021118,
  -0.2507474720478058,
  1.0532768964767456,
  -1.190142035484314,
  -0.48707911372184753,
  -0.03240957111120224,
  0.03327823430299759,
  -0.477987647056579

In [5]:
print("no of docs:",len(vectors))
print("shape of each doc:",len(vectors[0]))


no of docs: 10
shape of each doc: 2048


In [4]:
# using chroma to store 
vectordb  = Chroma.from_documents(documents=splitted_docs,embedding=embedding)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x1e1e2459c10>

In [6]:
# to query from db 
query = "does ths adaption of ai in stocks are with challenges?"
queried_docs = vectordb.similarity_search(query)
queried_docs

[Document(metadata={'source': 'aitrading.txt'}, page_content='who embrace AI are likely to gain a significant advantage, while those who resist may find themselves struggling to keep up with the rapid pace of change in the industry.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the black-box nature of some AI models makes it difficult for traders to understand the reasoning behind specific trading decisions, raising concerns about transparency and accountability.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the identification of profitable opportunities with minimal human intervention.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='impacts on stock prices. This ability to process unstructured data has given traders a competitive edge, as they can respond to market changes more swiftly and accurately.')]

In [7]:
# saving to local disk 
vectordb  = Chroma.from_documents(documents=splitted_docs,embedding=embedding,persist_directory="./chroma_db")

In [8]:
# to load the chroma vectordb from local
vectordb_loaded = Chroma(persist_directory="./chroma_db",embedding_function=embedding)
docs_new = vectordb_loaded.similarity_search(query)
docs_new

[Document(metadata={'source': 'aitrading.txt'}, page_content='who embrace AI are likely to gain a significant advantage, while those who resist may find themselves struggling to keep up with the rapid pace of change in the industry.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the black-box nature of some AI models makes it difficult for traders to understand the reasoning behind specific trading decisions, raising concerns about transparency and accountability.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the identification of profitable opportunities with minimal human intervention.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='impacts on stock prices. This ability to process unstructured data has given traders a competitive edge, as they can respond to market changes more swiftly and accurately.')]

In [9]:
# as retriever 
retriever_cdb  = vectordb_loaded.as_retriever()
retriever_cdb.invoke(query)

[Document(metadata={'source': 'aitrading.txt'}, page_content='who embrace AI are likely to gain a significant advantage, while those who resist may find themselves struggling to keep up with the rapid pace of change in the industry.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the black-box nature of some AI models makes it difficult for traders to understand the reasoning behind specific trading decisions, raising concerns about transparency and accountability.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the identification of profitable opportunities with minimal human intervention.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='impacts on stock prices. This ability to process unstructured data has given traders a competitive edge, as they can respond to market changes more swiftly and accurately.')]