### Embeddings using OpenAI and a vector DB using ChromaDB
#### Converting text into vectors

In [1]:
import os
from dotenv import load_dotenv
# Load environment variables (such as the OPENAI_API_KEY read in the next cell)
# with python-dotenv's load_dotenv; its return value is shown as the cell output.
load_dotenv()



True

In [2]:
# Verify that the OpenAI API key is available in the process environment
# before any OpenAI client is created.
#
# The previous form, os.environ[...] = os.getenv(...), was a no-op when the key
# was set and raised an opaque "TypeError: str expected, not NoneType" when it
# was missing, because os.environ only accepts string values. Fail early with
# an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [3]:
from langchain_openai import OpenAIEmbeddings

# OpenAI offers more than one embedding model (see the OpenAI embeddings
# documentation); this notebook uses "text-embedding-3-large".
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)
embeddings


OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001C54B368F50>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001C54B139ED0>, model='text-embedding-3-large', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [4]:
# Example: turn one sentence into an embedding vector with the OpenAI client
# configured above.
text = "this is my first embedding from openai text embedding 3 large "
text_vectors = embeddings.embed_query(text)
# NOTE(review): this displays the entire vector as the cell output; consider
# showing only a slice (e.g. text_vectors[:5]) to keep the notebook readable.
text_vectors

[0.02438170090317726,
 0.015331151895225048,
 -0.0076848650351166725,
 -0.018394295126199722,
 0.018718356266617775,
 -0.0017370032146573067,
 -0.012468616478145123,
 0.07234266400337219,
 0.012414606288075447,
 0.010979481041431427,
 0.0351528562605381,
 -0.00018300741794519126,
 -0.01082516647875309,
 -0.0030052759684622288,
 0.016773993149399757,
 0.031017227098345757,
 0.033208493143320084,
 0.04678817465901375,
 -0.023625558242201805,
 -0.048022691160440445,
 0.022221297025680542,
 -0.001561470446176827,
 -0.011951662600040436,
 0.025076115503907204,
 0.06462693959474564,
 -0.007507403381168842,
 0.014490136876702309,
 0.028285859152674675,
 -0.017792468890547752,
 0.007646286394447088,
 0.013309630565345287,
 0.0009697705390863121,
 -0.012198565527796745,
 -0.01663510873913765,
 0.029258040711283684,
 0.010400800965726376,
 0.04209701344370842,
 0.033671438694000244,
 -0.018363432958722115,
 0.00837928056716919,
 0.029504943639039993,
 0.0068284193985164165,
 -0.02342494949698448

In [5]:
# Length of the embedding returned by "text-embedding-3-large" when no
# `dimensions` value is requested (3072 in the output below). It is not a
# fixed size for every setup: a different size can be requested with the
# `dimensions` argument, as the next cell does.
len(text_vectors)


3072

In [6]:
# Same model as before, but request 1069-dimensional vectors through the
# `dimensions` argument, then embed the same example sentence.
embeddings_1069 = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1069,
)
text_vectors_1069 = embeddings_1069.embed_query(text)
text_vectors_1069

[0.03270971402525902,
 0.02062765508890152,
 -0.01039679441601038,
 -0.024703405797481537,
 0.025097498670220375,
 -0.002347704954445362,
 -0.016748949885368347,
 0.09732021391391754,
 0.016717838123440742,
 0.014788856729865074,
 0.04733263701200485,
 -0.00029670403455384076,
 -0.014550326392054558,
 -0.003995376639068127,
 0.02254626527428627,
 0.041670143604278564,
 0.044677697122097015,
 0.06280597299337387,
 -0.03177633509039879,
 -0.06450679153203964,
 0.029909580945968628,
 -0.002028800779953599,
 -0.016085214912891388,
 0.03368457779288292,
 0.08686638623476028,
 -0.010085668414831161,
 0.019455745816230774,
 0.038206275552511215,
 -0.023894477635622025,
 0.010241230949759483,
 0.01782752014696598,
 0.0012788575841113925,
 -0.016427453607320786,
 -0.022359589114785194,
 0.039367809891700745,
 0.01393844559788704,
 0.05662493407726288,
 0.0452999472618103,
 -0.02474488876760006,
 0.011242019943892956,
 0.03965819627046585,
 0.009173031896352768,
 -0.03150669485330582,
 -0.011076

In [7]:
# Check that the returned vector has the 1069 dimensions that were requested.
len(text_vectors_1069)

1069

#### Vector DB using ChromaDB

In [8]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Data loading: read the source text file into LangChain documents.
text_loader = TextLoader("aitrading.txt")
text_documents = text_loader.load()

# Data transformation: split the documents into smaller chunks using
# chunk_size=300 and chunk_overlap=50 (the output below shows neighbouring
# chunks sharing some text).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
splitted_text_docs = text_splitter.split_documents(text_documents)
splitted_text_docs




[Document(metadata={'source': 'aitrading.txt'}, page_content='Artificial intelligence (AI) has revolutionized various industries, and stock trading is no exception. The integration of AI in stock trading has led to the development of sophisticated algorithms capable of analyzing vast amounts of data at unprecedented speeds. Traders and investors now rely on AI'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='speeds. Traders and investors now rely on AI to make informed decisions based on real-time market trends, historical data, and predictive analytics. The accuracy and efficiency of AI-driven trading systems have outpaced traditional methods, enabling the identification of profitable opportunities'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='the identification of profitable opportunities with minimal human intervention.'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='These AI systems use machine learning models, such as neural netw

In [9]:
# Store the chunk embeddings in a Chroma vector store so they can be searched
# in later cells.
from langchain_community.vectorstores import Chroma

# Embed every chunk with the 1069-dimension embedding client and index it.
#
# Two arguments make this cell safe to re-run in the same kernel:
# - `ids`: without explicit ids, fresh random ids are generated on each call,
#   so re-running the cell would add a second copy of every chunk and
#   similarity_search would start returning duplicates. Deterministic ids make
#   a re-run overwrite the same entries instead.
# - `collection_name`: keeps these vectors in their own collection rather than
#   the library's default one, which other Chroma stores created in this
#   process (possibly with a different embedding size) would also use.
db_1069 = Chroma.from_documents(
    splitted_text_docs,
    embeddings_1069,
    ids=[f"aitrading-{i}" for i in range(len(splitted_text_docs))],
    collection_name="aitrading_1069",
)
db_1069

<langchain_community.vectorstores.chroma.Chroma at 0x1c54b15d510>

In [10]:
# How to search the vector DB: pass a query string and get back the stored
# chunks that are most similar to it.
query_text = "However, the adoption of AI in stock trading is not without challenges"
retrieved_results_from_db_1069 = db_1069.similarity_search(query_text)
retrieved_results_from_db_1069

[Document(metadata={'source': 'aitrading.txt'}, page_content='However, the adoption of AI in stock trading is not without challenges. The reliance on algorithms can lead to unforeseen market volatility, especially when multiple AI systems simultaneously execute trades based on similar data patterns. Moreover, the black-box nature of some AI models makes it'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='Despite these challenges, the benefits of AI in stock trading are undeniable. As technology continues to advance, AI-driven trading systems are expected to become even more sophisticated, further transforming the financial markets. Investors who embrace AI are likely to gain a significant'),
 Document(metadata={'source': 'aitrading.txt'}, page_content='Artificial intelligence (AI) has revolutionized various industries, and stock trading is no exception. The integration of AI in stock trading has led to the development of sophisticated algorithms capable of analyzing vas