## Converting Text into Vectors

In [1]:
import os
from dotenv import load_dotenv
# Load the variables defined in the .env file. The call is deliberately the
# cell's last expression, so its return value is what the cell displays.
load_dotenv()

True

In [2]:
# Make sure the OpenAI key is available before any client is constructed.
# os.getenv returns None when the variable is missing, and assigning None into
# os.environ raises an opaque "TypeError: str expected, not NoneType"; fail
# fast with an actionable message instead (an empty value is rejected too).
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to the .env file (or export it in "
        "the environment) and re-run the notebook from the top."
    )
os.environ["OPENAI_API_KEY"] = _api_key

In [3]:
# Create the OpenAI embeddings client used by the single-text example below.
# NOTE(review): the saved output under this cell reports
# model='text-embedding-ada-002', which does not match the model requested
# here - re-run the cell to refresh the output.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x78a37f4e0950>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x78a37f3b7020>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [5]:
# Embed one sample sentence; the saved output shows the result as a list of floats.
text = "This is a openai embeddings example"
vector_text = embeddings.embed_query(text)
# NOTE(review): this displays the entire vector. Showing a slice such as
# vector_text[:5] (plus len(vector_text)) would keep the output short -
# confirm before changing, since the saved output would need a re-run.
vector_text

[-0.01776592619717121,
 -0.0024440265260636806,
 -0.006882046349346638,
 -0.005981979891657829,
 0.00823906995356083,
 0.017585912719368935,
 7.999989611562341e-05,
 0.009021434932947159,
 -0.027001993730664253,
 -0.03639037907123566,
 0.006206996738910675,
 0.02228010632097721,
 0.0016036761226132512,
 -0.010018431581556797,
 0.007262843661010265,
 0.023000160232186317,
 0.017198191955685616,
 0.005888511426746845,
 0.016796624287962914,
 -0.02095077745616436,
 -0.003214275697246194,
 -0.003513720817863941,
 -0.016408903524279594,
 -0.005106145981699228,
 -0.0058088903315365314,
 -0.008495242334902287,
 0.0252710971981287,
 -0.027195854112505913,
 -0.00812136847525835,
 -0.029771428555250168,
 0.015010339207947254,
 -8.627439819974825e-05,
 0.007830577902495861,
 -0.02536802738904953,
 -0.009166830219328403,
 0.007096677552908659,
 0.008889887481927872,
 -0.027306631207466125,
 0.03719351440668106,
 -0.007546710781753063,
 -0.0013033654540777206,
 0.005161534994840622,
 -0.00021830938

# Here are all the steps for OpenAI embeddings
- Step 1: Data Ingestion - Load Documents
- Step 2: Text Splitter
- Step 3: Converting into vectors and storing in the Vector DB
- Step 4: Querying the DB to retrieve data

In [None]:
# Step 1 : Data Ingestion - Load Documents
# Read nodejs.pdf into Document objects; the saved output shows each one
# carrying 'source' and 'page' metadata alongside its page_content.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("nodejs.pdf")
pdf_document = loader.load()
pdf_document

[Document(metadata={'source': 'nodejs.pdf', 'page': 0}, page_content="Top Node.js Interview Questions and Answers\nQ: What is Node.js?\nA: Node.js is an open-source, cross-platform runtime environment built on Chrome's V8 JavaScript\nengine. It allows JavaScript to be used for server-side scripting, enabling developers to build\nscalable and high-performance applications.\nQ: What is the difference between Node.js and JavaScript?\nA: JavaScript is a programming language primarily used for client-side scripting in browsers, while\nNode.js is a runtime environment that enables JavaScript to run on the server.\nQ: What are the key features of Node.js?\nA: - Non-blocking and event-driven I/O\n- Built-in asynchronous programming support\n- Single-threaded but highly scalable\n- Large ecosystem via npm (Node Package Manager)\nQ: Explain the event loop in Node.js.\nA: The event loop is the mechanism that allows Node.js to perform non-blocking I/O operations. It\ncontinuously checks the call s

In [None]:
# Step 2 : Text Splitter
# Split the loaded documents into smaller, overlapping chunks
# (chunk_size=200, chunk_overlap=50) ready to be embedded.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(pdf_document)
chunks

[Document(metadata={'source': 'nodejs.pdf', 'page': 0}, page_content="Top Node.js Interview Questions and Answers\nQ: What is Node.js?\nA: Node.js is an open-source, cross-platform runtime environment built on Chrome's V8 JavaScript"),
 Document(metadata={'source': 'nodejs.pdf', 'page': 0}, page_content='engine. It allows JavaScript to be used for server-side scripting, enabling developers to build\nscalable and high-performance applications.\nQ: What is the difference between Node.js and JavaScript?'),
 Document(metadata={'source': 'nodejs.pdf', 'page': 0}, page_content='A: JavaScript is a programming language primarily used for client-side scripting in browsers, while\nNode.js is a runtime environment that enables JavaScript to run on the server.'),
 Document(metadata={'source': 'nodejs.pdf', 'page': 0}, page_content='Q: What are the key features of Node.js?\nA: - Non-blocking and event-driven I/O\n- Built-in asynchronous programming support\n- Single-threaded but highly scalable'),


In [10]:
# Step 3 : Converting into vectors and storing in Vector DB
# Turn the chunks into embedding vectors and store them in the vector DB (ChromaDB).
from langchain_community.vectorstores import Chroma

# OpenAI embedding model, requested with dimensions=1024.
vector_emeddings1024 = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1024,
)
# Embed the chunks and store the vectors in the vector DB.
db = Chroma.from_documents(chunks, vector_emeddings1024)
db


<langchain_community.vectorstores.chroma.Chroma at 0x78a37ee548c0>

In [11]:
# Step 4 : Query in db for getting any data
# Run a similarity search against the vector DB built in Step 3; the saved
# output below lists the Document chunks returned for this query.
query = "JavaScript  is  a  programming  language  primarily  used"
result = db.similarity_search(query)
result

[Document(metadata={'page': 0, 'source': 'nodejs.pdf'}, page_content='A: JavaScript is a programming language primarily used for client-side scripting in browsers, while\nNode.js is a runtime environment that enables JavaScript to run on the server.'),
 Document(metadata={'page': 0, 'source': 'nodejs.pdf'}, page_content='engine. It allows JavaScript to be used for server-side scripting, enabling developers to build\nscalable and high-performance applications.\nQ: What is the difference between Node.js and JavaScript?'),
 Document(metadata={'page': 0, 'source': 'nodejs.pdf'}, page_content="Top Node.js Interview Questions and Answers\nQ: What is Node.js?\nA: Node.js is an open-source, cross-platform runtime environment built on Chrome's V8 JavaScript"),
 Document(metadata={'page': 1, 'source': 'nodejs.pdf'}, page_content='systems, streams, and networking.\nQ: Explain how to secure a Node.js application.\nA: - Use HTTPS.\n- Sanitize and validate user inputs.')]