### Embedding Techniques

Converting Text to Vectors

In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment, so the
# OpenAI API key read in the next cell does not have to be written in the notebook.
load_dotenv()

True

In [2]:
def ensure_env_var(name: str = 'OPENAI_API_KEY') -> bool:
    """Re-export environment variable ``name`` and report whether it is set.

    ``os.environ`` only accepts string values, so assigning the result of
    ``os.getenv`` directly raises ``TypeError: str expected, not NoneType``
    whenever the variable is missing. This helper checks first and emits an
    actionable warning instead.

    Args:
        name: Name of the environment variable to check.

    Returns:
        True when the variable holds a non-empty value (which is written back
        to ``os.environ``); False when it is unset or empty, in which case a
        ``UserWarning`` is emitted and the environment is left untouched.
    """
    import warnings  # local import keeps this cell self-contained

    value = os.getenv(name)
    if not value:
        warnings.warn(
            f"{name} is not set; add it to your .env file or export it "
            "before calling the OpenAI API.",
            stacklevel=2,
        )
        return False
    os.environ[name] = value
    return True


# Trailing semicolon keeps the cell output empty, as in the original cell.
ensure_env_var('OPENAI_API_KEY');

In [3]:
from langchain_openai import OpenAIEmbeddings

# Create the embedding client for the 'text-embedding-3-large' model.
# No `dimensions` override is passed here.
embedding_model_name = 'text-embedding-3-large'
embeddings = OpenAIEmbeddings(model=embedding_model_name)
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000243DA2EEBF0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000243DA7F5E70>, model='text-embedding-3-large', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [4]:
text = "This is a tutorial on OPENAI embedding"
# Embed the sentence with the default-dimension client.
query_result = embeddings.embed_query(text)
# Preview only the leading values; echoing the whole vector floods the
# notebook output. The next cell reports the full length.
query_result[:5]

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************n8gA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
# Check the dimensions: the length of the vector stored in `query_result`

len(query_result)

In [8]:
# Same model as before, but request 1024-dimensional vectors via `dimensions`.
embeddings_1024 = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
)

In [9]:
text = "This is a tutorial on OPENAI embedding"
# Embed the same sentence with the client configured for 1024 dimensions.
query_result = embeddings_1024.embed_query(text)
# Show the vector length (to confirm the reduced size) together with a short
# preview, instead of echoing every value of the embedding.
len(query_result), query_result[:5]

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************n8gA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

### Load a text file, split it into chunks, embed the chunks, store them in Chroma DB, and retrieve results for a query

In [10]:
# Load the speech text file into LangChain documents
from langchain_community.document_loaders import TextLoader

speech_path = './data/speech.txt'
loader = TextLoader(speech_path)
docs = loader.load()

In [11]:
# Break the loaded documents into overlapping chunks with RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitters = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
final_doc = text_splitters.split_documents(docs)

In [14]:
# Embedding client for the document chunks. This repeats the configuration of
# the earlier `embeddings_1024` cell so this section can be run on its own.
embeddings_1024 = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
)


In [15]:
# Embed every chunk and index the resulting vectors in a Chroma vector store
from langchain_community.vectorstores import Chroma

db = Chroma.from_documents(
    documents=final_doc,
    embedding=embeddings_1024,
)
db


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************n8gA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
# Retrieve the stored chunks most similar to the query
query = "Kalam collapsed and died from an apparent cardiac arrest"
retrieved_results = db.similarity_search(query)
# End on the bare expression so the notebook renders the result list itself,
# rather than print()'s single-line repr.
retrieved_results