In [1]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment so that
# credentials stay out of the source code.
# NOTE(review): presumably this is where the OpenAI API key comes from --
# confirm against the project's .env file.
load_dotenv()

# Build the embedding client that turns text into numeric vectors.
#   model      -- name of the OpenAI embedding model to call.
#   dimensions -- requested length of each returned vector (32 here).
#                 Shorter vectors are cheaper to store and compare, at the
#                 cost of carrying less detail.
embedding = OpenAIEmbeddings(
    model='text-embedding-3-small',
    dimensions=32,
)

# The texts to embed; each string is treated as one separate document.
documents = [
    "Tokyo is the capital of Japan",
    "Rome is the capital of Italy",
    "Berlin is the capital of Germany",
]

# Embed the whole list in a single batched call instead of one call per text.
# The return value is a list holding one vector (a list of floats) for each
# corresponding entry in 'documents'.
result = embedding.embed_documents(documents)

# Show the raw vectors. print() already converts its argument with str(),
# so no explicit conversion is needed to get the same output.
print(result)

[[0.04623187333345413, -0.03608284145593643, 0.08614983409643173, 0.19587160646915436, -0.01688387058675289, -0.08273564279079437, -0.26228460669517517, 0.27294811606407166, 0.15611733496189117, -0.36012688279151917, 0.1310487687587738, 0.25274360179901123, 0.030470475554466248, -0.07024812698364258, -0.1956845223903656, -0.06631946563720703, -0.3105509579181671, 0.1813729852437973, 0.32383355498313904, -0.07913437485694885, 0.05944431945681572, -0.18661120533943176, -0.02384086698293686, 0.12945859134197235, 0.26060089468955994, 0.2607879936695099, 0.022648239508271217, 0.1583622843027115, 0.1851145625114441, 0.07735712081193924, -0.031078482046723366, -0.1333872526884079], [-0.1305832415819168, 0.022755296900868416, 0.14982350170612335, -0.09551817178726196, -0.12261391431093216, -0.002385463798418641, -0.38639891147613525, 0.10411366820335388, 0.15471895039081573, -0.42738404870033264, -0.08720729500055313, 0.25160327553749084, -0.23384305834770203, 0.01739022694528103, 0.0258861072