# OpenAI API

In [1]:
# Import the OpenAI client module and its embedding helper functions

import openai
from openai.embeddings_utils import get_embedding, get_embeddings   # !pip install openai==0.27.7

In [19]:
# Configure the OpenAI client with the key held in the OPENAI_API_KEY
# environment variable (api_key is set to None when the variable is unset)

import os
openai.api_key = os.getenv('OPENAI_API_KEY')

In [13]:
# Name of the OpenAI engine used for text embedding

ENGINE: str = "text-embedding-ada-002"

In [14]:
# Embed a sample sentence with the engine named by ENGINE and keep the resulting vector

embedded_text = get_embedding(
    'I love to be vectorized',
    engine=ENGINE,
)

In [18]:
# Sanity check: the embedding vector should have the expected length of 1536 (displays True when it does)
len(embedded_text) == 1536

True

# Getting text embeddings from a pre-trained open-source bi-encoder

In [2]:
# Importing the SentenceTransformer class from the sentence-transformers library

from sentence_transformers import SentenceTransformer  # pip install sentence-transformers

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Load the pre-trained 'multi-qa-mpnet-base-cos-v1' checkpoint as a SentenceTransformer model
model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Example documents (two short sentences about London) to generate embeddings for
docs = [
    "Around 9 million people live in London",
    "London is known for its financial district",
]

In [7]:
# Encode every document in `docs` into a vector embedding
doc_emb = model.encode(
    docs,                    # the documents to embed (an iterable of strings)
    show_progress_bar=True,  # render a progress bar while encoding
    batch_size=32,           # number of documents embedded per batch
)

Batches: 100%|██████████| 1/1 [00:02<00:00,  2.39s/it]


In [8]:
# The embeddings have shape (2, 768): two embeddings (one per document), each of length 768
doc_emb.shape

(2, 768)