# OpenAI Embeddings

Use OpenAI's embedding model to compare sentence similarities.

In [24]:
import os
import openai
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) before reading the key.
load_dotenv()

# Fail fast: a missing OPENAI_API_KEY raises KeyError here rather than later.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [25]:
# Sample inputs: two sentences about dogs/canines and one about the weather.
sentence1, sentence2, sentence3 = (
    "i like dogs",
    "i like canines",
    "the weather is ugly outside",
)

In [26]:
from time import sleep

def get_embedding(text, model="text-embedding-ada-002", max_retries=5, retry_delay=5):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    The request is retried when it raises, but only a bounded number of
    times, so a permanent failure (for example a bad API key) surfaces as an
    exception instead of looping forever.

    Args:
        text: The string to embed; it is sent as a single-element input list.
        model: Name of the embedding model passed to the API.
        max_retries: How many times to retry after the first failed attempt.
        retry_delay: Seconds to wait before each retry.

    Returns:
        The value at ``res['data'][0]['embedding']`` of the API response.

    Raises:
        Exception: Re-raises the last error once ``max_retries`` retries have
            all failed.
    """
    failures = 0
    while True:
        try:
            res = openai.Embedding.create(input=[text], model=model)
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still stop the cell while a request is in flight.
        except Exception as error:
            failures += 1
            if failures > max_retries:
                raise
            print("An error occurred:", error)
            sleep(retry_delay)
        else:
            return res['data'][0]['embedding']

In [27]:
# Embed the three sample sentences in order, one get_embedding call each.
embedding1, embedding2, embedding3 = [
    get_embedding(sentence)
    for sentence in (sentence1, sentence2, sentence3)
]

## Use Dot Product to Compare Embeddings

In [28]:
import numpy as np

## Compare embedding1 to embedding2

Because these two sentences mean nearly the same thing, the dot product of their embeddings is high.

In [29]:
# Dot product of the sentence1 and sentence2 embeddings; shown as the cell output.
# NOTE(review): raw dot products are only comparable across pairs if the embeddings
# are unit-length (the score then equals cosine similarity) — confirm for this model.
np.dot(embedding1, embedding2)

0.9631676073007296

## Compare embedding1 to embedding3

Since these 2 sentences are not similar, their score is significantly lower.

In [30]:
# Dot product of the sentence1 and sentence3 embeddings; shown as the cell output.
np.dot(embedding1, embedding3)

0.7710631888387288

## Compare embedding2 to embedding3

Since these two sentences are also dissimilar, their score is close to the one above (roughly 0.76 versus 0.77).

In [31]:
# Dot product of the sentence2 and sentence3 embeddings; shown as the cell output.
np.dot(embedding2, embedding3)

0.7596683332753217