# Experiments with embeddings

In [1]:
import numpy as np
from tabulate import tabulate

import embed

In [2]:
# Build a small float32 array from a Python list; the embeddings displayed
# later in this notebook are float32 arrays as well.
np.array([1, 2, 3, 4], dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

## `embed_one` and `embed_many`

These functions use `openai.Embedding.create`.

### `embed.embed_one`

In [3]:
# Embed a single placeholder sentence; `embedding` is reused below as a
# baseline to compare the other embeddings against.
embedding = embed.embed_one("Your text string goes here")

In [4]:
# Display the vector (NumPy elides the middle of long arrays with `...`).
embedding

array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [5]:
# Check the Python type of the value returned by embed_one.
type(embedding)

numpy.ndarray

In [6]:
# Embed the same word in English and in Spanish, one embed_one call each.
cat, gato = [embed.embed_one(word) for word in ("cat", "gato")]

In [7]:
# Dot product of the "cat" and "gato" embeddings (same word, two languages).
np.dot(cat, gato)

0.8447363

In [8]:
# Baseline: "cat" against the placeholder sentence "Your text string goes here".
np.dot(cat, embedding)

0.76458704

In [9]:
# Two short sentences, each in English and Spanish, embedded one call at a
# time and in the order listed.
catrun_en, catrun_es, dogwalk_en, dogwalk_es = [
    embed.embed_one(sentence)
    for sentence in (
        "The cat runs.",
        "El gato corre.",
        "The dog walks.",
        "El perro camina.",
    )
]

In [10]:
# Same sentence, different languages: "The cat runs." vs "El gato corre."
np.dot(catrun_en, catrun_es)

0.92243814

In [11]:
# Baseline: "The cat runs." against the placeholder sentence.
np.dot(catrun_en, embedding)

0.7650094

In [12]:
# Same sentence, different languages: "The dog walks." vs "El perro camina."
np.dot(dogwalk_en, dogwalk_es)

0.92638606

In [13]:
# Different sentences, both English: "The cat runs." vs "The dog walks."
np.dot(catrun_en, dogwalk_en)

0.88560975

In [14]:
# Different sentences, both Spanish: "El gato corre." vs "El perro camina."
np.dot(catrun_es, dogwalk_es)

0.89640963

In [15]:
# Different sentences AND languages: "The cat runs." vs "El perro camina."
np.dot(catrun_en, dogwalk_es)

0.8421978

In [16]:
# Different sentences AND languages: "El gato corre." vs "The dog walks."
np.dot(catrun_es, dogwalk_en)

0.8305687

In [17]:
# Shape of a single embedding: a 1-D vector.
catrun_en.shape

(1536,)

### `embed.embed_many`

In [18]:
# Embed the same five texts used above, this time in one embed_many call.
many = embed.embed_many([
    "Your text string goes here",
    "The cat runs.",
    "El gato corre.",
    "The dog walks.",
    "El perro camina.",
])
# The result is 2-D: one row per input text.
many.shape

(5, 1536)

In [19]:
# Display the whole batch of embeddings, one row per input text.
many

array([[-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
        -1.7017407e-02, -4.5471323e-05, -2.4047505e-02],
       [-9.1028195e-03, -4.1309856e-03, -1.1052059e-02, ...,
        -2.4805024e-02,  5.5642496e-03, -2.4155278e-02],
       [-1.4176358e-02, -2.9412776e-03,  3.8862362e-04, ...,
        -8.0564748e-03,  4.8509724e-03, -6.8856594e-03],
       [ 9.5758662e-03, -6.3913045e-03, -5.9445151e-03, ...,
        -1.2389688e-02, -3.8638557e-04, -1.7123753e-02],
       [ 3.4075065e-03, -7.4890251e-03,  1.3449042e-03, ...,
         4.3373939e-04, -8.7621594e-03, -6.0973149e-03]], dtype=float32)

In [20]:
# Rows 0 and 1 of the batch, i.e. the placeholder sentence vs "The cat runs."
# (assuming embed_many returns rows in input order — confirm).
np.dot(many[0], many[1])

0.7650132

In [21]:
# Single-call result for "The cat runs." vs "El gato corre.", repeated here
# for comparison with the batch results.
# NOTE(review): the batch counterpart of this pair would be many[1] . many[2],
# whereas the previous cell computes many[0] . many[1] — confirm which pair
# was meant to be compared.
np.dot(catrun_en, catrun_es)

0.92243814

In [22]:
# Tiny 2x2 integer matrix (values 1..4, filled row by row) used to illustrate
# transposition before applying it to the embeddings.
small = np.arange(1, 5).reshape(2, 2)
small

array([[1, 2],
       [3, 4]])

In [23]:
# transpose() swaps rows and columns; the next cell applies it to `many`.
small.transpose()

array([[1, 3],
       [2, 4]])

In [24]:
# Pairwise dot products of all rows: table[i, j] is many[i] . many[j], so the
# diagonal holds each embedding's product with itself.
table = np.matmul(many, many.transpose())
table

array([[1.0000004 , 0.7650132 , 0.73435843, 0.75096744, 0.7232142 ],
       [0.7650132 , 0.9999998 , 0.92241925, 0.88574004, 0.84237677],
       [0.73435843, 0.92241925, 0.9999999 , 0.8304955 , 0.8965036 ],
       [0.75096744, 0.88574004, 0.8304955 , 1.0000004 , 0.92635864],
       [0.7232142 , 0.84237677, 0.8965036 , 0.92635864, 1.0000005 ]],
      dtype=float32)

In [25]:
# Render the matrix as an HTML table, printing nine decimal places per entry.
tabulate(table, floatfmt=".9f", tablefmt="html")

0,1,2,3,4
1.000000358,0.765013218,0.73435843,0.750967443,0.723214209
0.765013218,0.999999821,0.92241925,0.885740042,0.842376769
0.73435843,0.92241925,0.999999881,0.830495477,0.896503627
0.750967443,0.885740042,0.830495477,1.000000358,0.92635864
0.723214209,0.842376769,0.896503627,0.92635864,1.000000477


## `embed_one_eu` and `embed_many_eu`

These functions are built on the helper functions in `openai.embeddings_utils`.

### `embed.embed_one_eu`

In [26]:
# Same placeholder sentence as in the embed_one section, embedded with the
# _eu variant so the two results can be compared.
embedding_eu = embed.embed_one_eu("Your text string goes here")

In [27]:
# Display the vector (NumPy elides the middle of long arrays with `...`).
embedding_eu

array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [28]:
# Check the Python type of the value returned by embed_one_eu.
type(embedding_eu)

numpy.ndarray

In [29]:
# Embed the same word in English and in Spanish, one embed_one_eu call each.
cat_eu, gato_eu = [embed.embed_one_eu(word) for word in ("cat", "gato")]

In [30]:
# Dot product of the "cat" and "gato" embeddings (same word, two languages).
np.dot(cat_eu, gato_eu)

0.8447363

In [31]:
# Baseline: "cat" against the placeholder sentence "Your text string goes here".
np.dot(cat_eu, embedding_eu)

0.76458704

In [32]:
# Two short sentences, each in English and Spanish, embedded one call at a
# time and in the order listed.
catrun_en_eu, catrun_es_eu, dogwalk_en_eu, dogwalk_es_eu = [
    embed.embed_one_eu(sentence)
    for sentence in (
        "The cat runs.",
        "El gato corre.",
        "The dog walks.",
        "El perro camina.",
    )
]

In [33]:
# Same sentence, different languages: "The cat runs." vs "El gato corre."
np.dot(catrun_en_eu, catrun_es_eu)

0.9224265

In [34]:
# Baseline: "The cat runs." against the placeholder sentence.
np.dot(catrun_en_eu, embedding_eu)

0.76505

In [35]:
# Same sentence, different languages: "The dog walks." vs "El perro camina."
np.dot(dogwalk_en_eu, dogwalk_es_eu)

0.92642254

In [36]:
# Different sentences, both English: "The cat runs." vs "The dog walks."
np.dot(catrun_en_eu, dogwalk_en_eu)

0.8857316

In [37]:
# Different sentences, both Spanish: "El gato corre." vs "El perro camina."
np.dot(catrun_es_eu, dogwalk_es_eu)

0.89640963

In [38]:
# Different sentences AND languages: "The cat runs." vs "El perro camina."
np.dot(catrun_en_eu, dogwalk_es_eu)

0.84227383

In [39]:
# Different sentences AND languages: "El gato corre." vs "The dog walks."
np.dot(catrun_es_eu, dogwalk_en_eu)

0.8305994

In [40]:
# Shape of a single embedding: a 1-D vector.
catrun_en_eu.shape

(1536,)

### `embed.embed_many_eu`

In [41]:
# Embed the same five texts used above, this time in one embed_many_eu call.
many_eu = embed.embed_many_eu([
    "Your text string goes here",
    "The cat runs.",
    "El gato corre.",
    "The dog walks.",
    "El perro camina.",
])
# Inspect the shape of THIS cell's result. The cell previously showed
# `many.shape`, i.e. the array from the earlier embed_many section, which only
# happened to have the same shape.
many_eu.shape

(5, 1536)

In [42]:
# Display the whole batch of embeddings, one row per input text.
many_eu

array([[-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
        -1.7017407e-02, -4.5471323e-05, -2.4047505e-02],
       [-9.1028195e-03, -4.1309856e-03, -1.1052059e-02, ...,
        -2.4805024e-02,  5.5642496e-03, -2.4155278e-02],
       [-1.4176358e-02, -2.9412776e-03,  3.8862362e-04, ...,
        -8.0564748e-03,  4.8509724e-03, -6.8856594e-03],
       [ 9.5758662e-03, -6.3913045e-03, -5.9445151e-03, ...,
        -1.2389688e-02, -3.8638557e-04, -1.7123753e-02],
       [ 3.4267190e-03, -7.4588875e-03,  1.3607008e-03, ...,
         4.5837829e-04, -8.7634120e-03, -6.1512413e-03]], dtype=float32)

In [43]:
# Rows 0 and 1 of the batch, i.e. the placeholder sentence vs "The cat runs."
# (assuming embed_many_eu returns rows in input order — confirm).
np.dot(many_eu[0], many_eu[1])

0.7650132

In [44]:
# Single-call result for "The cat runs." vs "El gato corre.", repeated here
# for comparison with the batch results.
# NOTE(review): the batch counterpart of this pair would be
# many_eu[1] . many_eu[2], whereas the previous cell computes
# many_eu[0] . many_eu[1] — confirm which pair was meant to be compared.
np.dot(catrun_en_eu, catrun_es_eu)

0.9224265

In [45]:
# Pairwise dot products of all rows: table_eu[i, j] is many_eu[i] . many_eu[j],
# so the diagonal holds each embedding's product with itself.
table_eu = np.matmul(many_eu, many_eu.transpose())
table_eu

array([[1.0000004 , 0.7650132 , 0.73435843, 0.75096744, 0.7232273 ],
       [0.7650132 , 0.9999998 , 0.92241925, 0.88574004, 0.8423008 ],
       [0.73435843, 0.92241925, 0.9999999 , 0.8304955 , 0.8964095 ],
       [0.75096744, 0.88574004, 0.8304955 , 1.0000004 , 0.92637044],
       [0.7232273 , 0.8423008 , 0.8964095 , 0.92637044, 1.0000002 ]],
      dtype=float32)

In [46]:
# Render the matrix as an HTML table, printing nine decimal places per entry.
tabulate(table_eu, floatfmt=".9f", tablefmt="html")

0,1,2,3,4
1.000000358,0.765013218,0.73435843,0.750967443,0.723227322
0.765013218,0.999999821,0.92241925,0.885740042,0.842300773
0.73435843,0.92241925,0.999999881,0.830495477,0.896409512
0.750967443,0.885740042,0.830495477,1.000000358,0.926370442
0.723227322,0.842300773,0.896409512,0.926370442,1.000000238
