# Experiments with embeddings

In [1]:
import numpy as np
from tabulate import tabulate

import embed

In [2]:
np.array([1, 2, 3, 4], dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

## `embed_one` and `embed_many`

These functions use `openai.Embedding.create`.

### `embed.embed_one`

In [3]:
embedding = embed.embed_one("Your text string goes here")

In [4]:
embedding

array([-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
       -1.7017407e-02, -4.5471323e-05, -2.4047505e-02], dtype=float32)

In [5]:
type(embedding)

numpy.ndarray

In [6]:
# Embed the English word and its Spanish counterpart, in that order.
cat, gato = map(embed.embed_one, ("cat", "gato"))

In [7]:
np.dot(cat, gato)

0.8446758

In [8]:
np.dot(cat, embedding)

0.76422906

In [9]:
# Embed two short sentences, each in English (`_en`) and Spanish (`_es`),
# so the cells below can compare same-meaning/different-language pairs
# against different-meaning pairs.
catrun_en = embed.embed_one("The cat runs.")
catrun_es = embed.embed_one("El gato corre.")
dogwalk_en = embed.embed_one("The dog walks.")
dogwalk_es = embed.embed_one("El perro camina.")

In [10]:
np.dot(catrun_en, catrun_es)

0.9223838

In [11]:
np.dot(catrun_en, embedding)

0.7649304

In [12]:
np.dot(dogwalk_en, dogwalk_es)

0.92638606

In [13]:
np.dot(catrun_en, dogwalk_en)

0.88560975

In [14]:
np.dot(catrun_es, dogwalk_es)

0.89646125

In [15]:
np.dot(catrun_en, dogwalk_es)

0.8421978

In [16]:
np.dot(catrun_es, dogwalk_en)

0.8305696

In [17]:
catrun_en.shape

(1536,)

### `embed.embed_many`

In [18]:
# Embed five texts in a single call: the placeholder string used earlier,
# followed by the four English/Spanish sentences embedded one at a time above.
many = embed.embed_many([
    "Your text string goes here",
    "The cat runs.",
    "El gato corre.",
    "The dog walks.",
    "El perro camina.",
])
# Display the shape of the result (one row per input text).
many.shape

(5, 1536)

In [19]:
many

array([[-6.92928350e-03, -5.33642201e-03,  1.18758921e-02, ...,
        -1.70174073e-02, -4.54713227e-05, -2.40475051e-02],
       [-9.09975544e-03, -4.11655614e-03, -1.11006815e-02, ...,
        -2.47758050e-02,  5.55989938e-03, -2.42022909e-02],
       [-1.42147215e-02, -2.92078988e-03,  3.90018802e-04, ...,
        -8.08201265e-03,  4.93654609e-03, -6.90483581e-03],
       [ 9.59877204e-03, -6.36116648e-03, -5.89548331e-03, ...,
        -1.23801986e-02, -3.47282301e-04, -1.70813799e-02],
       [ 3.45983449e-03, -7.59354141e-03,  1.42495893e-03, ...,
         4.55097703e-04, -8.73538014e-03, -6.12724526e-03]], dtype=float32)

In [20]:
np.dot(many[0], many[1])

0.7649158

In [21]:
np.dot(catrun_en, catrun_es)

0.9223838

In [22]:
# Tiny 2x2 integer matrix, used to illustrate what `transpose` does.
small = np.arange(1, 5).reshape(2, 2)
small

array([[1, 2],
       [3, 4]])

In [23]:
# Swap rows and columns of the 2x2 example.
small.T

array([[1, 3],
       [2, 4]])

In [24]:
# Pairwise dot products between the rows of `many`:
# entry (i, j) is the dot product of many[i] and many[j].
table = np.matmul(many, many.T)
table

array([[1.0000004 , 0.7649161 , 0.73437065, 0.7510821 , 0.7233679 ],
       [0.7649161 , 1.0000002 , 0.92235714, 0.885664  , 0.8423544 ],
       [0.73437065, 0.92235714, 1.0000001 , 0.8305839 , 0.8966306 ],
       [0.7510821 , 0.885664  , 0.8305839 , 1.0000004 , 0.9264928 ],
       [0.7233679 , 0.8423544 , 0.8966306 , 0.9264928 , 0.99999976]],
      dtype=float32)

In [25]:
# Render `table` as an HTML table, formatting each float with nine decimal places.
tabulate(table, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
1.000000358,0.764916122,0.734370649,0.751082122,0.723367929
0.764916122,1.000000238,0.922357142,0.885663986,0.842354417
0.734370649,0.922357142,1.000000119,0.83058387,0.896630585
0.751082122,0.885663986,0.83058387,1.000000358,0.92649281
0.723367929,0.842354417,0.896630585,0.92649281,0.999999762


## `embed_one_eu` and `embed_many_eu`

These functions call helper functions from `openai.embeddings_utils`.

### `embed.embed_one_eu`

In [26]:
embedding_eu = embed.embed_one_eu("Your text string goes here")

In [27]:
embedding_eu

array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [28]:
type(embedding_eu)

numpy.ndarray

In [29]:
# Embed the English word and its Spanish counterpart, in that order.
cat_eu, gato_eu = map(embed.embed_one_eu, ("cat", "gato"))

In [30]:
np.dot(cat_eu, gato_eu)

0.84464866

In [31]:
np.dot(cat_eu, embedding_eu)

0.76458704

In [32]:
# Embed two short sentences, each in English (`_en`) and Spanish (`_es`),
# using `embed_one_eu`; the cells below compare the resulting vectors pairwise.
catrun_en_eu = embed.embed_one_eu("The cat runs.")
catrun_es_eu = embed.embed_one_eu("El gato corre.")
dogwalk_en_eu = embed.embed_one_eu("The dog walks.")
dogwalk_es_eu = embed.embed_one_eu("El perro camina.")

In [33]:
np.dot(catrun_en_eu, catrun_es_eu)

0.92243814

In [34]:
np.dot(catrun_en_eu, embedding_eu)

0.7650094

In [35]:
np.dot(dogwalk_en_eu, dogwalk_es_eu)

0.92638606

In [36]:
np.dot(catrun_en_eu, dogwalk_en_eu)

0.88560975

In [37]:
np.dot(catrun_es_eu, dogwalk_es_eu)

0.89640963

In [38]:
np.dot(catrun_en_eu, dogwalk_es_eu)

0.8421978

In [39]:
np.dot(catrun_es_eu, dogwalk_en_eu)

0.8305687

In [40]:
catrun_en_eu.shape

(1536,)

### `embed.embed_many_eu`

In [41]:
# Embed five texts in a single call via `embed_many_eu`: the placeholder
# string followed by the four English/Spanish sentences.
many_eu = embed.embed_many_eu([
    "Your text string goes here",
    "The cat runs.",
    "El gato corre.",
    "The dog walks.",
    "El perro camina.",
])
# Display the shape of the array created in this cell. The original displayed
# `many.shape`, a copy-paste slip that re-checked the earlier `embed_many`
# result and never inspected `many_eu`.
many_eu.shape

(5, 1536)

In [42]:
many_eu

array([[-6.92928350e-03, -5.33642201e-03,  1.18758921e-02, ...,
        -1.70174073e-02, -4.54713227e-05, -2.40475051e-02],
       [-9.06457007e-03, -4.09244280e-03, -1.10980421e-02, ...,
        -2.47713905e-02,  5.54583408e-03, -2.42741778e-02],
       [-1.42147215e-02, -2.92078988e-03,  3.90018802e-04, ...,
        -8.08201265e-03,  4.93654609e-03, -6.90483581e-03],
       [ 9.59877204e-03, -6.36116648e-03, -5.89548331e-03, ...,
        -1.23801986e-02, -3.47282301e-04, -1.70813799e-02],
       [ 3.45983449e-03, -7.59354141e-03,  1.42495893e-03, ...,
         4.55097703e-04, -8.73538014e-03, -6.12724526e-03]], dtype=float32)

In [43]:
np.dot(many_eu[0], many_eu[1])

0.7649304

In [44]:
np.dot(catrun_en_eu, catrun_es_eu)

0.92243814

In [45]:
# Pairwise dot products between the rows of `many_eu`:
# entry (i, j) is the dot product of many_eu[i] and many_eu[j].
table_eu = np.matmul(many_eu, many_eu.T)
table_eu

array([[1.0000004 , 0.76493055, 0.73437065, 0.7510821 , 0.7233679 ],
       [0.76493055, 1.0000005 , 0.92238605, 0.8856448 , 0.8423538 ],
       [0.73437065, 0.92238605, 1.0000001 , 0.8305839 , 0.8966306 ],
       [0.7510821 , 0.8856448 , 0.8305839 , 1.0000004 , 0.9264928 ],
       [0.7233679 , 0.8423538 , 0.8966306 , 0.9264928 , 0.99999976]],
      dtype=float32)

In [46]:
# Render `table_eu` as an HTML table, formatting each float with nine decimal places.
tabulate(table_eu, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
1.000000358,0.764930546,0.734370649,0.751082122,0.723367929
0.764930546,1.000000477,0.92238605,0.885644794,0.842353821
0.734370649,0.92238605,1.000000119,0.83058387,0.896630585
0.751082122,0.885644794,0.83058387,1.000000358,0.92649281
0.723367929,0.842353821,0.896630585,0.92649281,0.999999762
