# Experiments with embeddings

In [1]:
import numpy as np
from tabulate import tabulate

import embed

In [2]:
# Scratch check: a tiny array with an explicit float32 dtype (the embeddings shown below are float32 too).
np.array([1, 2, 3, 4], dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

## `embed_one` and `embed_many`

These functions use `openai.Embedding.create`.

### `embed.embed_one`

In [3]:
# Embed one placeholder sentence; it is reused below as a baseline for comparisons.
embedding = embed.embed_one('Your text string goes here')

In [4]:
# Display the vector (NumPy elides the middle of long arrays).
embedding

array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [5]:
# Check which type embed_one returned.
type(embedding)

numpy.ndarray

In [6]:
# Embed an English word and its Spanish translation, one embed_one call each.
cat, gato = map(embed.embed_one, ('cat', 'gato'))

In [7]:
# Dot product of the 'cat' and 'gato' embeddings: the same word in English and Spanish.
np.dot(cat, gato)

0.84436065

In [8]:
# Baseline: 'cat' against the placeholder sentence held in `embedding`.
np.dot(cat, embedding)

0.7642809

In [9]:
# Two short sentences, each in English and in Spanish, embedded one call at a time.
catrun_en, catrun_es, dogwalk_en, dogwalk_es = map(
    embed.embed_one,
    ('The cat runs.', 'El gato corre.', 'The dog walks.', 'El perro camina.'),
)

In [10]:
# Same sentence ('The cat runs.'), English vs. Spanish.
np.dot(catrun_en, catrun_es)

0.9224194

In [11]:
# Baseline: 'The cat runs.' against the placeholder sentence held in `embedding`.
np.dot(catrun_en, embedding)

0.7650559

In [12]:
# Same sentence ('The dog walks.'), English vs. Spanish.
np.dot(dogwalk_en, dogwalk_es)

0.9269187

In [13]:
# Different sentences, both in English.
np.dot(catrun_en, dogwalk_en)

0.8857113

In [14]:
# Different sentences, both in Spanish.
np.dot(catrun_es, dogwalk_es)

0.8966038

In [15]:
# Different sentences in different languages: English cat sentence vs. Spanish dog sentence.
np.dot(catrun_en, dogwalk_es)

0.8422663

In [16]:
# Different sentences in different languages: Spanish cat sentence vs. English dog sentence.
np.dot(catrun_es, dogwalk_en)

0.83059084

In [17]:
# Length of a single embedding vector.
catrun_en.shape

(1536,)

### `embed.embed_many`

In [18]:
# Embed all five texts with a single embed_many call, then check the result's shape.
many = embed.embed_many([
    'Your text string goes here',
    'The cat runs.',
    'El gato corre.',
    'The dog walks.',
    'El perro camina.',
])
many.shape

(5, 1536)

In [19]:
# Display the batch result; presumably one row per input text, in input order.
many

array([[-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
        -1.7028622e-02, -8.8358240e-05, -2.4030920e-02],
       [-9.0430975e-03, -4.0786345e-03, -1.1095160e-02, ...,
        -2.4764959e-02,  5.5921902e-03, -2.4191400e-02],
       [-1.4204165e-02, -2.9433765e-03,  4.0550550e-04, ...,
        -8.1148576e-03,  4.9277819e-03, -6.8172398e-03],
       [ 9.5597431e-03, -6.3604913e-03, -5.9328689e-03, ...,
        -1.2309198e-02, -3.6862658e-04, -1.7130248e-02],
       [ 3.3449344e-03, -8.1113884e-03,  1.7457254e-03, ...,
         9.9317353e-05, -9.5703155e-03, -5.5582649e-03]], dtype=float32)

In [20]:
# Assuming rows follow input order, this is the placeholder text vs. 'The cat runs.',
# i.e. the batch counterpart of np.dot(catrun_en, embedding).
np.dot(many[0], many[1])

0.7650559

In [21]:
# Single-call value for 'The cat runs.' vs. 'El gato corre.' again,
# presumably for comparison with the batch-derived products.
np.dot(catrun_en, catrun_es)

0.9224194

In [22]:
# 2x2 integer matrix with distinct entries, used below to illustrate transpose().
small = np.arange(1, 5).reshape(2, 2)
small

array([[1, 2],
       [3, 4]])

In [23]:
# Swap rows and columns; .T is the transposed view of the array.
small.T

array([[1, 3],
       [2, 4]])

In [24]:
# Dot product of every row of `many` with every row: entry [i, j] is many[i] . many[j].
table = np.matmul(many, many.T)
table

array([[0.99999976, 0.76505584, 0.7345198 , 0.75117296, 0.7228675 ],
       [0.76505584, 1.        , 0.9224138 , 0.8857111 , 0.8422665 ],
       [0.7345198 , 0.9224138 , 1.        , 0.8305976 , 0.89663   ],
       [0.75117296, 0.8857111 , 0.8305976 , 1.0000002 , 0.9269196 ],
       [0.7228675 , 0.8422665 , 0.89663   , 0.9269196 , 1.        ]],
      dtype=float32)

In [25]:
# Render the table as HTML, formatting the floats with nine decimal places.
tabulate(table, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
0.999999762,0.765055835,0.73451978,0.75117296,0.722867489
0.765055835,1.0,0.922413826,0.885711074,0.8422665
0.73451978,0.922413826,1.0,0.830597579,0.896629989
0.75117296,0.885711074,0.830597579,1.000000238,0.92691958
0.722867489,0.8422665,0.896629989,0.92691958,1.0


## `embed_one_eu` and `embed_many_eu`

These functions use the helper functions in `openai.embeddings_utils`.

### `embed.embed_one_eu`

In [26]:
# Embed the placeholder sentence with the embeddings_utils-based variant.
embedding_eu = embed.embed_one_eu('Your text string goes here')

In [27]:
# Display the vector (NumPy elides the middle of long arrays).
embedding_eu

array([-7.0175570e-03, -5.3303856e-03,  1.1863974e-02, ...,
       -1.7006150e-02, -8.3129737e-05, -2.4037149e-02], dtype=float32)

In [28]:
# Check which type embed_one_eu returned.
type(embedding_eu)

numpy.ndarray

In [29]:
# Embed an English word and its Spanish translation, one embed_one_eu call each.
cat_eu, gato_eu = map(embed.embed_one_eu, ('cat', 'gato'))

In [30]:
# Dot product of the 'cat' and 'gato' embeddings: the same word in English and Spanish.
np.dot(cat_eu, gato_eu)

0.84464866

In [31]:
# Baseline: 'cat' against the placeholder sentence held in `embedding_eu`.
np.dot(cat_eu, embedding_eu)

0.76448596

In [32]:
# Two short sentences, each in English and in Spanish, embedded one call at a time.
catrun_en_eu, catrun_es_eu, dogwalk_en_eu, dogwalk_es_eu = map(
    embed.embed_one_eu,
    ('The cat runs.', 'El gato corre.', 'The dog walks.', 'El perro camina.'),
)

In [33]:
# Same sentence ('The cat runs.'), English vs. Spanish.
np.dot(catrun_en_eu, catrun_es_eu)

0.9224139

In [34]:
# Baseline: 'The cat runs.' against the placeholder sentence held in `embedding_eu`.
np.dot(catrun_en_eu, embedding_eu)

0.7649616

In [35]:
# Same sentence ('The dog walks.'), English vs. Spanish.
np.dot(dogwalk_en_eu, dogwalk_es_eu)

0.9263848

In [36]:
# Different sentences, both in English.
np.dot(catrun_en_eu, dogwalk_en_eu)

0.8857113

In [37]:
# Different sentences, both in Spanish.
np.dot(catrun_es_eu, dogwalk_es_eu)

0.89643717

In [38]:
# Different sentences in different languages: English cat sentence vs. Spanish dog sentence.
np.dot(catrun_en_eu, dogwalk_es_eu)

0.84225607

In [39]:
# Different sentences in different languages: Spanish cat sentence vs. English dog sentence.
np.dot(catrun_es_eu, dogwalk_en_eu)

0.8305974

In [40]:
# Length of a single embedding vector.
catrun_en_eu.shape

(1536,)

### `embed.embed_many_eu`

In [41]:
# Embed all five texts with a single embed_many_eu call.
many_eu = embed.embed_many_eu([
    'Your text string goes here',
    'The cat runs.',
    'El gato corre.',
    'The dog walks.',
    'El perro camina.',
])
# Inspect the shape of the array this cell just produced (many_eu).
many_eu.shape

(5, 1536)

In [42]:
# Display the batch result; presumably one row per input text, in input order.
many_eu

array([[-6.92928350e-03, -5.33642201e-03,  1.18758921e-02, ...,
        -1.70174073e-02, -4.54713227e-05, -2.40475051e-02],
       [-9.09975544e-03, -4.11655614e-03, -1.11006815e-02, ...,
        -2.47758050e-02,  5.55989938e-03, -2.42022909e-02],
       [-1.42147215e-02, -2.92078988e-03,  3.90018802e-04, ...,
        -8.08201265e-03,  4.93654609e-03, -6.90483581e-03],
       [ 9.59877204e-03, -6.36116648e-03, -5.89548331e-03, ...,
        -1.23801986e-02, -3.47282301e-04, -1.70813799e-02],
       [ 3.45983449e-03, -7.59354141e-03,  1.42495893e-03, ...,
         4.55097703e-04, -8.73538014e-03, -6.12724526e-03]], dtype=float32)

In [43]:
# Assuming rows follow input order, this is the placeholder text vs. 'The cat runs.',
# i.e. the batch counterpart of np.dot(catrun_en_eu, embedding_eu).
np.dot(many_eu[0], many_eu[1])

0.7649158

In [44]:
# Single-call value for 'The cat runs.' vs. 'El gato corre.' again,
# presumably for comparison with the batch-derived products.
np.dot(catrun_en_eu, catrun_es_eu)

0.9224139

In [45]:
# Dot product of every row of `many_eu` with every row: entry [i, j] is many_eu[i] . many_eu[j].
table_eu = np.matmul(many_eu, many_eu.T)
table_eu

array([[1.0000004 , 0.7649161 , 0.73437065, 0.7510821 , 0.7233679 ],
       [0.7649161 , 1.0000002 , 0.92235714, 0.885664  , 0.8423544 ],
       [0.73437065, 0.92235714, 1.0000001 , 0.8305839 , 0.8966306 ],
       [0.7510821 , 0.885664  , 0.8305839 , 1.0000004 , 0.9264928 ],
       [0.7233679 , 0.8423544 , 0.8966306 , 0.9264928 , 0.99999976]],
      dtype=float32)

In [46]:
# Render the table as HTML, formatting the floats with nine decimal places.
tabulate(table_eu, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
1.000000358,0.764916122,0.734370649,0.751082122,0.723367929
0.764916122,1.000000238,0.922357142,0.885663986,0.842354417
0.734370649,0.922357142,1.000000119,0.83058387,0.896630585
0.751082122,0.885663986,0.83058387,1.000000358,0.92649281
0.723367929,0.842354417,0.896630585,0.92649281,0.999999762


## `embed_one_req` and `embed_many_req`

These functions use the `requests` library. They do not use the `openai` module.

### `embed.embed_one_req`

In [47]:
# Embed the placeholder sentence with the requests-based variant.
embedding_req = embed.embed_one_req('Your text string goes here')

In [48]:
# Display the vector (NumPy elides the middle of long arrays).
embedding_req

array([-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
       -1.7017407e-02, -4.5471323e-05, -2.4047505e-02], dtype=float32)

In [49]:
# Check which type embed_one_req returned.
type(embedding_req)

numpy.ndarray

In [50]:
# Embed an English word and its Spanish translation, one embed_one_req call each.
cat_req, gato_req = map(embed.embed_one_req, ('cat', 'gato'))

In [51]:
# Dot product of the 'cat' and 'gato' embeddings: the same word in English and Spanish.
np.dot(cat_req, gato_req)

0.8444619

In [52]:
# Baseline: 'cat' against the placeholder sentence held in `embedding_req`.
np.dot(cat_req, embedding_req)

0.76415867

In [53]:
# Two short sentences, each in English and in Spanish, embedded one call at a time.
catrun_en_req, catrun_es_req, dogwalk_en_req, dogwalk_es_req = map(
    embed.embed_one_req,
    ('The cat runs.', 'El gato corre.', 'The dog walks.', 'El perro camina.'),
)

In [54]:
# Same sentence ('The cat runs.'), English vs. Spanish.
np.dot(catrun_en_req, catrun_es_req)

0.9224324

In [55]:
# Baseline: 'The cat runs.' against the placeholder sentence held in `embedding_req`.
np.dot(catrun_en_req, embedding_req)

0.7649304

In [56]:
# Same sentence ('The dog walks.'), English vs. Spanish.
np.dot(dogwalk_en_req, dogwalk_es_req)

0.92692006

In [57]:
# Different sentences, both in English.
np.dot(catrun_en_req, dogwalk_en_req)

0.88560975

In [58]:
# Different sentences, both in Spanish.
np.dot(catrun_es_req, dogwalk_es_req)

0.8966301

In [59]:
# Different sentences in different languages: English cat sentence vs. Spanish dog sentence.
np.dot(catrun_en_req, dogwalk_es_req)

0.8422076

In [60]:
# Different sentences in different languages: Spanish cat sentence vs. English dog sentence.
np.dot(catrun_es_req, dogwalk_en_req)

0.8305751

In [61]:
# Length of a single embedding vector.
catrun_en_req.shape

(1536,)

### `embed.embed_many_req`

In [62]:
# Embed all five texts with a single embed_many_req call.
many_req = embed.embed_many_req([
    'Your text string goes here',
    'The cat runs.',
    'El gato corre.',
    'The dog walks.',
    'El perro camina.',
])
# Inspect the shape of the array this cell just produced (many_req).
many_req.shape

(5, 1536)

In [63]:
# Display the batch result; presumably one row per input text, in input order.
many_req

array([[-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
        -1.7028622e-02, -8.8358240e-05, -2.4030920e-02],
       [-9.0430975e-03, -4.0786345e-03, -1.1095160e-02, ...,
        -2.4764959e-02,  5.5921902e-03, -2.4191400e-02],
       [-1.4204165e-02, -2.9433765e-03,  4.0550550e-04, ...,
        -8.1148576e-03,  4.9277819e-03, -6.8172398e-03],
       [ 9.5597431e-03, -6.3604913e-03, -5.9328689e-03, ...,
        -1.2309198e-02, -3.6862658e-04, -1.7130248e-02],
       [ 3.3449344e-03, -8.1113884e-03,  1.7457254e-03, ...,
         9.9317353e-05, -9.5703155e-03, -5.5582649e-03]], dtype=float32)

In [64]:
# Assuming rows follow input order, this is the placeholder text vs. 'The cat runs.',
# i.e. the batch counterpart of np.dot(catrun_en_req, embedding_req).
np.dot(many_req[0], many_req[1])

0.7650559

In [65]:
# Single-call value for 'The cat runs.' vs. 'El gato corre.' again,
# presumably for comparison with the batch-derived products.
np.dot(catrun_en_req, catrun_es_req)

0.9224324

In [66]:
# Dot product of every row of `many_req` with every row: entry [i, j] is many_req[i] . many_req[j].
table_req = np.matmul(many_req, many_req.T)
table_req

array([[0.99999976, 0.76505584, 0.7345198 , 0.75117296, 0.7228675 ],
       [0.76505584, 1.        , 0.9224138 , 0.8857111 , 0.8422665 ],
       [0.7345198 , 0.9224138 , 1.        , 0.8305976 , 0.89663   ],
       [0.75117296, 0.8857111 , 0.8305976 , 1.0000002 , 0.9269196 ],
       [0.7228675 , 0.8422665 , 0.89663   , 0.9269196 , 1.        ]],
      dtype=float32)

In [67]:
# Render the table as HTML, formatting the floats with nine decimal places.
tabulate(table_req, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
0.999999762,0.765055835,0.73451978,0.75117296,0.722867489
0.765055835,1.0,0.922413826,0.885711074,0.8422665
0.73451978,0.922413826,1.0,0.830597579,0.896629989
0.75117296,0.885711074,0.830597579,1.000000238,0.92691958
0.722867489,0.8422665,0.896629989,0.92691958,1.0
