# Experiments with embeddings

In [1]:
import numpy as np
from tabulate import tabulate

from embed import embed_one, embed_one_req, embed_many, embed_many_req

In [2]:
# A small float32 vector holding 1.0 through 4.0.
np.arange(1, 5, dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

In [3]:
# Embed one sample sentence; later cells reuse this vector as a comparison
# point for the cat/dog sentences.
embedding = embed_one("Your text string goes here")

In [4]:
# Display the vector; NumPy abbreviates long arrays with "...".
embedding

array([-6.9754184e-03, -5.3491648e-03,  1.1907940e-02, ...,
       -1.7028622e-02, -8.8358240e-05, -2.4030920e-02], dtype=float32)

In [5]:
# Check which container type embed_one returned.
type(embedding)

numpy.ndarray

In [6]:
# The same word in English and Spanish; embed_one is called once per word,
# English first.
cat, gato = (embed_one(word) for word in ("cat", "gato"))

In [7]:
# Dot product of the translation pair "cat" / "gato".
cat.dot(gato)

0.8447504

In [8]:
# Dot product of "cat" with the sample-sentence embedding, for contrast with
# the cat/gato pair.
cat.dot(embedding)

0.76458704

In [9]:
# Two short sentences, each in English and Spanish. embed_one is invoked once
# per sentence, in the order the target names are listed.
catrun_en, catrun_es, dogwalk_en, dogwalk_es = map(
    embed_one,
    ["The cat runs.", "El gato corre.", "The dog walks.", "El perro camina."],
)

In [10]:
# Same sentence, different language (cat: English vs. Spanish).
catrun_en.dot(catrun_es)

0.92243814

In [11]:
# English cat sentence vs. the sample-sentence embedding.
catrun_en.dot(embedding)

0.7650094

In [12]:
# Same sentence, different language (dog: English vs. Spanish).
dogwalk_en.dot(dogwalk_es)

0.92638606

In [13]:
# Different sentences, both in English.
catrun_en.dot(dogwalk_en)

0.88560975

In [14]:
# Different sentences, both in Spanish.
catrun_es.dot(dogwalk_es)

0.89640963

In [15]:
# Different sentences and different languages (English cat vs. Spanish dog).
catrun_en.dot(dogwalk_es)

0.8421978

In [16]:
# Different sentences and different languages (Spanish cat vs. English dog).
catrun_es.dot(dogwalk_en)

0.8305687

In [17]:
# Dimensionality of a single embedding vector.
catrun_en.shape

(1536,)

In [18]:
# Embed all five sentences with a single embed_many call, then confirm the
# result has one row per input sentence.
sentences = [
    "Your text string goes here",
    "The cat runs.",
    "El gato corre.",
    "The dog walks.",
    "El perro camina.",
]
many = embed_many(sentences)
many.shape

(5, 1536)

In [19]:
# Display the stacked embeddings returned by embed_many (a 2-D array).
many

array([[-6.9292835e-03, -5.3364220e-03,  1.1875892e-02, ...,
        -1.7017407e-02, -4.5471323e-05, -2.4047505e-02],
       [-9.0776253e-03, -4.0546730e-03, -1.1039667e-02, ...,
        -2.4818547e-02,  5.5739805e-03, -2.4181521e-02],
       [-1.4264697e-02, -2.9127449e-03,  4.0621794e-04, ...,
        -8.0626551e-03,  4.8730331e-03, -6.8412321e-03],
       [ 9.6240090e-03, -6.3674315e-03, -5.9017539e-03, ...,
        -1.2380062e-02, -3.3599290e-04, -1.7081190e-02],
       [ 3.3871806e-03, -7.5423303e-03,  1.3486285e-03, ...,
         4.5071356e-04, -8.7910602e-03, -6.1375089e-03]], dtype=float32)

In [20]:
# Rows 0 and 1 of the batch result -- presumably the sample sentence and
# "The cat runs.", assuming rows follow input order.
many[0].dot(many[1])

0.7649706

In [21]:
# The individually embedded English/Spanish cat sentences again, shown for
# reference next to the batch results.
catrun_en.dot(catrun_es)

0.92243814

In [22]:
# Tiny 2x2 integer matrix used to illustrate transposition.
small = np.arange(1, 5).reshape(2, 2)
small

array([[1, 2],
       [3, 4]])

In [23]:
# Transpose: rows become columns.
small.T

array([[1, 3],
       [2, 4]])

In [24]:
# Multiply the batch by its own transpose: entry [i, j] is the dot product of
# row i and row j, so the result is a square, symmetric similarity table.
table = np.matmul(many, many.T)
table

array([[1.0000004 , 0.7649705 , 0.7343621 , 0.7510288 , 0.7232301 ],
       [0.7649705 , 0.9999997 , 0.92237246, 0.88573134, 0.8423364 ],
       [0.7343621 , 0.92237246, 1.0000001 , 0.83060044, 0.89653635],
       [0.7510288 , 0.88573134, 0.83060044, 1.0000002 , 0.92640686],
       [0.7232301 , 0.8423364 , 0.89653635, 0.92640686, 1.0000002 ]],
      dtype=float32)

In [25]:
# Render the similarity table as HTML, formatting floats to 9 decimal places.
tabulate(table, tablefmt='html', floatfmt='.9f')

0,1,2,3,4
1.000000358,0.764970481,0.734362125,0.751028776,0.723230124
0.764970481,0.999999702,0.92237246,0.885731339,0.842336416
0.734362125,0.92237246,1.000000119,0.830600441,0.89653635
0.751028776,0.885731339,0.830600441,1.000000238,0.92640686
0.723230124,0.842336416,0.89653635,0.92640686,1.000000238


In [26]:
# Embed the dog sentence pair in one call via embed_many_req (imported from
# the local embed module; its implementation is not visible here), then
# display the result.
embeddings = embed_many_req(["The dog walks.", "El perro camina."])
embeddings

array([[ 9.5597431e-03, -6.3604913e-03, -5.9328689e-03, ...,
        -1.2309198e-02, -3.6862658e-04, -1.7130248e-02],
       [ 3.3449344e-03, -8.1113884e-03,  1.7457254e-03, ...,
         9.9317353e-05, -9.5703155e-03, -5.5582649e-03]], dtype=float32)

In [27]:
# Dot product of the two rows returned by embed_many_req.
embeddings[0].dot(embeddings[1])

0.9269187

In [28]:
# Embed the same dog sentence pair one sentence at a time via embed_one_req
# (English first), then take the dot product of the two vectors.
dogwalk_en_req, dogwalk_es_req = map(
    embed_one_req,
    ("The dog walks.", "El perro camina."),
)
dogwalk_en_req.dot(dogwalk_es_req)

0.9269187