Making embeddings

In [5]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("all-MiniLM-L6-v2")
sentences = [
    "My name is Sourav Rawat."
]

In [6]:
embeddings = model.encode(sentences)

In [7]:
embeddings.shape

(1, 384)

In [8]:
embeddings

array([[-9.58562940e-02, -2.36315466e-02, -1.92582440e-02,
         4.12863754e-02, -6.00254610e-02, -1.73523203e-02,
         7.69573376e-02,  8.43273699e-02,  2.45410260e-02,
        -1.43452808e-02, -8.18684995e-02, -1.36495158e-01,
         1.19653798e-03, -2.30836924e-02,  7.78600061e-03,
        -5.27990684e-02,  4.26777974e-02,  1.07323937e-01,
        -4.00464982e-02, -1.72815576e-01, -7.34216869e-02,
         5.36169000e-02,  3.51459421e-02, -4.43749875e-02,
        -3.70024145e-02, -5.89034855e-02,  1.05429357e-02,
         6.77477643e-02,  3.98082193e-03, -1.09465733e-01,
         5.06068654e-02, -1.28316134e-02,  2.68261880e-02,
         9.89326760e-02, -3.42959389e-02,  3.76520678e-02,
        -1.21254414e-01,  1.98469926e-02, -5.20299887e-03,
         3.61431241e-02,  5.88356405e-02, -3.09627187e-02,
        -1.90517902e-02,  7.56763481e-03,  9.02708545e-02,
        -3.35841961e-02, -1.41919302e-02,  6.39462844e-02,
         5.95297515e-02,  1.67726595e-02, -6.06742986e-0

In [None]:
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")

Using Cosine similarity

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
print("Similarity between sentence1 and sentence2: ",cosine_similarity(embeddings[0].reshape(1,embeddings[0].shape[0]),embeddings[1].reshape(1,embeddings[1].shape[0])))

Similarity between sentence1 and sentence2:  [[0.8427639]]


Using numba

In [8]:
import numpy as np
from numba import jit

emb1, emb2 = np.array(embeddings[0]), np.array(embeddings[1])

@jit(nopython=True)
def cosine_similarity_numba(u:np.ndarray, v:np.ndarray):
    assert(u.shape[0] ==v.shape[0])
    uv=0
    uu=0
    vv=0
    for i in range(u.shape[0]):
        uv+=u[i]*v[i]
        uu+=u[i]*u[i]
        vv+=v[i]*v[i]
    cos_theta = 1
    if uu!= 0 and vv != 0:
        cos_theta = uv/np.sqrt(uu*vv)
    return cos_theta

print(cosine_similarity_numba(emb1,emb2))

0.14268613087564153
