In [1]:
import numpy as np
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = SentenceTransformer('paraphrase-MiniLM-L3-v2') # try more powerful model: all-mpnet-base-v2

In [3]:
#Sentences we want to encode. Example:
sentence = [
    'This blanket has such a cozy temperature for me!',
    'I am so much warmer and snug using this spread!',
    'Taylor Swift was 34 years old in 2024.']

In [4]:
#Sentences are encoded by calling model.encode()
embedding = model.encode(sentence)

#Preview the embeddings
print(embedding)
embedding.shape

[[ 0.422886    0.11801935  0.5898904  ... -0.0799733   0.16128042
   0.1529056 ]
 [ 0.08642201 -0.0195312   0.3944788  ... -0.19359107  0.27264503
   0.23386721]
 [ 0.02534099  0.47614315  0.11544227 ... -0.10391905  0.5816903
  -0.7600805 ]]


(3, 384)

In [5]:
# Euclidean Distance function
def euclidean_distance(vec1, vec2):
    return np.linalg.norm(vec1 - vec2)

In [6]:
# Euclidean Distance
print("Euclidean Distance: Review 1 vs Review 2:", euclidean_distance(embedding[0], embedding[1]))
print("Euclidean Distance: Review 1 vs Random Comment:", euclidean_distance(embedding[0], embedding[2]))
print("Euclidean Distance: Review 2 vs Random Comment:", euclidean_distance(embedding[1], embedding[2]))

Euclidean Distance: Review 1 vs Review 2: 4.6202903
Euclidean Distance: Review 1 vs Random Comment: 7.313547
Euclidean Distance: Review 2 vs Random Comment: 6.338903


In [7]:
# Dot Product
print("Dot Product: Review 1 vs Review 2:", np.dot(embedding[0], embedding[1]))
print("Dot Product: Review 1 vs Random Comment:", np.dot(embedding[0], embedding[2]))
print("Dot Product: Review 2 vs Random Comment:", np.dot(embedding[1], embedding[2]))

Dot Product: Review 1 vs Review 2: 12.270496
Dot Product: Review 1 vs Random Comment: -0.76546323
Dot Product: Review 2 vs Random Comment: 0.9524095


In [8]:
# Cosine Distance function
def cosine_distance(vec1,vec2):
  cosine = 1 - abs((np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))))
  return cosine

In [9]:
# Cosine Distance
print("Cosine Distance: Review 1 vs Review 2:", cosine_distance(embedding[0], embedding[1]))
print("Cosine Distance: Review 1 vs Random Comment:", cosine_distance(embedding[0], embedding[2]))
print("Cosine Distance: Review 2 vs Random Comment:", cosine_distance(embedding[1], embedding[2]))

Cosine Distance: Review 1 vs Review 2: 0.4523802399635315
Cosine Distance: Review 1 vs Random Comment: 0.9704555775970221
Cosine Distance: Review 2 vs Random Comment: 0.9542623534798622
