In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np

In [2]:
# Load the ELMo module referenced by this TensorFlow Hub URL.
elmo = hub.load("https://tfhub.dev/google/elmo/3")

# Three sentences that each use the word "serendipity" in a different context.
sentences = [
    "serendipity refers to the occurrence of fortunate discoveries or pleasant surprises by chance or accident, often leading to unexpected insights or breakthroughs",
    "In scientific research serendipity can play a significant role in the discovery of new phenomena or the development of novel technologies where unexpected observations lead to valuable advancements",
    "serendipity in everyday life can manifest as unexpected encounters, fortunate coincidences, or unplanned opportunities that bring joy or positive outcomes beyond one's anticipation or control",
]

# Run the module's "default" signature on the raw strings, then keep its
# "elmo" output (one embedding per token per sentence).
elmo_outputs = elmo.signatures["default"](tf.constant(sentences))
embeddings = elmo_outputs["elmo"]

In [3]:
# Convert the eager tensor to a NumPy array once instead of once per lookup.
embeddings_np = embeddings.numpy()

# Locate the target word in each sentence rather than hard-coding its position.
# The ELMo "default" signature tokenizes its input by splitting on spaces
# (see the TF Hub ELMo documentation), so whitespace-split indices line up with
# the token axis of `embeddings`. `.index` raises ValueError when the word is
# absent, instead of silently selecting some other token's vector.
TARGET_WORD = "serendipity"
target_positions = [sentence.split().index(TARGET_WORD) for sentence in sentences]

serendipity1_embedding = embeddings_np[0, target_positions[0]]
serendipity2_embedding = embeddings_np[1, target_positions[1]]
serendipity3_embedding = embeddings_np[2, target_positions[2]]

In [4]:
# Show the contextual vector extracted for "serendipity" from each sentence.
labelled_vectors = [
    ("Embedding vector for 'serendipity' in the 1st sentence:", serendipity1_embedding),
    ("Embedding vector for 'serendipity' in the 2nd sentence:", serendipity2_embedding),
    ("Embedding vector for 'serendipity' in the 3rd sentence:", serendipity3_embedding),
]
for label, vector in labelled_vectors:
    print(label, vector)

# Last expression of the cell: the shape of the full embeddings array.
embeddings.numpy().shape

Embedding vector for 'serendipity' in the 1st sentence: [-0.64826554 -0.03306     0.56144047 ...  0.26122016  1.0334305
 -0.48186374]
Embedding vector for 'serendipity' in the 2nd sentence: [-0.36746502 -0.25524986  0.28895625 ...  0.28983784  0.796364
 -0.4218786 ]
Embedding vector for 'serendipity' in the 3rd sentence: [-0.64826554 -0.03306     0.56144047 ...  0.34038064  0.98082066
 -0.6427165 ]


(3, 28, 1024)

In [6]:
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        v1: First vector (array-like).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``dot(v1, v2) / (||v1|| * ||v2||)``, or ``0.0`` when either vector has
        zero norm. The ratio is undefined in that case; an unguarded division
        would produce ``nan`` together with a RuntimeWarning.
    """
    dot_product = np.dot(v1, v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # At least one input is the zero vector: report "no similarity".
        return 0.0
    return dot_product / norm_product

In [17]:
# "serendipity" in sentence 1 vs. "serendipity" in sentence 2.
similarity0 = cosine_similarity(v1=serendipity1_embedding, v2=serendipity2_embedding)
print(
    "Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 2nd sentences is :",
    similarity0,
)

Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 2nd sentences is : 0.81077874


In [18]:
# "serendipity" in sentence 1 vs. "serendipity" in sentence 3.
# The label now names the sentences that are actually compared (1st and 3rd).
similarity1 = cosine_similarity(serendipity1_embedding, serendipity3_embedding)
print("Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 3rd sentences is :", similarity1)

Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 3rd sentences is : 0.90145475


In [19]:
# "serendipity" in sentence 2 vs. "serendipity" in sentence 3.
similarity2 = cosine_similarity(v1=serendipity2_embedding, v2=serendipity3_embedding)
print(
    "Cosine similarity between 'serendipity' and 'serendipity' in the 2nd and 3rd sentences is :",
    similarity2,
)

Cosine similarity between 'serendipity' and 'serendipity' in the 2nd and 3rd sentences is : 0.7771109


In [20]:
# Sanity check: a vector compared with itself should have cosine similarity 1.
similarity3 = cosine_similarity(serendipity1_embedding, serendipity1_embedding)
similarit3 = similarity3  # original (misspelled) name kept as an alias for any cell that still uses it
print("Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 1st sentences is :", similarity3)

Cosine similarity between 'serendipity' and 'serendipity' in the 1st and 1st sentences is : 1.0


In [21]:
# Per-token embedding matrices for the first two sentences; convert the tensor
# to NumPy once rather than once per sentence.
embeddings_np = embeddings.numpy()
f1st_embedding = embeddings_np[0]
s2nd_embedding = embeddings_np[1]

# Only the last expression of a cell is displayed, so a bare `.shape` line
# earlier in the cell shows nothing. Display both shapes together instead.
f1st_embedding.shape, s2nd_embedding.shape

(28, 1024)

In [22]:
# Print each sentence's token-embedding matrix followed by its shape.
sentence_reports = [
    (
        "Embedding vector for the 1st sentence:",
        "Embedding vector for the 1st sentence shape :",
        f1st_embedding,
    ),
    (
        "Embedding vector for the 2nd sentence:",
        "Embedding vector for the 2nd sentence shape :",
        s2nd_embedding,
    ),
]
for value_label, shape_label, matrix in sentence_reports:
    print(value_label, matrix)
    print(shape_label, matrix.shape)

Embedding vector for the 1st sentence: [[-0.64826554 -0.03306     0.56144047 ...  0.26122016  1.0334305
  -0.48186374]
 [ 0.13555491 -0.18948965  0.41173175 ... -0.18742858  1.0834882
   0.8884053 ]
 [ 0.00868468  0.18670303 -0.01726137 ...  0.17295986  0.22947119
   0.1356568 ]
 ...
 [-0.0284084  -0.04353216  0.04130162 ...  0.02583168 -0.01429836
  -0.01650422]
 [-0.0284084  -0.04353216  0.04130162 ...  0.02583168 -0.01429836
  -0.01650422]
 [-0.0284084  -0.04353216  0.04130162 ...  0.02583168 -0.01429836
  -0.01650422]]
Embedding vector for the 1st sentence shape : (28, 1024)
Embedding vector for the 2nd sentence: [[ 0.20496729  0.24360688 -0.17271766 ... -0.8247985  -0.14472914
   0.22244819]
 [-0.23900719 -0.5670283  -0.28810012 ...  0.5002324  -0.06013428
  -0.38239735]
 [-0.29120722 -0.00388171  0.49713814 ...  0.46386385  0.2911051
  -0.1347229 ]
 ...
 [-0.01142496  0.8384342  -0.59252775 ...  0.55217826  0.5711925
   0.06236884]
 [-0.07599644  0.5650156  -0.5937175  ...  0.213

In [48]:
def cosine_similarity_trans(v1, v2):
    """Return the cosine similarity of two arrays treated as flat vectors.

    The numerator is the sum of the element-wise product, which equals the dot
    product of the flattened arrays. Each norm is ``np.linalg.norm`` with its
    default order (the Frobenius norm for a 2-D input).

    Args:
        v1: First NumPy array.
        v2: Second NumPy array. It is expected to have the same shape as
            ``v1``; shapes that merely broadcast are not rejected and would
            give a meaningless score.

    Returns:
        ``sum(v1 * v2) / (||v1|| * ||v2||)``, or ``0.0`` when either array has
        zero norm. The ratio is undefined in that case; an unguarded division
        would produce ``nan`` together with a RuntimeWarning.
    """
    dot_product = np.sum(v1 * v2)  # dot product of the flattened arrays
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # At least one input is all zeros: report "no similarity".
        return 0.0
    return dot_product / norm_product

In [49]:
# Compare the full token-embedding matrices of the first two sentences.
# NOTE(review): both matrices are padded to the longest sentence in the batch
# (the 1st sentence's trailing rows in the printed matrix are identical), and
# rows are compared position by position. This score is therefore not a pooled
# sentence-level similarity — consider averaging the real token rows first.
similarity4 = cosine_similarity_trans(v1=f1st_embedding, v2=s2nd_embedding)
print(
    "Cosine similarity between 'f1st_embedding' and 's2nd_embedding' sentences is :",
    similarity4,
)

Cosine similarity between 'f1st_embedding' and 's2nd_embedding' sentences is : 0.17604995
