In [1]:
import ipynb.fs
from .defs.ELMo import ELMo, Tokenizer
import torch
import numpy as np
import math
from numpy import dot
from numpy.linalg import norm

In [2]:
# Checkpoint location that is handed to ELMo.from_checkpoint below.
model_path = './elmo_model'

In [3]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

device: cuda


In [4]:
# Instantiate the ELMo model through its `from_checkpoint` constructor,
# pointing it at the checkpoint location configured in `model_path`.
model = ELMo.from_checkpoint(model_path)

In [5]:
def get_embedding(text,word,model,device):
    """Return a contextual embedding for ``word`` as it occurs in ``text``.

    The sentence is tokenized with ``model.tokenizer`` and passed through
    ``model``. For every output layer except layer 0, the forward and
    backward vectors at the word's position are concatenated, and the
    per-layer vectors are then averaged.

    Args:
        text: Sentence that contains ``word``.
        word: Target token. It must be exactly equal to one of the
            whitespace-separated tokens of ``text``; the first occurrence
            is used.
        model: Object exposing ``tokenizer``, ``config["elmo"]["max_length"]``,
            ``to``, ``eval`` and callable as ``model(w, c)`` returning a
            ``(forward_layers, backward_layers)`` pair.
        device: Torch device the model and inputs are moved to.

    Returns:
        A NumPy array: the mean over layers ``1..len(forward_layers)-1`` of
        the concatenated forward/backward vectors at the word's position.

    Raises:
        ValueError: If ``word`` is not a whitespace-separated token of
            ``text``, or if the model returns no layer above layer 0.
    """
    model.to(device)
    tokenizer = model.tokenizer

    # NOTE(review): the position is taken from a plain whitespace split, so
    # this assumes the tokenizer emits one token per whitespace-separated
    # word with no offset (e.g. no leading start-of-sentence token) and that
    # the position is below max_length -- TODO confirm against Tokenizer.
    tokens = text.split()
    if word not in tokens:
        raise ValueError(
            "{!r} is not a whitespace-separated token of the text {!r}".format(word, text)
        )
    idx = tokens.index(word)

    model.eval()
    with torch.no_grad():
        w, c, _ = tokenizer.tokenize(text, max_length=model.config["elmo"]["max_length"])
        # unsqueeze(0) adds a leading batch dimension of size 1; the [0]
        # index below selects that single batch element again.
        f, b = model(w.unsqueeze(0).to(device), c.unsqueeze(0).to(device))
        # Layer 0 is deliberately skipped; only the layers above it are averaged.
        hv = [
            torch.cat((f[layer][0][idx], b[layer][0][idx])).detach().cpu().numpy()
            for layer in range(1, len(f))
        ]

    if not hv:
        # np.mean over an empty list would silently yield a NaN scalar.
        raise ValueError("model returned no layers above layer 0 to average")
    return np.mean(hv, axis=0)

In [6]:
def euclidean_distance(x1,x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    Args:
        x1: Array-like of numbers.
        x2: Array-like of numbers, broadcast-compatible with ``x1``.

    Returns:
        The square root of the sum of squared element-wise differences, as a
        Python ``float``. Empty inputs yield ``0.0``.
    """
    # Convert up front so plain lists/tuples work and integer inputs cannot
    # wrap around during the subtraction.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    # With the default ``ord`` the norm is sqrt(sum(diff**2)) over all
    # elements, without materialising a running-sum array.
    return float(norm(diff))

In [7]:
def cos(a,b):
    """Cosine similarity of vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product
    of their norms.
    """
    numerator = dot(a, b)
    denominator = norm(a) * norm(b)
    return numerator / denominator

In [8]:
# Context 1: "orange" appears as a fruit.
text, word = (
    "The orange from the supermarket was juicy and tasty. Their fruits are the best",
    "orange",
)

In [9]:
# Embedding of `word` in the context of the current `text`.
orange1 = get_embedding(text=text, word=word, model=model, device=device)

In [10]:
# Context 2: "orange" appears as a colour.
text, word = (
    "The artist painted his house with a tint of orange color and it looked vibrant",
    "orange",
)
orange2 = get_embedding(text=text, word=word, model=model, device=device)

In [11]:
# Reference word: "vibrant", taken from a sentence about colours.
text, word = (
    "The painting has a lot of vibrant colors and it looked beautiful.",
    "vibrant",
)
vibrant = get_embedding(text=text, word=word, model=model, device=device)

In [12]:
# Cosine similarity between the first-context "orange" vector and "vibrant".
cos(a=orange1, b=vibrant)

0.8141281

In [13]:
# Cosine similarity between the second-context "orange" vector and "vibrant".
cos(a=orange2, b=vibrant)

0.9127989