In [7]:
from transformers import BartTokenizer, BartModel
import torch

tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')
model = BartModel.from_pretrained('facebook/bart-large')

In [None]:
def bart_cls_emb(sentences):
    inputs = tokenizer(sentences, return_tensors="pt")
    outputs = model(**inputs)

    last_hidden_states = outputs.last_hidden_state
    return last_hidden_states[:, 0]

def bart_mean_emb(sentences):
    inputs = tokenizer(sentences, return_tensors="pt")
    outputs = model(**inputs)

    last_hidden_states = outputs.last_hidden_state
    input_mask_expanded = inputs["attention_mask"].unsqueeze(-1).expand(last_hidden_states.size()).float()

    return torch.sum(last_hidden_states * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

In [41]:
def bart_pad_cls_emb(sentences):
    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    outputs = model(**inputs)

    last_hidden_states = outputs.last_hidden_state
    return last_hidden_states[:, 0]

def bart_pad_mean_emb(sentences):
    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    outputs = model(**inputs)

    last_hidden_states = outputs.last_hidden_state
    input_mask_expanded = inputs["attention_mask"].unsqueeze(-1).expand(last_hidden_states.size()).float()

    return torch.sum(last_hidden_states * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

In [38]:
a = "i love dogs"
b = "i like dogs"
c = "i hate cats"
d = "earth is yellow and flat"
e = "tonight is the dire of your fight"
abcde = [a, b, c, d, e]

In [39]:
for i in abcde:
    print(bart_cls_emb(i))

tensor([[ 0.9770,  0.5219, -0.5908,  ...,  0.9328,  0.1308, -0.5148]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.9248,  0.5461, -0.4824,  ...,  0.7143, -0.0326, -0.3788]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.8559,  0.5523, -0.5329,  ...,  0.8630,  0.0196, -0.4594]],
       grad_fn=<SelectBackward0>)
tensor([[ 1.1855,  0.7464, -0.8031,  ...,  0.0654,  0.3510, -0.7310]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.6374,  1.1278, -0.2586,  ..., -0.3083,  0.1837,  0.1355]],
       grad_fn=<SelectBackward0>)


In [42]:
for i in abcde:
    print(bart_pad_cls_emb(i))

tensor([[ 0.9770,  0.5219, -0.5908,  ...,  0.9328,  0.1308, -0.5148]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.9248,  0.5461, -0.4824,  ...,  0.7143, -0.0326, -0.3788]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.8559,  0.5523, -0.5329,  ...,  0.8630,  0.0196, -0.4594]],
       grad_fn=<SelectBackward0>)
tensor([[ 1.1855,  0.7464, -0.8031,  ...,  0.0654,  0.3510, -0.7310]],
       grad_fn=<SelectBackward0>)
tensor([[ 0.6374,  1.1278, -0.2586,  ..., -0.3083,  0.1837,  0.1355]],
       grad_fn=<SelectBackward0>)


In [44]:
print(bart_pad_cls_emb(abcde))

tensor([[ 0.9770,  0.5219, -0.5908,  ...,  0.9328,  0.1308, -0.5148],
        [ 0.9248,  0.5461, -0.4824,  ...,  0.7143, -0.0326, -0.3788],
        [ 0.8559,  0.5523, -0.5329,  ...,  0.8630,  0.0196, -0.4594],
        [ 1.1855,  0.7464, -0.8031,  ...,  0.0654,  0.3510, -0.7310],
        [ 0.6374,  1.1278, -0.2586,  ..., -0.3083,  0.1837,  0.1355]],
       grad_fn=<SelectBackward0>)


In [40]:
for i in abcde:
    print(bart_mean_emb(i))

tensor([[ 0.1276,  0.8422, -0.4601,  ...,  0.7251,  0.2987,  0.0513]],
       grad_fn=<DivBackward0>)
tensor([[-0.0174,  0.8903, -0.3392,  ...,  0.5345,  0.2470,  0.2135]],
       grad_fn=<DivBackward0>)
tensor([[-1.5043e-02,  7.7883e-01, -3.3464e-01,  ...,  6.7676e-01,
          1.2624e-01, -5.8051e-04]], grad_fn=<DivBackward0>)
tensor([[ 0.4228,  0.3790, -0.9571,  ..., -0.1875,  0.1752, -0.5779]],
       grad_fn=<DivBackward0>)
tensor([[ 0.7454,  1.2256, -0.4385,  ..., -0.1347, -0.0076,  0.1285]],
       grad_fn=<DivBackward0>)


In [43]:
for i in abcde:
    print(bart_pad_mean_emb(i))

tensor([[ 0.1276,  0.8422, -0.4601,  ...,  0.7251,  0.2987,  0.0513]],
       grad_fn=<DivBackward0>)
tensor([[-0.0174,  0.8903, -0.3392,  ...,  0.5345,  0.2470,  0.2135]],
       grad_fn=<DivBackward0>)
tensor([[-1.5043e-02,  7.7883e-01, -3.3464e-01,  ...,  6.7676e-01,
          1.2624e-01, -5.8051e-04]], grad_fn=<DivBackward0>)
tensor([[ 0.4228,  0.3790, -0.9571,  ..., -0.1875,  0.1752, -0.5779]],
       grad_fn=<DivBackward0>)
tensor([[ 0.7454,  1.2256, -0.4385,  ..., -0.1347, -0.0076,  0.1285]],
       grad_fn=<DivBackward0>)


In [45]:
print(bart_pad_mean_emb(abcde))

tensor([[ 1.2758e-01,  8.4224e-01, -4.6007e-01,  ...,  7.2510e-01,
          2.9873e-01,  5.1315e-02],
        [-1.7448e-02,  8.9026e-01, -3.3915e-01,  ...,  5.3452e-01,
          2.4703e-01,  2.1352e-01],
        [-1.5043e-02,  7.7883e-01, -3.3464e-01,  ...,  6.7676e-01,
          1.2624e-01, -5.8100e-04],
        [ 4.2275e-01,  3.7902e-01, -9.5708e-01,  ..., -1.8747e-01,
          1.7521e-01, -5.7789e-01],
        [ 7.4544e-01,  1.2256e+00, -4.3848e-01,  ..., -1.3469e-01,
         -7.5986e-03,  1.2847e-01]], grad_fn=<DivBackward0>)


In [16]:
torch.nn.CosineSimilarity(dim=0, eps=1e-8)(bart_cls_emb(a)[0], bart_cls_emb(b)[0])

tensor(0.9938, grad_fn=<SumBackward1>)

In [17]:
torch.nn.CosineSimilarity(dim=1, eps=1e-8)(bart_cls_emb(a), bart_cls_emb(b))

tensor([0.9938], grad_fn=<SumBackward1>)

In [19]:
torch.nn.CosineSimilarity(dim=0, eps=1e-8)(bart_cls_emb(a), bart_cls_emb(b))

tensor([ 1.,  1.,  1.,  ...,  1., -1.,  1.], grad_fn=<SumBackward1>)

In [20]:
cos_sim = torch.nn.CosineSimilarity(dim=1, eps=1e-8)

In [18]:
bart_cls_emb(a).shape

torch.Size([1, 1024])

In [49]:
z = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
z5 = [z, z, z, z, z]
z7 = [z,z,z,z,z,z,z]
z2 = [z7, z7]

zz = torch.tensor([z2, z2, z2])
zz.shape, zz

(torch.Size([3, 2, 7, 9]),
 tensor([[[[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000]],
 
          [[0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],
           [0.1000, 0.2000, 0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000,
            0.9000],


In [59]:
torch.nn.CosineSimilarity(dim=0, eps=1e-8)(zz, torch.tensor(z)).shape

torch.Size([2, 7, 9])

In [70]:
zz.shape, torch.tensor(z2).shape, torch.nn.CosineSimilarity(dim=1, eps=1e-8)(zz, torch.tensor(z2)).shape

(torch.Size([3, 2, 7, 9]), torch.Size([2, 7, 9]), torch.Size([3, 7, 9]))

In [79]:
zz.shape, torch.tensor([z2, z2]).shape, torch.nn.CosineSimilarity(dim=2, eps=1e-8)(zz, torch.tensor([z2, z2])).shape

RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 0

In [53]:
torch.nn.CosineSimilarity(dim=3, eps=1e-8)(zz, zz)

tensor([[[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]]])

In [28]:
bart_cls_emb(d)

tensor([[ 1.1855,  0.7464, -0.8031,  ...,  0.0654,  0.3510, -0.7310]],
       grad_fn=<SelectBackward0>)

In [81]:
def get_simi_level(emb_func, a, b):
    cos_sim = torch.nn.CosineSimilarity(dim=1, eps=1e-8)
    return cos_sim(emb_func(a), emb_func(b))

In [83]:
for i in abcde:
    for j in abcde:
        print(f"{i} == {j} : {get_simi_level(bart_cls_emb, i, j)}")
    print()

i love dogs == i love dogs : tensor([1.0000], grad_fn=<SumBackward1>)
i love dogs == i like dogs : tensor([0.9938], grad_fn=<SumBackward1>)
i love dogs == i hate cats : tensor([0.9957], grad_fn=<SumBackward1>)
i love dogs == earth is yellow and flat : tensor([0.6725], grad_fn=<SumBackward1>)
i love dogs == tonight is the dire of your fight : tensor([0.7309], grad_fn=<SumBackward1>)

i like dogs == i love dogs : tensor([0.9938], grad_fn=<SumBackward1>)
i like dogs == i like dogs : tensor([1.0000], grad_fn=<SumBackward1>)
i like dogs == i hate cats : tensor([0.9930], grad_fn=<SumBackward1>)
i like dogs == earth is yellow and flat : tensor([0.6705], grad_fn=<SumBackward1>)
i like dogs == tonight is the dire of your fight : tensor([0.7324], grad_fn=<SumBackward1>)

i hate cats == i love dogs : tensor([0.9957], grad_fn=<SumBackward1>)
i hate cats == i like dogs : tensor([0.9930], grad_fn=<SumBackward1>)
i hate cats == i hate cats : tensor([1.0000], grad_fn=<SumBackward1>)
i hate cats == ear

In [84]:
for i in abcde:
    for j in abcde:
        print(f"{i} == {j} : {get_simi_level(bart_mean_emb, i, j)}")
    print()

i love dogs == i love dogs : tensor([1.0000], grad_fn=<SumBackward1>)
i love dogs == i like dogs : tensor([0.9606], grad_fn=<SumBackward1>)
i love dogs == i hate cats : tensor([0.9533], grad_fn=<SumBackward1>)
i love dogs == earth is yellow and flat : tensor([0.5585], grad_fn=<SumBackward1>)
i love dogs == tonight is the dire of your fight : tensor([0.4902], grad_fn=<SumBackward1>)

i like dogs == i love dogs : tensor([0.9606], grad_fn=<SumBackward1>)
i like dogs == i like dogs : tensor([1.], grad_fn=<SumBackward1>)
i like dogs == i hate cats : tensor([0.9584], grad_fn=<SumBackward1>)
i like dogs == earth is yellow and flat : tensor([0.5708], grad_fn=<SumBackward1>)
i like dogs == tonight is the dire of your fight : tensor([0.5138], grad_fn=<SumBackward1>)

i hate cats == i love dogs : tensor([0.9533], grad_fn=<SumBackward1>)
i hate cats == i like dogs : tensor([0.9584], grad_fn=<SumBackward1>)
i hate cats == i hate cats : tensor([1.], grad_fn=<SumBackward1>)
i hate cats == earth is ye