In [4]:
import torch
from scipy import spatial
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')

sentences = ['I love apples', 'Apples are my favorite fruit', 'Ive got the newest newspaper', 'The radio seems broken', 'Anybody seen my friend Josh?', 'There is an orchard nearby', 'Wishing you great harvest', 'We found some fruit in the fridge']

inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

embeddings = outputs.last_hidden_state

emb1 = torch.mean(embeddings[0], dim=0)

similarities = []
for i, emb in enumerate(embeddings[1:]):
    emb_i = torch.mean(emb, dim=0) 
    cos_sim = 1 - spatial.distance.cosine(emb1, emb_i)
    similarities.append((cos_sim.item(), sentences[i+1]))

similarities.sort(reverse=True)
print(similarities)

[(0.7690949440002441, 'Apples are my favorite fruit'), (0.7427406311035156, 'Wishing you great harvest'), (0.6510136127471924, 'The radio seems broken'), (0.5235278606414795, 'Ive got the newest newspaper'), (0.5063028335571289, 'There is an orchard nearby'), (0.4969409108161926, 'We found some fruit in the fridge'), (0.46399056911468506, 'Anybody seen my friend Josh?')]
