In [49]:
import numpy as np
from sentence_transformers import SentenceTransformer

In [50]:
# Load the pretrained checkpoint by name; this one model instance encodes
# both the documents and the search query in the cells that follow.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [51]:
# Small in-memory corpus of example sentences to search over.
docs = [
    "The dog jumped over the cat",
    "The cat jumped over the dog",
    "It is very warm today",
    "The cat is yellow and the dog is red",
    "The dog jumped over the purple cow",
]

# Turn each sentence into a record with a 1-based id, so that a search hit
# can later be traced back to its text.
docs = [
    dict(id=position, text=sentence)
    for position, sentence in enumerate(docs, start=1)
]
docs

[{'id': 1, 'text': 'The dog jumped over the cat'},
 {'id': 2, 'text': 'The cat jumped over the dog'},
 {'id': 3, 'text': 'It is very warm today'},
 {'id': 4, 'text': 'The cat is yellow and the dog is red'},
 {'id': 5, 'text': 'The dog jumped over the purple cow'}]

In [52]:

# Encode the whole corpus in a single batched call rather than invoking the
# model once per document: `encode` accepts a list of sentences and returns
# one embedding row per input, in input order.
doc_vectors = model.encode([doc["text"] for doc in docs])

# Keep the (id, vector) pair shape that the similarity cell unpacks.
# Indexing (instead of `.get`) makes a record with a missing key fail loudly
# here rather than silently producing a `None` id.
embeddings = [(doc["id"], vector) for doc, vector in zip(docs, doc_vectors)]
embeddings

[(1,
  array([ 5.44742011e-02,  3.71413827e-02,  7.23646060e-02,  7.01895282e-02,
         -5.60800266e-03, -2.30734120e-03, -3.77298743e-02,  1.51365250e-02,
          9.50726215e-03, -3.03883397e-04,  5.88888228e-02,  2.10996363e-02,
          6.14678860e-03, -6.37331083e-02,  1.38773825e-02, -9.31995176e-03,
         -1.19946666e-01, -2.11698958e-03,  6.35645613e-02, -2.08781231e-02,
         -2.80395746e-02, -3.85088958e-02,  2.66364077e-03, -5.36900461e-02,
         -2.24582348e-02,  4.27321270e-02, -6.99243918e-02, -6.33601770e-02,
          1.81462448e-02, -1.30237909e-02, -1.66833140e-02, -1.05505595e-02,
         -3.54123339e-02,  5.32195903e-02, -6.18179552e-02, -6.84991479e-02,
          6.55241981e-02, -1.77701499e-04,  4.45996150e-02,  1.20984346e-01,
         -4.50547412e-02,  1.97268799e-02, -1.27502736e-02,  1.16577803e-03,
         -3.26759741e-02,  6.10489547e-02, -1.04520917e-02, -6.91033229e-02,
          3.60625535e-02,  4.39832322e-02, -1.22472104e-02,  9.17075798

In [56]:
# Free-text query to compare against every stored document embedding.
query = "the purple cow is a great book"

# The query is encoded as a one-element batch, which is why every similarity
# shown below is a 1x1 tensor rather than a plain number.
query_embedding = model.encode([query])

# Pair each document id with its similarity to the query (still unsorted).
results = [
    (doc_id, model.similarity(doc_vector, query_embedding))
    for doc_id, doc_vector in embeddings
]
results

[(1, tensor([[0.1384]])),
 (2, tensor([[0.1405]])),
 (3, tensor([[0.0386]])),
 (4, tensor([[0.1982]])),
 (5, tensor([[0.4953]]))]

In [57]:
# Order the hits from most to least similar. Each score is a single-element
# tensor, and the sort compares those tensors directly.
results = sorted(results, key=lambda hit: hit[1], reverse=True)
results

[(5, tensor([[0.4953]])),
 (4, tensor([[0.1982]])),
 (2, tensor([[0.1405]])),
 (1, tensor([[0.1384]])),
 (3, tensor([[0.0386]]))]

In [58]:
# Index the corpus by id once, so each hit is resolved with a dict lookup
# instead of rescanning `docs` for every result.
docs_by_id = {doc["id"]: doc for doc in docs}

# Print the hits in their current order as: id, similarity score, record.
# The loop names deliberately avoid `id`: a top-level loop variable stays
# bound in the kernel namespace after the cell runs and would shadow the
# builtin `id()` for every later cell.
for doc_id, score in results:
    print(doc_id, score, docs_by_id[doc_id])

5 tensor([[0.4953]]) {'id': 5, 'text': 'The dog jumped over the purple cow'}
4 tensor([[0.1982]]) {'id': 4, 'text': 'The cat is yellow and the dog is red'}
2 tensor([[0.1405]]) {'id': 2, 'text': 'The cat jumped over the dog'}
1 tensor([[0.1384]]) {'id': 1, 'text': 'The dog jumped over the cat'}
3 tensor([[0.0386]]) {'id': 3, 'text': 'It is very warm today'}
