In [5]:
import numpy
import tenseal

from datasets import load_dataset

from search.embedding import Model
from search.index import Index
from search.client import Client

In [6]:
# --- Corpus, embedding model and index -------------------------------------
# Corpus: the "text" field of the first 100 rows of the IMDB "train" split.
dataset = load_dataset("imdb")["train"][:100]["text"]

# The same Model instance is handed to both the Index and the Client below.
model = Model(id="paraphrase-MiniLM-L6-v2")
index = Index(model=model, corpus=dataset)

# --- CKKS context ----------------------------------------------------------
# Polynomial modulus degree 8192 with a 60/40/40/60-bit coefficient modulus chain.
context = tenseal.context(
    tenseal.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
# NOTE(review): Galois keys are presumably needed for rotations performed
# during the encrypted search -- confirm against search.index / search.client.
context.generate_galois_keys()
# Default encoding scale of 2**40, written as a shift.
context.global_scale = 1 << 40

# --- Client ----------------------------------------------------------------
# The client receives the index centroids together with the CKKS context.
client = Client(model=model, centroids=index.centroids, context=context)

In [7]:
# Use the first corpus entry as the query text.
text = dataset[0]

# Round trip: the client builds the query, the index answers it,
# and the client decrypts the answer.
query = client.query(text)
response = index.search(query)
result = client.decrypt(response)

In [8]:
# Ask the client to rank, passing the decrypted search result and the query text.
# Left as the bare last expression so the notebook displays the returned value.
client.rank(result=result, text=text)

[17,
 0,
 2,
 4,
 1,
 3,
 10,
 15,
 11,
 12,
 26,
 14,
 27,
 16,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 13,
 7,
 8,
 5,
 6,
 9,
 18]

In [None]:
# Score the decrypted result against the embedding of the query text.
# NOTE(review): array shapes are not visible here -- confirm that the
# transpose matches what `result` expects on its right-hand side.
query_embedding = model.encode(text)
ranking = result @ query_embedding.T

# Bare expression: display the scores.
ranking

In [None]:
# argsort yields ascending order; flipping it puts the largest scores first.
# NOTE(review): assumes `ranking` is 1-D (numpy.flip reverses every axis) -- confirm.
ascending_order = numpy.argsort(ranking)
numpy.flip(ascending_order).tolist()