# test retrieval functions

In [1]:
import sys

# Make the in-repo package importable without installing it. The path is
# relative, so it resolves against the notebook's working directory.
SRC_DIR = "../src"
if SRC_DIR not in sys.path:
    # Guard keeps the cell idempotent: re-running it does not stack duplicate
    # entries onto sys.path.
    sys.path.append(SRC_DIR)

from text2sql import hello

# Smoke test: confirms the package resolves before the heavier imports below.
print(hello.message)

hello, world!


In [2]:
import json

import numpy as np

from text2sql.engine.embeddings import SentenceTransformerEmbedder
from text2sql.engine.retrieval import LocalRetriever, WeaviateRetriever

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Single embedder instance shared by the corpus embedding and the query
# embedding cells further down.
EMBEDDER_MODEL_PATH = "sentence-transformers/LaBSE"
sentence_transformer_embedder = SentenceTransformerEmbedder(model_path=EMBEDDER_MODEL_PATH)



In [4]:
# Test corpus: a sample of Virgil's Aeneid (public domain)
# https://classics.mit.edu/Virgil/aeneid.1.i.html
import os

TEXT_PATH = "aeneid_sample.txt"
EMBEDDINGS_PATH = "aeneid_sample_embeddings.npy"

# Decode explicitly as UTF-8: the sample contains non-ASCII punctuation
# (e.g. U+2019), and the platform default encoding is not UTF-8 everywhere.
with open(TEXT_PATH, encoding="utf-8") as f:
    raw_lines = f.read().split("\n")

# Strip first, then filter, so whitespace-only lines are dropped instead of
# surviving as empty strings that would be embedded and indexed.
texts = [stripped for stripped in (line.strip() for line in raw_lines) if stripped]

# Reuse cached embeddings when they exist and still match the corpus;
# otherwise (no cache, or the text file changed since it was written)
# recompute them and refresh the cache.
embeddings = np.load(EMBEDDINGS_PATH) if os.path.exists(EMBEDDINGS_PATH) else None
if embeddings is None or len(embeddings) != len(texts):
    embeddings = sentence_transformer_embedder.embed(texts, verbose=True)
    np.save(EMBEDDINGS_PATH, embeddings)

# Invariant relied on downstream: one embedding row per text line.
assert len(embeddings) == len(texts), (
    f"expected {len(texts)} embeddings, got {len(embeddings)}"
)

In [5]:
# One record per corpus line; "line" is the 1-based position within `texts`.
data = [
    {"line": line_number, "text": verse}
    for line_number, verse in enumerate(texts, start=1)
]

In [6]:
# Probe query shared by both retriever tests below. The string is a verbatim
# line of the sample (the recorded results return it as line 435 with a
# distance of ~0), so it works as a sanity check for nearest-neighbour lookup.
query_text = "Before his eyes his goddess mother stood:"
# Embedded with the same embedder as the corpus so distances are comparable.
# NOTE(review): a bare string is passed here while the corpus call passes a
# list; the returned shape is not visible from this notebook - confirm.
query_vector = sentence_transformer_embedder.embed(query_text)

### test local retriever

In [7]:
# Build the local retriever over the precomputed embeddings and their
# records, then fetch the five nearest neighbours of the probe query.
local_retriever = LocalRetriever(data=data, embeddings=embeddings)
local_responses = local_retriever.query(query_vector, top_k=5)

# Pretty-print each hit as its own JSON document.
for hit in local_responses:
    rendered = json.dumps(hit, indent=2)
    print(rendered)

{
  "id": 434,
  "distance": 0.0,
  "data": {
    "line": 435,
    "text": "Before his eyes his goddess mother stood:"
  }
}
{
  "id": 826,
  "distance": 0.38323378562927246,
  "data": {
    "line": 827,
    "text": "His mother goddess, with her hands divine,"
  }
}
{
  "id": 487,
  "distance": 0.47407424449920654,
  "data": {
    "line": 488,
    "text": "Of her unhappy lord: the spectre stares,"
  }
}
{
  "id": 919,
  "distance": 0.49701905250549316,
  "data": {
    "line": 920,
    "text": "Her mother Leda\u2019s present, when she came"
  }
}
{
  "id": 967,
  "distance": 0.5039515495300293,
  "data": {
    "line": 968,
    "text": "He walks Iulus in his mother\u2019s sight,"
  }
}


### test weaviate retriever

In [8]:
# Connection settings for the Weaviate instance used by this test
# (expects a server reachable on localhost).
weaviate_host = "localhost"
weaviate_port = 8081  # presumably the HTTP/REST port - confirm against the deployment
# gRPC port: named after the `grpc_port` argument it feeds (it has nothing
# to do with GPUs).
weaviate_grpc_port = 50051

weaviate_retriever = WeaviateRetriever(
    host=weaviate_host,
    port=weaviate_port,
    grpc_port=weaviate_grpc_port,
    collection_name="AeneidLabse",
)

In [9]:
# Load the corpus into the Weaviate collection.
# NOTE(review): delete_existing=True presumably drops any previous copy of
# the collection first, making this cell safe to re-run - confirm.
info = weaviate_retriever.populate_collection(
    embeddings,
    data,
    delete_existing=True,
    verbose=True,
)
collection_name, record_count = info.get("collection_name"), info.get("count")
print(collection_name, record_count)

100%|██████████| 1066/1066 [00:00<00:00, 4682.41it/s]

AeneidLabse 1066





In [10]:
# Run the same probe query against Weaviate; the hits should mirror the
# local retriever's results above (same lines, near-identical distances).
weaviate_responses = weaviate_retriever.query(query_vector, top_k=5)

# Pretty-print each hit as its own JSON document.
for hit in weaviate_responses:
    rendered = json.dumps(hit, indent=2)
    print(rendered)


{
  "id": "1ecd05b6-22f0-5a8a-bd06-d3906706d060",
  "distance": -2.384185791015625e-07,
  "data": {
    "text": "Before his eyes his goddess mother stood:",
    "line": 435
  }
}
{
  "id": "138b2937-9715-5573-beca-f2e6e6b36d34",
  "distance": 0.38323378562927246,
  "data": {
    "text": "His mother goddess, with her hands divine,",
    "line": 827
  }
}
{
  "id": "630e9061-18e4-5c36-a93b-5258f9541043",
  "distance": 0.47407418489456177,
  "data": {
    "text": "Of her unhappy lord: the spectre stares,",
    "line": 488
  }
}
{
  "id": "c68e098b-e587-52b7-8302-1dfcec7d2bc4",
  "distance": 0.4970189332962036,
  "data": {
    "text": "Her mother Leda\u2019s present, when she came",
    "line": 920
  }
}
{
  "id": "9639d4a1-576e-58b9-89dc-c30f7a94a4d1",
  "distance": 0.503951370716095,
  "data": {
    "text": "He walks Iulus in his mother\u2019s sight,",
    "line": 968
  }
}
