In [1]:
from text2sql import hello
print(hello.message)

hello, world!


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import os
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

from text2sql.engine.embeddings import (
    AzureEmbedder, 
    BedrockCohereEmbedder, 
    BedrockTitanv2Embedder, 
    SentenceTransformerEmbedder,
)

  from tqdm.autonotebook import tqdm, trange


In [15]:
api_key=os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint=os.environ.get("AZURE_OPENAI_API_ENDPOINT")
api_version=os.environ.get("AZURE_OPENAI_API_VERSION")
model=os.environ.get("AZURE_OPENAI_MODEL")
print(f"api_key: {api_key[:3]}")
print(f"azure_endpoint: {azure_endpoint}")
print(f"api_version: {api_version}")
print(f"model: {model}")

api_key: 3dc
azure_endpoint: https://gena-gpt-2.openai.azure.com/
api_version: 2024-06-01
model: gena-text-embedding-3-small


In [5]:
azure_embedder = AzureEmbedder(
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_API_ENDPOINT"),
    api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    model=os.environ.get("AZURE_OPENAI_MODEL"),
    batch_size=1,
)

In [6]:
bedrock_cohere_embedder = BedrockCohereEmbedder(
    region_name="us-east-1",
    model="cohere.embed-multilingual-v3",
    input_type="clustering",
    batch_size=1,
)

In [7]:
bedrock_titan_embedder = BedrockTitanv2Embedder(
    region_name="us-east-1",
    model="amazon.titan-embed-text-v2:0",
    dimensions=1024,
    normalize=True,
    batch_size=1,
)

In [8]:
sentence_transformer_embedder = SentenceTransformerEmbedder(
    model_path="sentence-transformers/LaBSE"
)



In [9]:
texts = [
    "The Trojans, after a seven years’ voyage, set sail for Italy, but are overtaken by a dreadful storm, which Aeolus raises at the request of Juno.",
    "트로이아군은 7년간의 항해 끝에 이탈리아로 항해를 떠났지만 주노의 요청으로 무서운 폭풍에 추월당합니다.",
    "enus complains to Jupiter of her son’s misfortunes. Jupiter comforts her, and sends Mercury to procure him a kind reception among the Carthaginians.",
    "에누스는 주피터에게 아들의 불행에 대해 불평합니다. 주피터는 그녀를 위로하고 머큐리를 보내 카르타고 인들 사이에서 아들을 친절하게 맞이합니다.",
]

In [10]:
v = bedrock_cohere_embedder.embed("hello, world!")
print(v[:16])

  datetime_now = datetime.datetime.utcnow()


[8.8870525e-05, 0.052093506, 0.020202637, 0.044403076, -0.021209717, -0.023620605, -0.022064209, -0.027786255, -0.058898926, 0.028869629, 0.026016235, -0.04647827, 0.03491211, -0.014183044, 0.032592773, -0.03225708]


In [11]:
azure_embeddings = azure_embedder.embed(texts, verbose=True)
print(cosine_similarity(azure_embeddings, azure_embeddings))

100%|██████████| 4/4 [00:02<00:00,  1.75it/s]

[[1.         0.31451158 0.46395302 0.07135151]
 [0.31451158 1.         0.1697474  0.22863711]
 [0.46395302 0.1697474  1.         0.31457679]
 [0.07135151 0.22863711 0.31457679 1.        ]]





In [12]:
bedrock_cohere_embeddings = bedrock_cohere_embedder.embed(texts, verbose=True)
print(cosine_similarity(bedrock_cohere_embeddings, bedrock_cohere_embeddings))

  datetime_now = datetime.datetime.utcnow()
100%|██████████| 4/4 [00:01<00:00,  3.51it/s]

[[1.         0.74092123 0.61986535 0.52285805]
 [0.74092123 1.         0.50399098 0.59554468]
 [0.61986535 0.50399098 1.         0.73378015]
 [0.52285805 0.59554468 0.73378015 1.        ]]





In [13]:
bedrock_titan_embeddings = bedrock_titan_embedder.embed(texts, verbose=True)
print(cosine_similarity(bedrock_titan_embeddings, bedrock_titan_embeddings))

  datetime_now = datetime.datetime.utcnow()
100%|██████████| 4/4 [00:01<00:00,  2.25it/s]

[[1.         0.55039733 0.14172415 0.09633639]
 [0.55039733 1.         0.02770804 0.06834226]
 [0.14172415 0.02770804 1.         0.46775086]
 [0.09633639 0.06834226 0.46775086 1.        ]]





In [14]:
sentence_transformer_embeddings = sentence_transformer_embedder.embed(texts, verbose=True)
print(cosine_similarity(sentence_transformer_embeddings, sentence_transformer_embeddings))

100%|██████████| 4/4 [00:00<00:00,  7.37it/s]

[[1.0000001  0.85917854 0.30306768 0.35783553]
 [0.85917854 1.         0.2685324  0.36520082]
 [0.30306768 0.2685324  1.0000002  0.8163415 ]
 [0.35783553 0.36520082 0.8163415  1.        ]]



