In [1]:
from google import genai
from google.cloud import aiplatform_v1
from google.genai.types import EmbedContentConfig

### 1. Define a query:

In [None]:
# Free-text search phrase (Spanish for "extreme poverty") that gets embedded
# and used as the similarity-search query further down.
query_text: str = "pobreza extrema"

### 2. Generate embedding for the query:

In [None]:
# Vertex AI-backed GenAI client used to call the embedding model.
vertex_client = genai.Client(
    vertexai=True,
    project="cosmic-abbey-470621-f6",
    location="us-east1",
)

# Embed the search phrase as a *query*. Retrieval embeddings are asymmetric:
# the text being searched for should use RETRIEVAL_QUERY, while the corpus
# being searched uses RETRIEVAL_DOCUMENT. Embedding the query with the
# document task type degrades ranking quality.
# NOTE(review): assumes the indexed datapoints were embedded with
# RETRIEVAL_DOCUMENT and 3072 dimensions - confirm against the indexing job.
query_response = vertex_client.models.embed_content(
    model="gemini-embedding-001",
    contents=query_text,
    config=EmbedContentConfig(
        task_type="RETRIEVAL_QUERY",
        output_dimensionality=3072,
    ),
)

# Only one piece of content was sent, so the first embedding is the query vector.
ebs = query_response.embeddings[0].values

### 3. Perform similarity search with the query embedding:

In [None]:
# Vector Search deployment that the query is sent to.
API_ENDPOINT = "2141955479.us-east1-514700908055.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/514700908055/locations/us-east1/indexEndpoints/5723692496341434368"
DEPLOYED_INDEX_ID = "geih_2024_columns_dp"

# The match client talks to the host given in API_ENDPOINT instead of the
# library's default endpoint.
client_options = dict(api_endpoint=API_ENDPOINT)
vector_search_client = aiplatform_v1.MatchServiceClient(client_options=client_options)

# Restricts act as a filter on the indexed datapoints: here the "table_id"
# namespace is limited to the single allow-listed table.
restricts = [
    aiplatform_v1.IndexDatapoint.Restriction(
        namespace="table_id",
        allow_list=["DBF_GECH_HOGARES"],
    ),
]

# Wrap the query embedding (plus its filter) as the datapoint to search with.
datapoint = aiplatform_v1.IndexDatapoint(feature_vector=ebs, restricts=restricts)

# Ask for the 15 nearest neighbors of that datapoint.
query = aiplatform_v1.FindNeighborsRequest.Query(datapoint=datapoint, neighbor_count=15)

# return_full_datapoint=True requests the complete datapoint for every
# neighbor rather than a reduced form.
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query],
    return_full_datapoint=True,
)

response = vector_search_client.find_neighbors(request=request)

In [None]:
def _describe_neighbor(neighbor):
    """Render one neighbor's embedding metadata as a single pipe-separated line."""
    metadata = neighbor.datapoint.embedding_metadata
    column_name = metadata.get("column_name")
    table_id = metadata.get("table_id")
    text = metadata.get("text")
    data_type = metadata.get("data_type")
    return f"Column Name: {column_name} | Table ID: {table_id} | Data Type: {data_type} | Information: {text}"


# Only one query was submitted, so its neighbors are in the first result set.
results = [
    _describe_neighbor(neighbor)
    for neighbor in response.nearest_neighbors[0].neighbors
]
print("\n".join(results))