In [1]:
from google import genai
from google.cloud import aiplatform_v1
from google.genai.types import EmbedContentConfig

### 1. Define a query:

In [2]:
# Free-text search term; it is embedded below and the resulting vector is
# used as the similarity-search query.
query_text = "pobreza"

### 2. Generate embedding for the query:

In [4]:
# Vertex AI client used to call the embedding model.
vertex_client = genai.Client(
    vertexai=True,
    project="cosmic-abbey-470621-f6",
    location="us-east1",
)

# Embed the search text as a *query*. Retrieval embeddings are asymmetric:
# the text being searched for uses RETRIEVAL_QUERY, while the corpus side uses
# RETRIEVAL_DOCUMENT.
# NOTE(review): assumes the indexed datapoints were embedded with
# RETRIEVAL_DOCUMENT and 3072 dimensions — confirm against the indexing job.
query_response = vertex_client.models.embed_content(
    model="gemini-embedding-001",
    contents=query_text,
    config=EmbedContentConfig(
        task_type="RETRIEVAL_QUERY",
        output_dimensionality=3072,
    ),
)

# A single text was submitted, so the first embedding is the query vector.
ebs = query_response.embeddings[0].values

### 3. Perform similarity search with the query embedding:

In [None]:
# Identifiers of the deployed Vector Search index that is queried.
API_ENDPOINT = "1794278526.us-east1-514700908055.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/514700908055/locations/us-east1/indexEndpoints/6544755003406417920"
DEPLOYED_INDEX_ID = "esma_vector_search_index_columns"

# Point the match client at API_ENDPOINT instead of the library default.
client_options = {"api_endpoint": API_ENDPOINT}
vector_search_client = aiplatform_v1.MatchServiceClient(client_options=client_options)

# Restricts act as a filter on the indexed datapoints: one allow-list per namespace.
# NOTE(review): the saved output of the results cell lists Table ID
# BDDENEMDU_PERSONAS_2024_ANUAL, which differs from the table_id allow-list
# here — the output may be stale; confirm the filter values and re-run.
allowed_values_by_namespace = {
    "database": ["enemdu-2024"],
    "table_id": ["INGREFAM_EPHC_ANUAL_2024"],
}
restricts = [
    aiplatform_v1.IndexDatapoint.Restriction(namespace=namespace, allow_list=allowed)
    for namespace, allowed in allowed_values_by_namespace.items()
]

# The query datapoint carries the query embedding together with the filters.
datapoint = aiplatform_v1.IndexDatapoint(feature_vector=ebs, restricts=restricts)
query = aiplatform_v1.FindNeighborsRequest.Query(datapoint=datapoint, neighbor_count=15)

# return_full_datapoint=True is requested because the results cell reads
# `embedding_metadata` from each returned neighbor's datapoint.
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query],
    return_full_datapoint=True,
)

response = vector_search_client.find_neighbors(request)

In [6]:
def _format_neighbor(metadata):
    """Render one neighbor's embedding metadata as a single summary line.

    `metadata` is anything exposing `.get(key)`; keys that are absent are
    rendered as whatever `.get` returns for them.
    """
    column_name = metadata.get("column_name")
    table_id = metadata.get("table_id")
    text = metadata.get("text")
    data_type = metadata.get("data_type")
    return f"Column Name: {column_name} | Table ID: {table_id} | Data Type: {data_type} | Information: {text}"


# Only one query was sent, so its neighbors are in the first result entry.
results = [
    _format_neighbor(neighbor.datapoint.embedding_metadata)
    for neighbor in response.nearest_neighbors[0].neighbors
]
print("\n".join(results))

Column Name: POBREZA | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: Pobreza | Poverty status | Values: 0=NO POBRE, 1=POBRE
Column Name: EPOBREZA | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: Pobreza extrema | Extreme poverty status | Values: 0=NO INDIGENTE, 1=INDIGENTE
Column Name: INGPC | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: Ingreso por cápita | Per capita income
Column Name: DESEMPLEO | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: Población con desempleo | Unemployed population indicator | Values: 1=Población con desempleo
Column Name: SD028 | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: La semana pasada que no tenía trabajo - Ayudó en algún | Last week when had no job - Helped in some | Values: 1=Si, 2=No
Column Name: P44A | Table ID: BDDENEMDU_PERSONAS_2024_ANUAL | Data Type: NUMERIC | Information: Recibe alimentación | 