In [None]:
!pip install --upgrade --user google-cloud-aiplatform>=1.29.0 google-cloud-storage

In [None]:
# Restart the kernel once the install has finished so the environment can
# access the newly installed packages.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

In [None]:
# get project ID
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]
LOCATION = "us-central1"
if PROJECT_ID == "(unset)":
    print(f"Please set the project ID manually below")

In [None]:
# Project fallback and per-session suffix.
from datetime import datetime

# When gcloud had no project configured, substitute a placeholder that must be
# edited by hand before running the rest of the notebook.
if PROJECT_ID == "(unset)":
    PROJECT_ID = "[your-project-id]"

# Month/day/hour/minute stamp used as the unique id for this session.
UID = datetime.now().strftime("%m%d%H%M")
print(PROJECT_ID)

In [None]:
! gcloud services enable compute.googleapis.com aiplatform.googleapis.com storage.googleapis.com --project {PROJECT_ID}

In [None]:
# Cloud Storage URI for this session's bucket: project ID plus the session suffix.
BUCKET_URI = "gs://{}-vs-quickstart-{}".format(PROJECT_ID, UID)

In [None]:
! gsutil mb -l $LOCATION -p $PROJECT_ID $BUCKET_URI
! gsutil cp "gs://github-repo/data/vs-quickstart/product-embs.json" $BUCKET_URI

In [None]:
! gsutil cp "gs://github-repo/data/vs-quickstart/product-embs.json" . # for query tests

In [None]:
! head product-embs.json


In [None]:
# Initialise the aiplatform package with this session's project and location.
from google.cloud import aiplatform

aiplatform.init(location=LOCATION, project=PROJECT_ID)

In [None]:
# Create a tree-AH index from the contents of the session bucket.
my_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=f"vs-quickstart-index-{UID}",
    contents_delta_uri=BUCKET_URI,
    approximate_neighbors_count=10,
    dimensions=768,
)

In [None]:
# Create the IndexEndpoint with its public endpoint enabled.
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    public_endpoint_enabled=True,
    display_name=f"vs-quickstart-index-endpoint-{UID}",
)

In [None]:
# ID under which the index is deployed, suffixed with the session id.
DEPLOYED_INDEX_ID = "vs_quickstart_deployed_" + UID
print(DEPLOYED_INDEX_ID)

In [None]:
# Deploy the index to the index endpoint under DEPLOYED_INDEX_ID.
my_index_endpoint.deploy_index(
    deployed_index_id=DEPLOYED_INDEX_ID,
    index=my_index,
)

In [None]:
import json

# Build two lookup tables keyed by product ID: one for product names and one
# for embeddings. Each non-blank line of the file is parsed as one JSON object
# with "id", "name" and "embedding" keys.
product_names = {}
product_embs = {}
with open("product-embs.json", encoding="utf-8") as embs_file:
    # Iterate the file object directly so lines are streamed rather than
    # loading the whole file into a list with readlines().
    for line in embs_file:
        if not line.strip():
            continue  # tolerate blank lines instead of failing in json.loads
        product = json.loads(line)
        # Named product_id (not `id`) so the built-in id() is not shadowed in
        # the notebook's global namespace for every later cell.
        product_id = product["id"]
        product_names[product_id] = product["name"]
        product_embs[product_id] = product["embedding"]

In [None]:
# Get the embedding for ID 6523 ("cloudveil women's excursion short"). You can also try other IDs, such as 12711, 18090, 19536 and 11863.
# Product whose stored embedding is used as the query vector.
query_product_id = "6523"
query_emb = product_embs[query_product_id]

print(query_emb)

In [None]:
# Ask the deployed index for the ten nearest neighbours of the query embedding.
response = my_index_endpoint.find_neighbors(
    queries=[query_emb],
    deployed_index_id=DEPLOYED_INDEX_ID,
    num_neighbors=10,
)

# One query was sent, so the matches are in the first element of the response.
# Print distance, ID and product name for each of them.
matches = response[0]
for match in matches:
    print(f"{match.distance:.2f} {match.id} {product_names[match.id]}")

# SEARCH the Index

In [None]:
from vertexai.language_models import TextEmbeddingModel


def text_embedding(text_to_embed, model_name="textembedding-gecko@001") -> list:
    """Return the embedding vector for a single piece of text.

    Args:
        text_to_embed: The text to embed.
        model_name: Identifier passed to ``TextEmbeddingModel.from_pretrained``.
            Defaults to the model this notebook has always used.
            NOTE(review): query vectors presumably need to come from the same
            model as the indexed vectors -- confirm before overriding.

    Returns:
        The ``values`` of the embedding returned for ``text_to_embed``.

    Raises:
        ValueError: If the model returns no embeddings for the text.
    """
    model = TextEmbeddingModel.from_pretrained(model_name)
    embeddings = model.get_embeddings([text_to_embed])
    # Fail with a clear message rather than an UnboundLocalError when the
    # response is empty.
    if not embeddings:
        raise ValueError("The embedding model returned no embeddings for the given text")
    # Exactly one text was submitted, so the first result is the one we want.
    vector = embeddings[0].values
    print(f"Length of Embedding Vector: {len(vector)}")
    return vector


## Specify a search

In [None]:
# Free-text query; it is turned into an embedding and used to search the index.
SEARCH_TERM = "Winter Jacket with Hood for men"

In [None]:
# Embed the search term with the text_embedding helper, then print the
# resulting vector.
SEARCH_TERM_EMBEDDING = text_embedding(SEARCH_TERM)

print(SEARCH_TERM_EMBEDDING)

## Query the index using the embedding created from the search term

In [None]:
# Ask the deployed index for the ten nearest neighbours of the search-term embedding.
response = my_index_endpoint.find_neighbors(
    queries=[SEARCH_TERM_EMBEDDING],
    deployed_index_id=DEPLOYED_INDEX_ID,
    num_neighbors=10,
)

# One query was sent, so the matches are in the first element of the response.
# Print distance, ID and product name for each of them.
matches = response[0]
for match in matches:
    print(f"{match.distance:.2f} {match.id} {product_names[match.id]}")