In [1]:

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
     

<h3>Recommending News Articles based on semantic textual similarity using Vertex Matching Engine</h3>

<h4> Overview</h4>
<p>This notebook implements the recommendation system discussed in this <a href="url">blog post</a>.</p>




<h3>Before you begin</h3>

<h4>Set your project ID</h4>
If you don't know your project ID, try the following:
<ul>
    <li>Run gcloud config list.</li>
    <li>Run gcloud projects list.</li>
</ul>

In [None]:
# Google Cloud project used by the cells below; replace the placeholder with
# your own project ID before running anything else.
PROJECT_ID = "[YOUR-PROJECT-ID]"

# Set the project id
# (IPython shell escape: {PROJECT_ID} is filled in from the Python variable above.)
! gcloud config set project {PROJECT_ID}

In [None]:
import json
import logging
import os
import random
import string
import sys
import time
from typing import Dict, Iterator, Type

import apache_beam as beam
import tensorflow as tf
from apache_beam.pipeline import PipelineOptions
from google.cloud import aiplatform
from google.cloud import aiplatform_v1
from google.protobuf import struct_pb2

# --- Notebook-wide configuration ---------------------------------------------
REGION = "us-west1"  # location passed to every Vertex AI / Dataflow call below
BUCKET_NAME = "gs://me101chris"  # bucket root for instances, embeddings and temp files
DEPLOYED_MODEL_ID = "7666164504761204736"  # ID of the uploaded embedding model

# Values derived from the settings above, used by the low-level service clients.
ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
PARENT = f"projects/{PROJECT_ID}/locations/{REGION}"

DIMENSIONS = 512  # the embeddings dim from the model
DISPLAY_NAME = "similar_article_index"

<h3>Data Ingestion</h3>
Note: we have downloaded the public <a href='https://www.kaggle.com/datasets/rmisra/news-category-dataset'>news category dataset</a> from Kaggle datasets and stored the data file in the GCS bucket. 
<ul>
    <li>Read the article data file (one JSON record per line) stored in Google Cloud Storage.</li>
    <li>Parse the files, remove special characters and concatenate the title and body.</li>
    <li>Write the transformed data into GCS in a JSONL format (i.e. prediction input instances) that Vertex AI consumes as input for batch prediction jobs.
</li>

</ul>



In [53]:
def process(data: str) -> Iterator[Dict[str, str]]:
    """Convert one raw dataset line into a batch-prediction instance.

    The line is parsed as JSON, the headline and short description are joined
    with a single space, a fixed set of control/special characters (including
    every double quote) is removed, and apostrophes are replaced with the
    ``##`` placeholder that ``build_pipeline`` turns back into an apostrophe.

    Args:
        data: One line of the article file: a JSON object with the
            ``headline``, ``short_description`` and ``link`` keys.

    Yields:
        A single dict ``{"article_id": <link>, "bytes_inputs": <cleaned text>}``.

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON.
        KeyError: If one of the three required keys is missing.
    """
    record = json.loads(data)
    text = record["headline"] + " " + record["short_description"]

    # Characters stripped from the text (duplicates in the list are harmless).
    filtered_char = [
        "\t", "\0", "\a", "\b", "\f", "\r", "\x0b", "\x0c", '"', "\xa0",
        "\n", "\xad", "\x99", "\x94", "\x93", "\x80", "\x7f",
    ]
    # One pass over the string instead of one full replace() per character.
    text = text.translate(str.maketrans("", "", "".join(filtered_char)))

    # (A former replace of two consecutive double quotes was dropped: it could
    # never match once every double quote has been stripped above.)

    # Apostrophes are encoded as "##" and restored later in the pipeline.
    text = text.replace("'", "##")

    yield {
        "article_id": record["link"],
        "bytes_inputs": text,
    }

def build_pipeline(pipeline: Type[beam.Pipeline]):
    """Attach the article-ingestion transforms to ``pipeline``.

    Reads the news article file from GCS, converts every line into a
    prediction instance with ``process``, serialises each instance as one JSON
    line and writes the lines back to GCS as ``.jsonl`` shards.

    Args:
        pipeline: The Beam pipeline the transforms are added to.

    Returns:
        None. The transforms are registered on ``pipeline`` as a side effect.
    """

    def _to_json_line(instance):
        # ``process`` encodes apostrophes as "##"; restore them, then let
        # json.dumps do the quoting. Building JSON from str(dict) breaks on
        # any character whose repr is a \x.. escape, which JSON rejects.
        restored = {key: value.replace("##", "'") for key, value in instance.items()}
        return json.dumps(restored)

    # Setting the data source and target
    articles_source_file = "gs://dataflow_blog_example_bct/News_Category_Dataset_v3.json"
    instances_prefix = "gs://dataflow_blog_example_bct/instances/instances"

    # NOTE(review): skip_header_lines=1 drops the first line of the file.
    # Confirm the file really starts with a header line; if it is plain JSON
    # lines, the first article is silently lost.
    (
        pipeline
        | "Read article file" >> beam.io.ReadFromText(articles_source_file, skip_header_lines=1)
        | "Parse article parse" >> beam.ParDo(process)
        | "Serialise instance as JSON" >> beam.Map(_to_json_line)
        | "Write instances to jsonl" >> beam.io.WriteToText(
            file_path_prefix=instances_prefix, file_name_suffix=".jsonl"
        )
    )
    return

In [54]:
# Run the ingestion pipeline with the Dataflow runner; "done" is printed once
# the `with` block has been left.
options = PipelineOptions(
    runner="DataflowRunner",
    project=PROJECT_ID,
    region=REGION,
    temp_location="gs://dataflow_blog_example_bct/temp",
)

with beam.Pipeline(options=options) as pipeline:
    build_pipeline(pipeline)

print("done")




done


<h3>Change of Model Signature with customized  output format</h3>
Note: we have downloaded the <a href='https://tfhub.dev/google/universal-sentence-encoder/4'>sentence encoder model</a> files and stored in the GCS bucket.<br> 
This function takes the original model, changes the output format (i.e. outputs from TensorFlow saved model signature) by adding the article_id, and saves a new copy as a 'wrapped' version in GCS.<br>
E.g.: {"article_id": article_id, "embedding_vector": [1,1,1,1,1,...]}

In [68]:
def model_change_signature(model_path: str) -> None:
    """Re-export a SavedModel with a serving signature that echoes the article ID.

    The model at ``model_path`` is loaded and wrapped in a ``serving_default``
    signature that takes two string batches (the text and the article ID) and
    returns ``{"article_id": ..., "embedding_vector": ...}``. The wrapped copy
    is saved to ``<model_path>/wrapped_model``.

    Args:
        model_path: Directory (local or ``gs://``) of the original SavedModel.
    """
    base_model = tf.saved_model.load(model_path)

    @tf.function
    def serve_fn(bytes_inputs, article_id):
        # The ID is passed through unchanged next to the computed embedding.
        embedding = base_model(bytes_inputs)
        return {
            "article_id": article_id,
            "embedding_vector": embedding,
        }

    # Both inputs are 1-D string batches of unknown length: text first, ID second.
    text_spec = tf.TensorSpec(shape=[None], dtype=tf.string)
    article_id_spec = tf.TensorSpec(shape=[None], dtype=tf.string)
    serving_default = serve_fn.get_concrete_function(text_spec, article_id_spec)

    export_dir = os.path.join(model_path, 'wrapped_model')
    tf.saved_model.save(
        base_model,
        export_dir,
        signatures={"serving_default": serving_default},
    )

# Wrap the Universal Sentence Encoder stored in GCS; the wrapped copy is saved
# under the "wrapped_model" sub-directory of the path given here.
print("starting...")
model_change_signature("gs://dataflow_blog_example_bct/universal-sentence-encoder_4/")
print("done")

starting...
INFO:tensorflow:Assets written to: gs://dataflow_blog_example_bct/universal-sentence-encoder_4/wrapped_model/assets
done


<h3>Upload the embedding model to Vertex AI</h3>
After upload, you can perform batch prediction. 
However, for stream prediction, you need to deploy the model to an endpoint.


In [69]:
# Upload the wrapped embedding model to Vertex AI.
aiplatform.init(project=PROJECT_ID, location=REGION)

# sync=False returns immediately; model.wait() below blocks until the upload
# has finished.
model = aiplatform.Model.upload(
    display_name="universal_encoder_embedding_model_01",
    artifact_uri=f"{BUCKET_NAME}/universal-sentence-encoder_4/wrapped_model",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-11:latest",
    sync=False,
)
model.wait()

print("done")

Creating Model
Create Model backing LRO: projects/543051426883/locations/us-west1/models/7666164504761204736/operations/671148494664237056
Model created. Resource name: projects/543051426883/locations/us-west1/models/7666164504761204736@1
To use this Model in another session:
model = aiplatform.Model('projects/543051426883/locations/us-west1/models/7666164504761204736@1')
done


<h3> Running Embedding model batch prediction</h3>

This batch prediction job will help us to transform all the articles into embeddings and save them in GCS in JSON format.

In [None]:
# Low-level job client pinned to the regional API endpoint.
api_endpoint = f"{REGION}-aiplatform.googleapis.com"
client_options = {"api_endpoint": api_endpoint}
aiplatform.init(project=PROJECT_ID, location=REGION)

client = aiplatform.gapic.JobServiceClient(client_options=client_options)
model = aiplatform.Model(DEPLOYED_MODEL_ID)
parent = f"projects/{PROJECT_ID}/locations/{REGION}"

# Job spec: read the JSONL instances from GCS and write JSONL predictions
# (the embeddings) back under <bucket>/embeddings/.
batch_prediction_job = {
    "display_name": "prediting embeddings",
    "model": model.resource_name,
    "input_config": {
        "instances_format": "jsonl",
        "gcs_source": {
            "uris": [f"{BUCKET_NAME}/instances/instances-00000-of-00001.jsonl"],
        },
    },
    "output_config": {
        "predictions_format": "jsonl",
        "gcs_destination": {"output_uri_prefix": f"{BUCKET_NAME}/embeddings/"},
    },
    "dedicated_resources": {
        "machine_spec": {
            "machine_type": "n1-standard-32",
            # Optional accelerator settings, currently disabled:
            # "accelerator_type": "NVIDIA_TESLA_T4",
            # "accelerator_count": 2,
        },
        "starting_replica_count": 2,
        "max_replica_count": 2,
    },
    "manual_batch_tuning_parameters": {
        # The default batch size is 4.
        "batch_size": 5,
    },
}

# Submit the job; this cell does not wait for it to finish.
job = client.create_batch_prediction_job(
    parent=parent,
    batch_prediction_job=batch_prediction_job,
)


<h3>Serve the embedding model to an online prediction endpoint </h3>

<p>You cannot perform real-time prediction without deploying your model to an endpoint on Vertex AI.</p>

<p>This step comes in handy in production, when we expect to receive one article at a time, map it to an embedding and query for similar ones.</p>

In [82]:
# Settings for the online-prediction endpoint.
project = PROJECT_ID
location = REGION
display_name = "Universal_encoder_endpoint_01"

# The two parts are joined into the machine type "n1-standard-16" below.
machine_type = "n1-standard"
vcpu = "16"

# Create an empty endpoint, then deploy the embedding model onto it.
endpoint = aiplatform.Endpoint.create(
    display_name=display_name,
    project=PROJECT_ID,
    location=REGION,
)
_ = endpoint.deploy(
    model=model,
    deployed_model_display_name=display_name,
    machine_type=f"{machine_type}-{vcpu}",
)

Creating Endpoint
Create Endpoint backing LRO: projects/543051426883/locations/us-west1/endpoints/2293370149308203008/operations/4677100363210293248
Endpoint created. Resource name: projects/543051426883/locations/us-west1/endpoints/2293370149308203008
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/543051426883/locations/us-west1/endpoints/2293370149308203008')
Deploying Model projects/543051426883/locations/us-west1/models/7666164504761204736 to Endpoint : projects/543051426883/locations/us-west1/endpoints/2293370149308203008
Deploy Endpoint model backing LRO: projects/543051426883/locations/us-west1/endpoints/2293370149308203008/operations/6942410975777652736
Endpoint model deployed. Resource name: projects/543051426883/locations/us-west1/endpoints/2293370149308203008


<h3>Creating A Matching Engine Index</h3>

<p>Before creating an index, we need to address one more thing: Matching Engine expects input of the format { "id":"", "embedding":[1,1,...]}.</p> <p>However, the batch prediction result from the previous step is stored in a different format, i.e. {"instance": {"article_id": "", "bytes_inputs": ""}, "prediction": {"article_id": "", "embedding_vector": [-0.0030, -0.06,..]}}.</p>

<p>The following pipeline puts the data in the right format for index creation.</p>

In [None]:
#Prepare embeddings for index creation
def process(data):
    """Reshape one batch-prediction result line into an index record.

    Args:
        data: One JSON line of the prediction output; its ``prediction``
            object must hold ``article_id`` and ``embedding_vector``.

    Yields:
        A single dict ``{"id": <article_id>, "embedding": <embedding_vector>}``.
    """
    prediction = json.loads(data)["prediction"]
    yield dict(
        id=prediction["article_id"],
        embedding=prediction["embedding_vector"],
    )

def build_pipeline(pipeline: Type[beam.Pipeline]):
    """Attach the embedding-reformatting transforms to ``pipeline``.

    Reads the batch-prediction result files from GCS, converts every line into
    an ``{"id": ..., "embedding": ...}`` record with ``process``, serialises
    each record as JSON and writes 20 ``.json`` shards under
    ``<BUCKET_NAME>/vector_embeddings/``.

    Args:
        pipeline: The Beam pipeline the transforms are added to.

    Returns:
        None. The transforms are registered on ``pipeline`` as a side effect.
    """
    # Setting the data source and target
    embedding_source = BUCKET_NAME+"/embeddings/prediction-universal_encoder_embedding_model_01-2023_03_10T10_24_23_312Z"
    # GCS object names always use "/", so join explicitly rather than with
    # os.path.join, whose separator depends on the operating system.
    embedding_source_json_file = f"{embedding_source}/prediction.results-*"

    (
        pipeline
        | "Read prediction results" >> beam.io.ReadFromText(embedding_source_json_file)
        | "Parse article parse" >> beam.ParDo(process)
        # Without this step WriteToText writes str(dict), i.e. a Python repr
        # with single quotes, which is not valid JSON.
        | "Format as JSON" >> beam.Map(json.dumps)
        | "Write instances to jsonl" >> beam.io.WriteToText(
            file_path_prefix=BUCKET_NAME+"/vector_embeddings/",
            file_name_suffix=".json",
            num_shards=20,
            shard_name_template="-SSSSS-of-NNNNN",
        )
    )
    return


In [None]:

# Run the reformatting pipeline. No runner is set here, so Beam's default
# runner is used; re-enable the line below to run on Dataflow instead.
options = PipelineOptions(
    # runner="DataflowRunner",
    project=PROJECT_ID,
    region=REGION,
    temp_location=f"{BUCKET_NAME}/temp",
)

with beam.Pipeline(options=options) as pipeline:
    build_pipeline(pipeline)

print("done")


<h4>Create The Matching Engine Index</h4>

In [None]:
#Create Index

def create_index(brute_force=False, stream_update=True):
    """Create the Matching Engine index and wait for the operation to finish.

    The index is configured with ``DIMENSIONS`` dimensions, cosine distance
    and the embeddings found under ``<BUCKET_NAME>/vector_embeddings/``.

    Args:
        brute_force: When True the index uses the brute-force algorithm;
            otherwise the tree-AH (approximate nearest neighbour) algorithm.
        stream_update: When True the index is created with the STREAM_UPDATE
            method, otherwise with BATCH_UPDATE.

    Returns:
        The result of the long-running create operation (the created index).
        Previously the function returned None, so existing callers that ignore
        the return value are unaffected.

    Raises:
        The error reported by the operation, if index creation failed.
    """
    # instantiate client handler
    index_client = aiplatform_v1.IndexServiceClient(
        client_options=dict(api_endpoint=ENDPOINT)
    )

    # set the algorithm to brute force or ANN
    if brute_force:
        algorithmConfig = struct_pb2.Struct(fields={
            "bruteForceConfig": struct_pb2.Value(struct_value=struct_pb2.Struct()),
        })
    else:  # ANN algorithm
        treeAhConfig = struct_pb2.Struct(fields={
            "leafNodeEmbeddingCount": struct_pb2.Value(number_value=500),
            "leafNodesToSearchPercent": struct_pb2.Value(number_value=10),
        })
        algorithmConfig = struct_pb2.Struct(fields={
            "treeAhConfig": struct_pb2.Value(struct_value=treeAhConfig),
        })

    # create the index config
    index_config = struct_pb2.Struct(fields={
        "dimensions": struct_pb2.Value(number_value=DIMENSIONS),
        "approximateNeighborsCount": struct_pb2.Value(number_value=150),
        "distanceMeasureType": struct_pb2.Value(string_value="COSINE_DISTANCE"),
        "algorithmConfig": struct_pb2.Value(struct_value=algorithmConfig),
    })

    # create the index metadata
    metadata = struct_pb2.Struct(fields={
        "config": struct_pb2.Value(struct_value=index_config),
        "contentsDeltaUri": struct_pb2.Value(string_value=BUCKET_NAME+"/vector_embeddings/"),
    })

    # enable stream update or batch update; only these two fields differ
    if stream_update:
        description = "stream update"
        update_method = aiplatform_v1.Index.IndexUpdateMethod.STREAM_UPDATE
    else:
        description = "batch update"
        update_method = aiplatform_v1.Index.IndexUpdateMethod.BATCH_UPDATE

    index = {
        "display_name": DISPLAY_NAME,
        "description": description,
        "metadata": struct_pb2.Value(struct_value=metadata),
        "index_update_method": update_method,
    }

    # submit create index request
    created_index = index_client.create_index(parent=PARENT, index=index)

    # poll the job update logs
    while not created_index.done():
        logging.info("Poll the operation to create index...")
        time.sleep(60)

    # done() is also True for a failed operation; result() raises in that case
    # instead of letting the failure pass silently, and hands back the index.
    return created_index.result()

# Create the index with the default arguments (tree-AH algorithm, streaming
# updates). The call blocks while create_index() polls the operation.
if __name__ == "__main__":
    create_index()


<h3>Create an endpoint for deployment</h3>

In [None]:
# VPC network the index endpoint is attached to.
# format is like 'projects/{project_number}/global/networks/{network_name}'
vpc_network = "projects/543051426883/global/networks/default"

# Client for index-endpoint operations, pinned to the regional API endpoint.
index_endpoint_client = aiplatform_v1.IndexEndpointServiceClient(
    client_options={"api_endpoint": ENDPOINT}
)

index_endpoint = {
    "display_name": f"{DISPLAY_NAME}_endpoint",
    "network": vpc_network,
}

# `r` is the long-running operation; the deploy cell reads r.result().name.
r = index_endpoint_client.create_index_endpoint(
    parent=PARENT,
    index_endpoint=index_endpoint,
)



<h3>Deploy the index to the endpoint</h3>

In [None]:
# deploy the index to the endpoint for querying
# NOTE(review): the index ID and project number below are hardcoded; replace
# them with the values of the index you created.
index = "3920964017765482496"

# r.result() blocks until the endpoint-creation operation has finished.
INDEX_ENDPOINT_NAME = r.result().name
DEPLOYED_INDEX_ID = DISPLAY_NAME + "_deployed_index"
# format is like 'projects/{project_number}/locations/{location}/indexes/{index_id}'
INDEX_RESOURCE_NAME = "projects/543051426883/locations/us-west1/indexes/" + index

deploy_index = {
    "id": DEPLOYED_INDEX_ID,
    "display_name": DEPLOYED_INDEX_ID,
    "index": INDEX_RESOURCE_NAME,
}

# submit the deploy index request
my_index_endpoint = index_endpoint_client.deploy_index(
    index_endpoint=INDEX_ENDPOINT_NAME, deployed_index=deploy_index
)

# Poll the operation until it has finished.
while not my_index_endpoint.done():
    print("Poll the operation to deploy index...")
    time.sleep(60)

# done() is also True for a failed operation; result() raises in that case so
# that "done" is only printed after a successful deployment.
my_index_endpoint.result()
print("done")

<h3>Query the index</h3>

In [1]:
#Query the index
from google.cloud import aiplatform
# DEPLOYED_INDEX_ID is expected to come from the deploy cell; uncomment the
# next line to query from a fresh kernel.
# DEPLOYED_INDEX_ID = "similar_article_index_deployed_index"
# NOTE(review): idx_enpoint is not used by the query further down, which looks
# the endpoint up by its numeric ID instead.
idx_enpoint = "projects/penguins-mbagaya/locations/us-west1/indexEndpoints/6777160975192686592" #r.result().name


candidates_embedding = [[0.0502016456, -0.099912588, -0.8578929521, -0.035492491, -0.0298160315, 0.0578172, -0.0591950715, 0.00769451866, 0.0521428809, -0.0768064186, -0.0360548832, 0.0511014275, 0.0607668199, -0.0167274252, 0.0278223436, -0.0733504072, -0.0553152338, 0.011630984, -0.0372170731, -0.03048319, -0.0561569594, 0.0393138938, 0.0769464225, 0.0459631607, 0.0162670016, 0.0580527596, -0.0380507633, 0.0437482595, -0.0216746423, -0.0175770447, -0.0174269807, 0.00592250936, 0.0195918, -0.0434573852, 0.0215063971, 0.0380142666, 0.0627181679, -0.00142126402, -0.0280464888, 0.081744507, 0.0385259166, -0.0194710102, 0.00883859769, 0.0104368478, 0.0115294335, 0.0502061024, 0.0377715714, -0.0470889807, 0.0254153796, -0.0689847767, -0.0717464909, -0.06491, 0.0126711708, 0.0112836855, -0.0541676097, -0.0356421433, -0.0164782982, -0.0610802695, 0.0423853733, 0.0488026142, -0.0289505068, -0.000677486591, -0.0476895459, 0.00380550791, -0.0596526153, -0.00640337588, -0.0220143981, 0.0359431, 0.0322990753, -0.0474538691, 0.054927133, -0.00737987272, 0.0140833808, 0.0443712324, -0.043933779, 0.0409256071, 0.00772624137, -0.0229224265, -0.0722916, 0.0597278886, -0.0442352816, 0.00616668351, 0.000982288155, 0.0568430573, -0.060338337, 0.0552312955, 0.00631992472, -0.0821219385, 0.0561952889, 0.0680704713, 0.0169057455, -0.0601352453, -0.00401292695, 0.0737325177, -0.0767178312, 0.0433958881, 0.0168477539, -0.0603372641, 0.0803615153, -0.0396288484, 0.00747376308, 0.0123901023, -0.0492022783, -0.0297769308, 0.0600016415, 0.0327744, -0.0383736119, 0.00626307772, 0.075845331, 0.0704802126, -0.019431429, 0.0147908265, 0.0115813557, 0.051621, 0.00542780757, 0.0571184829, 0.0614894032, -0.0470786393, 0.0623328239, -0.052912388, -0.0170839764, 0.00801151618, -0.039427653, 0.0572161525, 0.00167220051, 0.0406115353, 0.042143587, 0.0578572936, 0.00313223619, -0.0323182568, -0.0586032756, -0.0639747679, -0.00879042, -0.00917307194, 0.0182663631, 0.0175092723, -0.0254115406, 
0.00019361559, -0.0317655206, -0.0563735217, 0.078452155, 0.0507345274, 0.0173529927, -0.0490901545, -0.0822084844, 0.081648849, 0.0261113588, -0.0563871451, -0.0518015586, -0.0348771252, -0.0398101844, -0.0139666991, 0.0184999667, -0.0375076868, 0.0602380857, -0.0249493979, 0.0734049156, -0.0512630902, 0.0289901346, 0.0765650347, -0.0669102296, -0.0722649768, -0.0313954577, 0.0154615585, -0.00943078, 0.0787919909, 0.0673941597, 0.0311770607, 0.0354374647, -0.0265306365, 0.0437518097, -0.0732665658, 0.0489297248, 0.0655555576, -0.0393787138, -0.0388070829, 0.00970042683, -0.0311868507, -0.0827492177, -0.0206832308, -0.0519809723, -0.00493504945, -0.0685245842, -0.0282617398, 0.0355950221, -0.00734898634, 0.0376003645, 0.0253161155, 0.0584747083, 0.0212987959, -0.0384559371, 0.082592912, 0.0311547089, 0.0624623373, -0.0814144462, 0.0395354666, -0.0231002867, 0.0421228856, -0.0316452384, 0.00487433653, 0.081918, 0.0544711351, -0.0105061373, -0.0656491295, 0.0633879, 0.0184279773, 0.0244700145, -0.0186490659, 0.0489147231, -0.0642368719, -0.0202094428, 0.0435244404, 0.0441161953, -0.0397966728, 0.00379270967, -0.0826089457, -0.0361774899, 0.0148726385, 0.062031, 0.0274232719, 0.0018814319, -0.0396838896, -0.00243198988, 0.0693733543, 0.0090195518, -0.0290311389, 0.0483087078, 0.0133154988, -0.00168000278, 0.0018319498, 0.0636196136, -0.00776374154, -0.0360434279, -0.0814935938, -0.0158276167, 0.076635845, 0.0709537119, 0.031912, 0.0192611646, -0.0020914129, -0.0175421238, -0.0556306057, 0.0562396981, -0.0131248496, -0.0311832521, -0.0389447063, 0.0081713954, -0.0650436059, -0.0728608742, 0.0381941162, 0.0728306696, -0.0333044268, -0.0498978645, 0.0464298204, 0.0603470914, 0.0245335829, -0.0466825403, 0.0367866494, 0.0354647525, 0.0224961024, 0.00907058641, 0.0262220241, 0.00287109287, -0.0202807207, -0.0611601658, -0.0140948519, -0.0241945963, 0.0169137083, 0.04755513, -0.072648935, -0.0248457938, 0.0134985298, 0.0326770581, -0.0753605962, 0.0431795977, 0.0070912689, 
0.0538257435, -0.0551753193, 0.0802449882, -0.0442277, 0.0728385448, 0.0388735756, 0.0478703529, 0.0146021023, -0.0253923088, -0.0707472, -0.0463880636, 0.0812347755, -0.0660555139, 0.0275677182, 0.074266471, -0.00873696245, 0.0740572587, 0.0300100874, 0.0488660261, 0.00390042644, 0.0183884613, 0.0513804667, -0.0134160146, 0.0104410294, -0.0548431613, 0.00267668464, -0.0045957393, -0.0431737155, 0.0447403081, 0.0280321818, 0.0606655292, -0.044766, 0.00433506304, 0.0612494163, -0.0300246403, -0.0127954045, 0.0151460106, -0.0455670394, -0.0663784593, -0.0198004544, -0.0314645022, -0.0233538225, 0.0375389, 0.0419015922, 0.0114331888, -0.0567778945, -0.0205252562, 0.0640368462, 0.0304049719, 0.0418470241, 0.0718007684, 0.0249215048, 0.0485383123, 0.0256850589, 0.0161422547, -0.0715882853, -0.031735085, 0.00729028787, -0.0248725768, -0.0534334667, -0.00668986514, -0.0530478396, 0.0210357681, -0.0808970109, 0.0302360952, -0.0505049229, 0.000117966345, 0.0827040151, 0.01088777, -0.0417144224, -0.050915949, 0.0712650046, -0.0804398507, 0.013461112, -0.0690629, 0.0565128922, 0.0593511537, 0.054564774, 0.00770380441, 0.0287936367, -0.0799678564, 0.0595743619, -0.0295372885, 0.0749069, -0.0206449535, -0.0704574063, 0.0716846809, 0.0305647962, 0.0119375754, 0.0403936505, 0.0181931518, -0.0112507623, 0.0250820667, 0.0193080865, -0.00632366771, -0.0114755929, -0.062191505, 0.0511554033, 0.0264797751, -0.04922387, 0.067763187, 0.0448058881, -0.0122261066, -0.0782086775, -0.0218615942, 0.0469329022, -0.0729536265, 0.039681755, 0.0633885041, 0.0739140883, 0.000466615224, 0.00981137808, 0.0484512709, -0.0618514754, -0.0511878841, -0.0678248182, 0.0320391618, 0.0265220441, 0.0426274426, -0.0408450514, -0.0365734622, 0.0174115263, -0.0102628972, -0.0563230067, 0.029912306, 0.0348706208, -0.0441398956, 0.0484397821, -0.00193183741, -0.053231325, -0.0031785618, 0.0546823815, -0.0622022822, -0.0735706, 0.00255643437, -0.0140257142, -0.00473003043, 0.0171921067, -0.0228456222, 
0.0364877656, 0.0294562951, 0.0120815663, -0.00332700461, -0.0322813839, -0.0772143155, -0.0101970853, -0.0344852097, -0.0511004366, -0.059917938, -0.0165633541, 0.00150151621, 0.0252316985, -0.0292765591, 0.0350353047, 0.0313969888, -0.037920475, -0.0100262454, 0.0595380031, 0.0212509129, 0.00227843691, 0.0123936273, -0.0618601, -0.0711660236, -0.0463614725, -0.0176716968, -0.041117616, 0.0501014404, 0.0740635619, 0.0316318125, 0.0319646634, -0.041036129, -0.0492766201, -0.0410203375, 0.0154050356, 0.00724651804, -0.00200869422, 0.0144809075, -0.00493975263, -0.0142480489, 0.00977388769, -0.0168318432, 0.0133904684, -0.0764209, 0.0188833009, 0.0278521795, 0.0216211285, 0.0184155144, 0.0710755959, -0.0332404673, 0.0428488292, 0.0556732714, -0.0496581122, -0.018820351, -0.0691318437, -0.0581589, 0.0407969728, -0.0256147366, 0.0421869494, -0.00859052, -0.0579928793, 0.0408137292, 0.0261646267, 0.000727738428, 0.0523216, -0.00628263596, 0.00592074869, 0.00385966455, -0.0548521131, -0.0591544136, 0.0423886739, 0.0418718494, -0.00265934318, -0.000706568069, -0.0224871896, -0.0124037191, -0.0377913229, -0.031713672, 0.00600458356, 0.0358984321, -0.0611761697, 0.0498134196, 0.0270567555, -0.0348888636, -0.0337959, -0.0809504, -0.0481201671, -0.0369402543, 0.0323536061, -0.0332562253, 0.0485725701, -0.0484549701, 0.0437381528, 0.00597401569, 0.0328890048, -0.0480710231, 0.0604506582]]
# Look up the deployed index endpoint by its numeric ID.
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
    "6777160975192686592",
    location="us-west1",
)

# Ask for the 10 nearest neighbours of each query embedding.
response = my_index_endpoint.match(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=candidates_embedding,
    num_neighbors=10,
)
response

[[MatchNeighbor(id='https://www.huffingtonpost.com/entry/kentucky-voting-rights_us_5654806be4b0258edb32ebdc', distance=0.204626202583313),
  MatchNeighbor(id='https://www.huffingtonpost.com/entry/felon-voting-rights-restoration_us_5655de80e4b079b28189da1e', distance=0.44457077980041504),
  MatchNeighbor(id='https://www.huffingtonpost.com/entry/virginia-felons-voting-rights_us_57bb354ce4b0b51733a4d9f6', distance=0.5083640217781067),
  MatchNeighbor(id='https://www.huffingtonpost.com/entry/rick-scott-felon-disenfranchisement_us_5ac50856e4b09ef3b242f45f', distance=0.512243390083313),
  MatchNeighbor(id='https://www.huffpost.com/entry/florida-former-felons-vote-restored_n_5c332425e4b0bcb4c25da9b6', distance=0.5165200233459473),
  MatchNeighbor(id='https://www.huffingtonpost.com/entry/felony-voting-laws-are-confusing-activists-would-ditch_us_5ac6371ce4b01190c1ed6e41', distance=0.5221322178840637),
  MatchNeighbor(id='https://www.huffingtonpost.com/entry/alabama-felon-moral-turpitude_us_597c