## Get feature values
This notebook shows how feature values can be fetched from the online store with the Python SDK.  
Make sure that the first data sync for each feature view has finished before requesting feature values.

In [1]:
from google.cloud.aiplatform_v1beta1 import (
    FeatureOnlineStoreAdminServiceClient,
    FeatureOnlineStoreServiceClient,
    NearestNeighborQuery,
    SearchNearestEntitiesRequest,
)
from google.cloud.aiplatform_v1beta1.types import (
    feature_online_store_service as feature_online_store_service_pb2,
)

# Project and region hosting the feature online store.
PROJECT_ID = "vertex-feature-store"
REGION = "europe-west4"

# Full resource name of the online store; feature view names are appended
# to FEATUREVIEW_BASE to obtain the resource name of a single feature view.
FEATUREONLINESTORE = (
    f"projects/{PROJECT_ID}/locations/{REGION}"
    "/featureOnlineStores/example_online_store"
)
FEATUREVIEW_BASE = FEATUREONLINESTORE + "/featureViews/"

# Regional Vertex AI API endpoint.
API_ENDPOINT = REGION + "-aiplatform.googleapis.com"

In [2]:
# Admin client talking to the regional API endpoint; it is used for metadata
# lookups such as the online store itself and its feature view syncs.
admin_client = FeatureOnlineStoreAdminServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)

# Domain name of the store's dedicated serving endpoint, read from the
# online store resource.
PUBLIC_ENDPOINT = (
    admin_client.get_feature_online_store(name=FEATUREONLINESTORE)
    .dedicated_serving_endpoint
    .public_endpoint_domain_name
)

In [3]:
def sync_is_done(feature_view: str) -> bool:
    """Check if at least one sync for the feature view has finished.

    A sync counts as finished when its ``run_time`` has an ``end_time`` set.

    Args:
        feature_view: Resource name of the feature view whose syncs are listed.

    Returns:
        True if at least one finished sync exists, False otherwise. A message
        is printed in the False case.
    """
    # NOTE(review): a sync with an end_time may still have ended with an
    # error; consider also inspecting its final status - TODO confirm.
    syncs = admin_client.list_feature_view_syncs(parent=feature_view)

    # any() stops at the first finished sync instead of collecting every
    # sync record into a list just to test it for emptiness.
    if any(sync.run_time.HasField("end_time") for sync in syncs):
        return True

    print("No finished syncs found.")
    return False

### Retrieve feature values by entity ID

In [4]:
def get_feature_values_by_id(feature_view: str, entitiy_id: str):
    """Fetch the feature values of a single entity from a feature view.

    Args:
        feature_view: Resource name of the feature view to read from.
        entitiy_id: ID of the entity whose feature values are requested.
            (The parameter spelling is kept for backward compatibility.)

    Returns:
        The response returned by ``fetch_feature_values`` for the entity.

    Raises:
        ValueError: If no sync of the feature view has finished yet.
    """
    # Check the sync state first so that no serving client is created when
    # the request cannot be answered anyway.
    if not sync_is_done(feature_view):
        raise ValueError(
            "No data available for the feature view yet. Please wait for the first sync to finish."
        )

    data_client = FeatureOnlineStoreServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    request = feature_online_store_service_pb2.FetchFeatureValuesRequest(
        id=entitiy_id,
        feature_view=feature_view,
    )
    return data_client.fetch_feature_values(request=request)

In [5]:
# Look up the features of user "1" in the user feature view.
feature_view_name = "user_featureview"
entity_id = "1"

response = get_feature_values_by_id(
    feature_view=FEATUREVIEW_BASE + feature_view_name,
    entitiy_id=entity_id,
)
print(response)

key_values {
  features {
    value {
      int64_value: 1720105561738302
    }
    name: "feature_timestamp"
  }
  features {
    value {
      string_value: "user1"
    }
    name: "username"
  }
  features {
    value {
      string_value: "user1@gmail.com"
    }
    name: "email"
  }
  features {
    value {
      int64_value: 25
    }
    name: "age"
  }
  features {
    value {
      string_value: "M"
    }
    name: "gender"
  }
}



In [6]:
# Look up the rating features of user "5" in the user rating feature view.
feature_view_name = "user_rating_featureview"
entity_id = "5"

response = get_feature_values_by_id(
    feature_view=FEATUREVIEW_BASE + feature_view_name,
    entitiy_id=entity_id,
)
print(response)

key_values {
  features {
    value {
      int64_value: 1720105561224594
    }
    name: "feature_timestamp"
  }
  features {
    value {
      int64_value: 1
    }
    name: "num_rating_90d"
  }
  features {
    value {
      double_value: 4
    }
    name: "avg_rating_90d"
  }
}



### Nearest-neighbor search on embeddings

By entity ID


In [7]:
def get_neighbors_by_id(feature_view: str, entitiy_id: str, k: int):
    """Find the nearest neighbors of an entity that is already in the feature view.

    Args:
        feature_view: Resource name of the feature view to search in.
        entitiy_id: ID of the entity used as the query.
            (The parameter spelling is kept for backward compatibility.)
        k: Number of neighbors to request.

    Returns:
        The response returned by ``search_nearest_entities``.

    Raises:
        ValueError: If no sync of the feature view has finished yet.
    """
    # Check the sync state first so that no serving client is created when
    # the search cannot be answered anyway.
    if not sync_is_done(feature_view):
        raise ValueError(
            "No data available for the feature view yet. Please wait for the first sync to finish."
        )

    # NOTE: the client uses the public endpoint here
    data_client = FeatureOnlineStoreServiceClient(
        client_options={"api_endpoint": PUBLIC_ENDPOINT}
    )

    query = NearestNeighborQuery(entity_id=entitiy_id, neighbor_count=k)
    request = SearchNearestEntitiesRequest(feature_view=feature_view, query=query)
    return data_client.search_nearest_entities(request=request)

In [8]:
# Feature view holding the movie embeddings; `feature_view` and
# `embedding_dim` are reused by the embedding-based search below.
feature_view_name = "movie_embedding_featureview"
feature_view = FEATUREVIEW_BASE + feature_view_name
embedding_dim = 1536

neighbors = get_neighbors_by_id(feature_view=feature_view, entitiy_id="1", k=3)
print(neighbors)

nearest_neighbors {
  neighbors {
    entity_id: "1"
    distance: -0.99999988079071045
  }
  neighbors {
    entity_id: "24"
    distance: -0.55016779899597168
  }
  neighbors {
    entity_id: "8"
    distance: -0.54250270128250122
  }
}



By embedding

In [9]:
def get_neighbors_by_embedding(feature_view: str, embedding: list[float], k: int):
    """Find the nearest neighbors of an arbitrary embedding vector.

    Args:
        feature_view: Resource name of the feature view to search in.
        embedding: Query vector passed as the value of the query embedding.
        k: Number of neighbors to request.

    Returns:
        The response returned by ``search_nearest_entities``.

    Raises:
        ValueError: If no sync of the feature view has finished yet.
    """
    # Check the sync state first so that no serving client is created when
    # the search cannot be answered anyway.
    if not sync_is_done(feature_view):
        raise ValueError(
            "No data available for the feature view yet. Please wait for the first sync to finish."
        )

    # As for the search by entity ID, the public endpoint is used here.
    data_client = FeatureOnlineStoreServiceClient(
        client_options={"api_endpoint": PUBLIC_ENDPOINT}
    )

    query = NearestNeighborQuery(
        embedding=NearestNeighborQuery.Embedding(value=embedding), neighbor_count=k
    )
    request = SearchNearestEntitiesRequest(feature_view=feature_view, query=query)
    return data_client.search_nearest_entities(request=request)

In [10]:
# Query with a constant dummy vector of the configured embedding dimension.
neighbors = get_neighbors_by_embedding(
    feature_view=feature_view,
    embedding=[0.1] * embedding_dim,
    k=3,
)
print(neighbors)

nearest_neighbors {
  neighbors {
    entity_id: "3"
    distance: -0.20326116681098938
  }
  neighbors {
    entity_id: "27"
    distance: -0.19727082550525665
  }
  neighbors {
    entity_id: "2"
    distance: -0.19693556427955627
  }
}

