# Featurestore - Mars21
## Feature retrieval (online & batch)
Christos Aniftos \
Soeren Petersen

In [1]:
from google.api_core import operations_v1
from google.cloud.aiplatform_v1beta1.types import io as io_pb2
from google.cloud.aiplatform_v1beta1.types.feature import Feature
from google.cloud.aiplatform_v1beta1 import FeaturestoreServiceClient
from google.cloud.aiplatform_v1beta1 import FeaturestoreOnlineServingServiceClient
from google.cloud.aiplatform_v1beta1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import feature_selector as feature_selector_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_online_service as featurestore_online_service_pb2

In [2]:
# Notebook-wide configuration; every later cell derives its resource names from these.
PROJECT_ID = "feature-store-mars21"  # Change to your project id
LOCATION = "us-central1"
# Regional service endpoint, derived from LOCATION.
API_ENDPOINT = LOCATION + "-aiplatform.googleapis.com"
FEATURESTORE_ID = "universe"
# Entity type queried by this notebook. It must be "planets": the recorded
# outputs resolve to .../entityTypes/planets, and the reads below use the
# entity ids "mars"/"jupyter" with temperature features.
ENTITY = "planets"

## Define clients for FS admin and data management

In [3]:
# Both clients talk to the same regional endpoint:
#   admin_client - featurestore administration (CRUD) and batch reads
#   data_client  - online serving, i.e. reading feature values
endpoint_options = {"api_endpoint": API_ENDPOINT}

admin_client = FeaturestoreServiceClient(client_options=endpoint_options)
data_client = FeaturestoreOnlineServingServiceClient(client_options=endpoint_options)

In [4]:
# Fully-qualified resource names, built with the admin client's path helpers.
featurestore_args = (PROJECT_ID, LOCATION, FEATURESTORE_ID)

LOC_PATH = admin_client.common_location_path(PROJECT_ID, LOCATION)
FS_PATH = admin_client.featurestore_path(*featurestore_args)
ENTITY_PATH = admin_client.entity_type_path(*featurestore_args, ENTITY)
# '{}' is passed as the feature id, leaving a placeholder in the path
# (presumably to be filled in later with str.format).
FEATURE_PATH = admin_client.feature_path(*featurestore_args, ENTITY, '{}')

# Echo the resolved names so a wrong project/location/entity is spotted early.
for label, resource_path in (
        ("Location: \t", LOC_PATH),
        ("Feature Store: \t", FS_PATH),
        ("Entity: \t", ENTITY_PATH),
        ("Feature: \t", FEATURE_PATH)):
    print(label, resource_path)

Location: 	 projects/feature-store-mars21/locations/us-central1
Feature Store: 	 projects/feature-store-mars21/locations/us-central1/featurestores/universe
Entity: 	 projects/feature-store-mars21/locations/us-central1/featurestores/universe/entityTypes/planets
Feature: 	 projects/feature-store-mars21/locations/us-central1/featurestores/universe/entityTypes/planets/features/{}


## Read Values from FS Online Storage - Real time!

In [5]:
# Features to fetch, matched by id. The same selector is reused by the
# online reads and by the batch read further down.
selected_feature_ids = ["avg_max_temp_5d", "arr_max_temp_3d", "max_temp_std"]

feature_selector = feature_selector_pb2.FeatureSelector(
    id_matcher=feature_selector_pb2.IdMatcher(ids=selected_feature_ids))

In [6]:
%%time
data_client.read_feature_values(
    featurestore_online_service_pb2.ReadFeatureValuesRequest(
        entity_type=ENTITY_PATH,
        entity_id="mars",
        feature_selector=feature_selector))

CPU times: user 20.3 ms, sys: 0 ns, total: 20.3 ms
Wall time: 407 ms


header {
  entity_type: "projects/202835066335/locations/us-central1/featurestores/universe/entityTypes/planets"
  feature_descriptors {
    id: "avg_max_temp_5d"
  }
  feature_descriptors {
    id: "arr_max_temp_3d"
  }
  feature_descriptors {
    id: "max_temp_std"
  }
}
entity_view {
  entity_id: "mars"
  data {
    value {
      double_value: -1.1666666666666667
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
  data {
    value {
      double_array_value {
        values: 0.0
        values: -1.0
        values: -4.0
      }
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
  data {
    value {
      double_value: 0.7954092224592259
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
}

In [7]:
%%time
response_stream = data_client.streaming_read_feature_values(
    featurestore_online_service_pb2.StreamingReadFeatureValuesRequest(
        entity_type=ENTITY_PATH,
        entity_ids=["mars", "jupyter"],
        feature_selector=feature_selector))

for response in response_stream:
  print(response)

header {
  entity_type: "projects/202835066335/locations/us-central1/featurestores/universe/entityTypes/planets"
  feature_descriptors {
    id: "avg_max_temp_5d"
  }
  feature_descriptors {
    id: "arr_max_temp_3d"
  }
  feature_descriptors {
    id: "max_temp_std"
  }
}

entity_view {
  entity_id: "jupyter"
  data {
    value {
      double_value: -3.5
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
  data {
    value {
      double_array_value {
        values: 0.0
        values: -3.0
        values: -12.0
      }
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
  data {
    value {
      double_value: 0.7954092224592264
      metadata {
        generate_time {
          seconds: 1351814400
        }
      }
    }
  }
}

entity_view {
  entity_id: "mars"
  data {
    value {
      double_value: -1.1666666666666667
      metadata {
        generate_time {
          seconds: 13518144

In [19]:
# Upload the local read-instances CSV to Cloud Storage; the batch read request
# below references it through its gs:// URI.
!gsutil cp feature-request.csv gs://feature-store-mars21/feature-request.csv

Copying file://feature-request.csv [Content-Type=text/csv]...
/ [1 files][  144.0 B/  144.0 B]                                                
Operation completed over 1 objects/144.0 B.                                      


In [20]:
# Input: CSV of read instances on Cloud Storage, passed to the batch read request.
FEATURE_REQ_CSV_PATH = "gs://feature-store-mars21/feature-request.csv"
# Output: BigQuery table that receives the batch-read feature values.
EXPORT_TABLE_URI = "bq://feature-store-mars21.training.churn_data_v1"

In [21]:
# Input: the read instances come from the CSV uploaded to Cloud Storage.
read_instances_source = io_pb2.CsvSource(
    gcs_source=io_pb2.GcsSource(uris=[FEATURE_REQ_CSV_PATH]))

# Output: feature values are written to a BigQuery table.
# (FeatureValueDestination can alternatively point at a file destination on
# Cloud Storage, e.g. via its tfrecord_destination field.)
bigquery_sink = featurestore_service_pb2.FeatureValueDestination(
    bigquery_destination=io_pb2.BigQueryDestination(output_uri=EXPORT_TABLE_URI))

# Selection: read the features picked by `feature_selector` from the ENTITY
# entity type. entity_type_specs is a list, so specs for further entity types
# (each with its own selector) can be added to the same request.
entity_spec = featurestore_service_pb2.BatchReadFeatureValuesRequest.EntityTypeSpec(
    entity_type_id=ENTITY,
    feature_selector=feature_selector)

batch_serving_request = featurestore_service_pb2.BatchReadFeatureValuesRequest(
    featurestore=FS_PATH,
    csv_read_instances=read_instances_source,
    destination=bigquery_sink,
    entity_type_specs=[entity_spec])

In [22]:
%%time
try:
    print(admin_client.batch_read_feature_values(batch_serving_request).result())
except Exception as ex:
    print(ex)


CPU times: user 14.8 ms, sys: 5.29 ms, total: 20 ms
Wall time: 25.2 s
