# Using Feature Store for training and serving

In [1]:
import copy
import numpy as np
import os
import pprint
import pandas as pd
import random
import tensorflow as tf
import time

from google.cloud import aiplatform
from google.cloud import bigquery_datatransfer
from google.cloud import bigquery
from google.cloud import exceptions

from google.cloud.aiplatform_v1beta1 import (
    FeaturestoreOnlineServingServiceClient, FeaturestoreServiceClient)
from google.cloud.aiplatform_v1beta1.types import FeatureSelector, IdMatcher
from google.cloud.aiplatform_v1beta1.types import \
    entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_monitoring as featurestore_monitoring_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_online_service as featurestore_online_service_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2
from google.protobuf.duration_pb2 import Duration

## Configure lab settings

In [3]:
# Lab settings: target GCP project, region, and the prefix used to name the
# featurestore and BigQuery dataset created by this notebook.
PROJECT_ID = "jk-test-1002"
REGION = "us-central1"
PREFIX = "jktest2"

# Regional service endpoint, derived from REGION.
API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)

## Create Featurestore clients

Admin client for CRUD operations.

In [4]:
# Admin client bound to the regional API endpoint.
admin_client = FeaturestoreServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)
# Location-level parent path reused by the create/list/search calls below.
BASE_RESOURCE_PATH = admin_client.common_location_path(PROJECT_ID, REGION)

Data client for accessing features.

In [5]:
# Online-serving client, pointed at the same regional endpoint as the admin client.
data_client = FeaturestoreOnlineServingServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)


## Create Featurestore and define schemas

### Create a feature store

In [6]:
# Featurestore ID derived from the lab prefix.
FEATURESTORE_ID = PREFIX + '_featurestore'

In [7]:
# Start creating the featurestore with three fixed online-serving nodes.
# The call returns a long-running operation, which is awaited in the next cell.
online_serving_config = featurestore_pb2.Featurestore.OnlineServingConfig(
    fixed_node_count=3
)
create_request = featurestore_service_pb2.CreateFeaturestoreRequest(
    parent=BASE_RESOURCE_PATH,
    featurestore_id=FEATURESTORE_ID,
    featurestore=featurestore_pb2.Featurestore(
        online_serving_config=online_serving_config,
    ),
)
create_lro = admin_client.create_featurestore(create_request)

In [8]:
# Block until the create operation finishes; its result is shown as the cell output.
create_lro.result() # Wait

name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore"

#### List feature stores

In [9]:
# List the featurestores under the project/region parent path.
admin_client.list_featurestores(parent=BASE_RESOURCE_PATH)

ListFeaturestoresPager<featurestores {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore"
  create_time {
    seconds: 1623720006
    nanos: 652243000
  }
  update_time {
    seconds: 1623720006
    nanos: 716464000
  }
  etag: "AMEw9yOmfMnQOsTQUYeGo3dBbnfEns-bw-1GZqR8blzp_IhqIdg_gabHMEZ0Z4Izv3X2"
  online_serving_config {
    fixed_node_count: 3
  }
  state: STABLE
}
>

#### Get your feature store

In [10]:
# Fetch the featurestore created above by its full resource name.
featurestore_name = admin_client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
admin_client.get_featurestore(name=featurestore_name)

name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore"
create_time {
  seconds: 1623720006
  nanos: 652243000
}
update_time {
  seconds: 1623720006
  nanos: 716464000
}
etag: "AMEw9yPeWo1PU0UY_Ok-ZMEsF6ocdr6nvyzWQedwE0BPRXE9VeQ8C55rM9AZro-dBj57"
online_serving_config {
  fixed_node_count: 3
}
state: STABLE

### Create Entity Type

You can specify a monitoring config which will by default be inherited by all Features under this EntityType.

In [11]:
# ID and human-readable description of the entity type that holds trip features.
ENTITY_TYPE_ID = "trips"
DESCRIPTION = "Taxi trips"

In [12]:
# Monitoring config attached to the entity type: snapshot analysis with a
# one-day (86400 s) interval.
monitoring_config = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
    snapshot_analysis=featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
        monitoring_interval=Duration(seconds=86400),
    ),
)
entity_type_request = featurestore_service_pb2.CreateEntityTypeRequest(
    parent=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
    entity_type_id=ENTITY_TYPE_ID,
    entity_type=entity_type_pb2.EntityType(
        description=DESCRIPTION,
        monitoring_config=monitoring_config,
    ),
)
entity_type_lro = admin_client.create_entity_type(entity_type_request)

# Block until the entity type has been created, then print the result.
print(entity_type_lro.result())

name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips"
etag: "AMEw9yP07VywJ7lEs2egzVhJjv4p4TwLZtZXsBIAEgjaz5SJcx5l"



### Create Features

In [13]:
# One (feature_id, value type, description) row per feature, in the order the
# create requests are submitted.
value_types = feature_pb2.Feature.ValueType
feature_definitions = [
    ("trip_month", value_types.INT64, "Month of a trip"),
    ("trip_day", value_types.INT64, "Day of a trip"),
    ("trip_day_of_week", value_types.INT64, "Day of a week"),
    ("trip_hour", value_types.INT64, "Hour of a trip"),
    ("trip_seconds", value_types.INT64, "Trip duration in seconds"),
    ("payment_type", value_types.STRING, "Payment type"),
    ("pickup_grid", value_types.STRING, "Pick location"),
    ("dropoff_grid", value_types.STRING, "Dropoff location"),
    ("euclidean", value_types.DOUBLE, "Euclidean distance between pick up and dropoff"),
    ("trip_miles", value_types.DOUBLE, "Miles travelled during the trip"),
    ("tip_bin", value_types.INT64, "Trip tip classification"),
]

# Build one CreateFeatureRequest per row; consumed by the batch-create cell below.
features = [
    featurestore_service_pb2.CreateFeatureRequest(
        feature=feature_pb2.Feature(
            value_type=value_type,
            description=description,
        ),
        feature_id=feature_id,
    )
    for feature_id, value_type, description in feature_definitions
]

In [14]:
# Create all features under the trips entity type in one batch call and wait
# for the operation to finish; the result is shown as the cell output.
entity_type_path = admin_client.entity_type_path(
    PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
)
admin_client.batch_create_features(parent=entity_type_path, requests=features).result()

features {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_month"
  etag: "AMEw9yOteYWrXooVUpqe7lZTR0qRoPI0qzIVWsf1uwtZBlSuBW0t"
}
features {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_day"
  etag: "AMEw9yM2lyrU0sgT5cwPqDRa62qYlsi3SMzG8I1Ylt2nBKvZV_8j"
}
features {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_day_of_week"
  etag: "AMEw9yMS5O8I2LJ6kGzTtduaDXnTVCEtYmplGaYoJbobrriFc38k"
}
features {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_hour"
  etag: "AMEw9yMZBVwtXUxRS3UFyTyL1ISn836a3laXBP0Syjmgr6aUMJyO"
}
features {
  name: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_seconds"
  etag: "AMEw9yNk6ozuDStKzGu9waMevtMW0h9mUYqE

### Discover features

#### Search for all features across all featurestores

In [15]:
# Print the description and resource name of every feature found in this location.
for feature in admin_client.search_features(location=BASE_RESOURCE_PATH):
    print(feature.description, feature.name, sep="\n")

Dropoff location
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/dropoff_grid
Euclidean distance between pick up and dropoff
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/euclidean
Payment type
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/payment_type
Pick location
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/pickup_grid
Trip tip classification
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/tip_bin
Day of a trip
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_day
Day of a week
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_day_of_week
Hour of a trip
projects/890570890

#### Search for all features that are of type DOUBLE

In [16]:
# Restrict the search to features whose value type is DOUBLE.
double_search_request = featurestore_service_pb2.SearchFeaturesRequest(
    location=BASE_RESOURCE_PATH, query="value_type=DOUBLE"
)
features = admin_client.search_features(double_search_request)

for feature in features:
    print(feature.description, feature.name, sep="\n")

Euclidean distance between pick up and dropoff
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/euclidean
Miles travelled during the trip
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/trip_miles


#### Search for all features with specific keywords in their ID

In [17]:
# Combine two filters: feature ID contains "grid" and value type is STRING.
grid_search_request = featurestore_service_pb2.SearchFeaturesRequest(
    location=BASE_RESOURCE_PATH, query="feature_id:grid AND value_type=STRING"
)
features = admin_client.search_features(grid_search_request)

for feature in features:
    print(feature.description, feature.name, sep="\n")

Dropoff location
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/dropoff_grid
Pick location
projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips/features/pickup_grid


## Import Feature Values

### Prepare import table

In [18]:
# BigQuery staging location for the feature values to import.
BQ_DATASET_NAME = PREFIX + '_dataset'
BQ_TABLE_NAME = 'feature_staging_table'
BQ_LOCATION = 'US'
# Row cap and trip year applied by the staging query.
SAMPLE_SIZE = 500000
YEAR = 2020

In [19]:
# Bind the client to PROJECT_ID explicitly instead of relying on the ambient
# default project, so the dataset and the query jobs issued through this
# client use the same project as the rest of the notebook.
client = bigquery.Client(project=PROJECT_ID)

dataset_id = f'{PROJECT_ID}.{BQ_DATASET_NAME}'
dataset = bigquery.Dataset(dataset_id)
dataset.location = BQ_LOCATION

# Re-running the cell is safe: a Conflict only means the dataset is already there.
try:
    dataset = client.create_dataset(dataset, timeout=30)
    print('Created dataset: ', dataset_id)
except exceptions.Conflict:
    print('Dataset {} already exists'.format(dataset_id))

Dataset jk-test-1002.jktest2_dataset already exists


In [20]:
# DDL template that (re)creates the staging table from the public Chicago taxi
# trips dataset. The @PROJECT, @DATASET, @TABLE, @YEAR and @LIMIT tokens are
# plain-text placeholders substituted with str.replace before the query runs.
# NOTE(review): LIMIT is applied without ORDER BY, so the sampled rows are not
# guaranteed to be the same between runs.
sql_script_template = '''
CREATE OR REPLACE TABLE `@PROJECT.@DATASET.@TABLE` 
AS (
    WITH
      taxitrips AS (
      SELECT
        unique_key AS trip_id,
        FORMAT_DATETIME('%Y-%d-%m', trip_start_timestamp) AS date,
        trip_start_timestamp,
        trip_seconds,
        trip_miles,
        payment_type,
        pickup_longitude,
        pickup_latitude,
        dropoff_longitude,
        dropoff_latitude,
        tips,
        fare
      FROM
        `bigquery-public-data.chicago_taxi_trips.taxi_trips`
      WHERE 1=1 
      AND pickup_longitude IS NOT NULL
      AND pickup_latitude IS NOT NULL
      AND dropoff_longitude IS NOT NULL
      AND dropoff_latitude IS NOT NULL
      AND trip_miles > 0
      AND trip_seconds > 0
      AND fare > 0
      AND EXTRACT(YEAR FROM trip_start_timestamp) = @YEAR
    )
    SELECT
      trip_id,
      trip_start_timestamp,
      EXTRACT(MONTH from trip_start_timestamp) as trip_month,
      EXTRACT(DAY from trip_start_timestamp) as trip_day,
      EXTRACT(DAYOFWEEK from trip_start_timestamp) as trip_day_of_week,
      EXTRACT(HOUR from trip_start_timestamp) as trip_hour,
      trip_seconds,
      trip_miles,
      payment_type,
      ST_AsText(
          ST_SnapToGrid(ST_GeogPoint(pickup_longitude, pickup_latitude), 0.1)
      ) AS pickup_grid,
      ST_AsText(
          ST_SnapToGrid(ST_GeogPoint(dropoff_longitude, dropoff_latitude), 0.1)
      ) AS dropoff_grid,
      ST_Distance(
          ST_GeogPoint(pickup_longitude, pickup_latitude), 
          ST_GeogPoint(dropoff_longitude, dropoff_latitude)
      ) AS euclidean,
      IF((tips/fare >= 0.2), 1, 0) AS tip_bin,
      CASE (ABS(MOD(FARM_FINGERPRINT(date),10))) 
          WHEN 9 THEN 'TEST'
          WHEN 8 THEN 'VALIDATE'
          ELSE 'TRAIN' END AS data_split
    FROM
      taxitrips
    LIMIT @LIMIT
)
'''


# Fill in the template placeholders one after another, in this fixed order.
substitutions = (
    ('@PROJECT', PROJECT_ID),
    ('@DATASET', BQ_DATASET_NAME),
    ('@TABLE', BQ_TABLE_NAME),
    ('@YEAR', str(YEAR)),
    ('@LIMIT', str(SAMPLE_SIZE)),
)
sql_script = sql_script_template
for placeholder, value in substitutions:
    sql_script = sql_script.replace(placeholder, value)

# Submit the DDL statement and block until the job has finished.
job = client.query(sql_script)
job.result()

<google.cloud.bigquery.table._EmptyRowIterator at 0x7f8a12c47410>

### Import features

In [21]:
# Staging-table column holding the entity ID, and the table's bq:// URI.
entity_id_field = 'trip_id'
bq_table = f'bq://{PROJECT_ID}.{BQ_DATASET_NAME}.{BQ_TABLE_NAME}'

# IDs of the features to import, in the order they are listed in the request.
imported_feature_ids = [
    "tip_bin",
    "trip_month",
    "trip_day",
    "trip_day_of_week",
    "trip_hour",
    "payment_type",
    "pickup_grid",
    "dropoff_grid",
    "euclidean",
    "trip_seconds",
    "trip_miles",
]

# Import request: read the listed features from the staging table, taking each
# value's timestamp from the trip_start_timestamp column.
import_request = featurestore_service_pb2.ImportFeatureValuesRequest(
    entity_type=admin_client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
    ),
    bigquery_source=io_pb2.BigQuerySource(input_uri=bq_table),
    entity_id_field=entity_id_field,
    feature_specs=[
        featurestore_service_pb2.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
        for feature_id in imported_feature_ids
    ],
    feature_time_field="trip_start_timestamp",
    worker_count=1,
)

In [22]:
# Start the import; the returned long-running operation is awaited in the next cell.
ingestion_lro = admin_client.import_feature_values(import_request)

In [26]:
# Block until ingestion completes; the displayed result reports the imported
# entity and feature-value counts.
ingestion_lro.result()

imported_entity_count: 500000
imported_feature_value_count: 5500000

## Online serving

The
[Online Serving APIs](https://cloud.google.com/vertex-ai/featurestore/docs/reference/rpc/google.cloud.aiplatform.v1beta1#featurestoreonlineservingservice)
let you serve feature values for small batches of entities. They are designed for latency-sensitive services, such as online model prediction. For example, for a movie service, you might want to quickly show movies that the current user would most likely watch by using online predictions.

### Read one entity per request

The ReadFeatureValues API is used to read feature values of one entity; hence
its custom HTTP verb is `readFeatureValues`. By default, the API returns the latest value of each feature, meaning the feature value with the most recent timestamp.

To read feature values, specify the entity ID and features to read. The response
contains a `header` and an `entity_view`. Each row of data in the `entity_view`
contains one feature value, in the same order of features as listed in the response header.

In [39]:
# Select the three features to fetch by their IDs (reused by the streaming read below).
feature_selector = FeatureSelector(
    id_matcher=IdMatcher(ids=["tip_bin", "trip_miles", "trip_day"])
)

# Read the selected features of a single trip from the trips entity type.
# The entity ID is a trip_id taken from the staging table.
trip_entity_type = admin_client.entity_type_path(
    PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
)
read_request = featurestore_online_service_pb2.ReadFeatureValuesRequest(
    entity_type=trip_entity_type,
    entity_id="13311b767c033d82e37439228ef23fd1d018d061",
    feature_selector=feature_selector,
)
features = data_client.read_feature_values(read_request)
features

header {
  entity_type: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips"
  feature_descriptors {
    id: "tip_bin"
  }
  feature_descriptors {
    id: "trip_miles"
  }
  feature_descriptors {
    id: "trip_day"
  }
}
entity_view {
  entity_id: "13311b767c033d82e37439228ef23fd1d018d061"
  data {
    value {
      int64_value: 0
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
  data {
    value {
      double_value: 1.1
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
  data {
    value {
      int64_value: 19
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
}

### Read multiple entities per request

In [40]:
# Request the same selected features for two trips in a single streaming call.
trip_ids = [
    "13311b767c033d82e37439228ef23fd1d018d061",
    "0e9be1edf79c3d88b4da5b9d11c2651538fb33b4",
]
streaming_request = featurestore_online_service_pb2.StreamingReadFeatureValuesRequest(
    entity_type=admin_client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
    ),
    entity_ids=trip_ids,
    feature_selector=feature_selector,
)
response_stream = data_client.streaming_read_feature_values(streaming_request)

In [41]:
# Print every response yielded by the stream. In the recorded output the first
# response carries the header and the following ones carry one entity_view each.
for response in response_stream:
    print(response)

header {
  entity_type: "projects/890570890982/locations/us-central1/featurestores/jktest2_featurestore/entityTypes/trips"
  feature_descriptors {
    id: "tip_bin"
  }
  feature_descriptors {
    id: "trip_miles"
  }
  feature_descriptors {
    id: "trip_day"
  }
}

entity_view {
  entity_id: "0e9be1edf79c3d88b4da5b9d11c2651538fb33b4"
  data {
    value {
      int64_value: 0
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
  data {
    value {
      double_value: 0.9
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
  data {
    value {
      int64_value: 19
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
}

entity_view {
  entity_id: "13311b767c033d82e37439228ef23fd1d018d061"
  data {
    value {
      int64_value: 0
      metadata {
        generate_time {
          seconds: 1592524800
        }
      }
    }
  }
  data {
    value {


## Batch serving

Batch Serving is used to fetch a large batch of feature values with high throughput, typically for training a model or for batch prediction. In this section, you will learn how to prepare training examples by calling the BatchReadFeatureValues API.

### Use case



In [30]:
%%bigquery
-- Preview ten rows of the staging table that the feature values were imported from.
-- NOTE(review): the table name is hard-coded and has to be kept in sync with
-- PROJECT_ID, BQ_DATASET_NAME and BQ_TABLE_NAME by hand.
SELECT * 
FROM
`jk-test-1002.jktest2_dataset.feature_staging_table`
LIMIT 10

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 522.13query/s]                          
Downloading: 100%|██████████| 10/10 [00:01<00:00,  9.52rows/s]


Unnamed: 0,trip_id,trip_start_timestamp,trip_month,trip_day,trip_day_of_week,trip_hour,trip_seconds,trip_miles,payment_type,pickup_grid,dropoff_grid,euclidean,tip_bin,data_split
0,13311b767c033d82e37439228ef23fd1d018d061,2020-06-19 00:00:00+00:00,6,19,6,0,358,1.1,Prcard,POINT(-87.7 42),POINT(-87.7 42),0.0,0,TRAIN
1,0e9be1edf79c3d88b4da5b9d11c2651538fb33b4,2020-06-19 00:00:00+00:00,6,19,6,0,240,0.9,Cash,POINT(-87.7 42),POINT(-87.7 42),2406.040979,0,TRAIN
2,253a9ed082c397874cb5240c04af596a43ff745b,2020-06-19 00:00:00+00:00,6,19,6,0,383,0.8,Credit Card,POINT(-87.6 41.9),POINT(-87.6 41.9),0.0,0,TRAIN
3,3c62f8582be02c4f38b6980c554184b8ac4eaea5,2020-02-20 00:00:00+00:00,2,20,5,0,921,6.3,Cash,POINT(-87.8 41.9),POINT(-87.7 41.9),10106.03091,0,VALIDATE
4,6d1fc342d4059112b7dded8987dabb1af79302ee,2020-02-20 00:00:00+00:00,2,20,5,0,305,1.03,Cash,POINT(-87.7 41.9),POINT(-87.7 42),2400.207596,0,VALIDATE
5,f0c56f72fd74d36cff015ea126634d9627cad1f9,2020-02-20 00:00:00+00:00,2,20,5,0,1620,16.8,Cash,POINT(-87.9 42),POINT(-87.6 41.9),24859.93176,0,VALIDATE
6,647dc86ec97d68d299250a46d6070b6727213a2c,2020-02-20 00:00:00+00:00,2,20,5,0,675,1.86,Cash,POINT(-87.7 41.9),POINT(-87.6 41.9),3788.78383,0,VALIDATE
7,282c6baade7670bdcf5b029a6be7b686e6c145f5,2020-02-20 00:00:00+00:00,2,20,5,0,420,1.5,Credit Card,POINT(-87.6 41.9),POINT(-87.6 41.9),2078.005254,0,VALIDATE
8,339cf1db2ccc4e6034fdfb57f9bac4b21efe32f1,2020-02-20 00:00:00+00:00,2,20,5,0,1401,18.47,Cash,POINT(-87.9 42),POINT(-87.6 41.9),25467.906912,0,VALIDATE
9,6ccda2a0c84366b2cc89627619009484c62c019d,2020-02-20 00:00:00+00:00,2,20,5,0,300,1.0,Cash,POINT(-87.6 41.9),POINT(-87.6 41.9),1593.255669,0,VALIDATE


## Clean up

In [None]:
# Delete the featurestore created by this notebook. force=True is passed so the
# delete also covers the entity types and features it contains (see the API
# reference for DeleteFeaturestoreRequest.force).
# Like the other long-running operations in this notebook, wait for completion
# so that a failed deletion raises here instead of going unnoticed.
admin_client.delete_featurestore(
    featurestore_service_pb2.DeleteFeaturestoreRequest(
        name=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        force=True
    )
).result()