# Feature Store

## Params

In [18]:
# Notebook parameters. The trailing "# @param" annotations are form-field
# markers and are left exactly as they were.

# GCP project and region; used to build the API endpoint and resource paths.
PROJECT_ID = "egon-ongcp-demos"  # @param {type:"string"}
REGION = "us-central1" # @param {type:"string"}

# NOTE(review): INPUT_CSV_FILE is not referenced by any visible cell - confirm it is still needed.
INPUT_CSV_FILE = "gs://egon-ongcp-demos-vertex-pipelines-artifact/dataset.csv" # @param {type:"string"}
# BigQuery table the feature values are imported from (wrapped into a bq:// URI later).
BIGQUERY_TABLE = "egon-ongcp-demos.mlops_e2e_20210523223149.dataset_cast" # @param {type:"string"}
# Column passed as entity_id_field on import.
ID_COLUMN = "id" # @param {type:"string"}
# Columns for which no Feature is created (and therefore no values imported).
IGNORE_COLUMNS_INGESTION = ["id", "int64_field_0"] # @param {type:"list[string]"}

# The featurestore ID is this prefix plus a run timestamp.
FEATURE_STORE_NAME_PREFIX = "mlops_e2e" # @param {type:"string"}
# Passed as the online serving config's fixed_node_count.
FEATURE_STORE_NODE_COUNT = 1 # @param {type:"int"}

ENTITY_TYPE_ID = "users" # @param {type:"string"}
ENTITY_TYPE_DESCRIPTION = "Users that uses or ask for credit" # @param {type:"string"}
# Snapshot-analysis monitoring interval, in seconds (wrapped in a Duration).
ENTITY_TYPE_MONITORING_INTERVAL = 3600 # @param {type:"int"}

# Passed as worker_count on the import request.
IMPORT_WORKER_COUNT = 1 # @param {type:"int"}

In [19]:
# Values derived from the params: the BigQuery URI handed to the import job
# and the regional API host both clients connect to.
BIGQUERY_SOURCE = "bq://" + BIGQUERY_TABLE
API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)

## Imports and Clients

In [20]:
from datetime import datetime
import re
import time

from google.api_core.exceptions import AlreadyExists

from google.cloud import bigquery

from google.cloud.aiplatform_v1beta1 import (
    FeaturestoreOnlineServingServiceClient, FeaturestoreServiceClient)
from google.cloud.aiplatform_v1beta1.types import FeatureSelector, IdMatcher
from google.cloud.aiplatform_v1beta1.types import \
    entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_monitoring as featurestore_monitoring_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_online_service as featurestore_online_service_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2
from google.protobuf.duration_pb2 import Duration
from google.protobuf.timestamp_pb2 import Timestamp

import pandas as pd

In [21]:
# Run identifier (YYYYMMDDHHMMSS, local clock) appended to resource IDs.
TIMESTAMP = format(datetime.now(), "%Y%m%d%H%M%S")

In [22]:
# Both clients are pointed at the regional API endpoint: admin_client handles
# resource CRUD, data_client reads feature values.
admin_client = FeaturestoreServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)
data_client = FeaturestoreOnlineServingServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)

# Location path (project + region) used as the parent for featurestore calls.
BASE_RESOURCE_PATH = admin_client.common_location_path(PROJECT_ID, REGION)

## Create Feature Store

In [23]:
# Featurestore ID = configured prefix + "_" + run timestamp.
FEATURESTORE_ID = "_".join([FEATURE_STORE_NAME_PREFIX, TIMESTAMP])

# Online serving with a fixed number of nodes.
online_serving_config = featurestore_pb2.Featurestore.OnlineServingConfig(
    fixed_node_count=FEATURE_STORE_NODE_COUNT
)
create_featurestore_request = featurestore_service_pb2.CreateFeaturestoreRequest(
    parent=BASE_RESOURCE_PATH,
    featurestore_id=FEATURESTORE_ID,
    featurestore=featurestore_pb2.Featurestore(
        online_serving_config=online_serving_config,
    ),
)

# Starts a long-running operation; its result is awaited in the next cell.
create_lro = admin_client.create_featurestore(create_featurestore_request)

In [24]:
# Block until featurestore creation completes, then show the returned resource.
created_featurestore = create_lro.result()
print(created_featurestore)

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714"



## Create Entity Type

In [25]:
# Create the users entity type with snapshot-analysis monitoring enabled.
# Features belonging to this EntityType inherit the monitoring config by default.
snapshot_analysis = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
    monitoring_interval=Duration(seconds=ENTITY_TYPE_MONITORING_INTERVAL),
)
users_entity_type = entity_type_pb2.EntityType(
    description=ENTITY_TYPE_DESCRIPTION,
    monitoring_config=featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
        snapshot_analysis=snapshot_analysis,
    ),
)
users_entity_type_lro = admin_client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        entity_type_id=ENTITY_TYPE_ID,
        entity_type=users_entity_type,
    )
)

# Block until the EntityType has been created, then show the result.
print(users_entity_type_lro.result())

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users"
etag: "AMEw9yMJydqOT2m1r_cU6nOUXxxu5D0DlW6wAXnH0apuwYGxvLBh"



In [26]:
# Named query parameters holding the source table name.
# NOTE(review): no visible cell reads param_dict - confirm it is still needed.
param_dict = dict(table=BIGQUERY_TABLE)

In [27]:
# Load a 1000-row sample of the source table into `df`; its columns and dtypes
# drive the feature definitions built further down.
#
# The BigQuery client is used directly instead of the %%bigquery cell magic so
# that (a) the table follows the BIGQUERY_TABLE parameter rather than a second
# hard-coded copy of its name, and (b) the cell does not depend on the
# google.cloud.bigquery IPython extension having been loaded.
sample_query = f"SELECT * FROM `{BIGQUERY_TABLE}` LIMIT 1000"
df = bigquery.Client(project=PROJECT_ID).query(sample_query).to_dataframe()

Query complete after 0.46s: 100%|██████████| 1/1 [00:00<00:00, 236.89query/s]                          
Downloading: 100%|██████████| 1000/1000 [00:00<00:00, 1241.00rows/s]


In [34]:
# Preview the first five sampled rows.
df.head(n=5)

Unnamed: 0,int64_field_0,gender,car,reality,days_birth,days_employed,flag_mobil,famsize,begin_month,chldno_1,...,famtp_separated,famtp_singlenotmarried,famtp_widow,id,inc,wkphone,phone,email,dep_value,target
0,22935,0.0,0.0,1.0,-21532.0,-4630.0,1.0,1,-57.0,0.0,...,0,1,0,5094890,9,0,0,0,False,0
1,22934,0.0,0.0,1.0,-21532.0,-4630.0,1.0,1,-45.0,0.0,...,0,1,0,5094889,9,0,0,0,False,0
2,22933,0.0,0.0,1.0,-21532.0,-4630.0,1.0,1,-57.0,0.0,...,0,1,0,5094888,9,0,0,0,False,0
3,28983,0.0,0.0,0.0,-11792.0,-126.0,1.0,3,-40.0,1.0,...,0,0,0,5116924,9,0,0,0,False,0
4,14878,1.0,1.0,1.0,-11794.0,-1720.0,1.0,2,-17.0,0.0,...,0,0,0,5061467,9,1,1,0,False,0


In [28]:
# Build one CreateFeatureRequest and one import FeatureSpec per DataFrame
# column, skipping the columns listed in IGNORE_COLUMNS_INGESTION.
create_feature_requests = []
feature_specs = []

# pandas dtype name -> Feature Store value type.
# "bool"/"boolean"/"Int64" cover boolean columns and pandas' nullable dtypes,
# which previously fell through to a bare KeyError.
mapping = {
    "float64": feature_pb2.Feature.ValueType.DOUBLE,
    "uint8": feature_pb2.Feature.ValueType.INT64,
    "int64": feature_pb2.Feature.ValueType.INT64,
    "Int64": feature_pb2.Feature.ValueType.INT64,
    "bool": feature_pb2.Feature.ValueType.BOOL,
    "boolean": feature_pb2.Feature.ValueType.BOOL,
    "object": feature_pb2.Feature.ValueType.STRING,
}

# The parent path is the same for every feature, so build it once.
users_entity_type_path = admin_client.entity_type_path(
    PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
)

columns = df.columns.tolist()
dtypes_list = df.dtypes.tolist()
for column, dtype in zip(columns, dtypes_list):
    if column in IGNORE_COLUMNS_INGESTION:
        continue

    dtype_name = str(dtype)
    if dtype_name not in mapping:
        # Fail with the offending column named instead of an opaque KeyError.
        raise ValueError(
            f"Column {column!r} has dtype {dtype_name!r}, which has no "
            f"Feature Store value type in `mapping`."
        )

    # Feature IDs: strip every non-word character and lower-case the rest.
    # Computed once so the Feature and its import FeatureSpec always agree.
    feature_id = re.sub(r'[\W]+', '', column).lower()

    create_feature_requests.append(
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=mapping[dtype_name],
                description=column,
            ),
            parent=users_entity_type_path,
            feature_id=feature_id,
        )
    )
    feature_specs.append(
        featurestore_service_pb2.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
    )

  

In [29]:
# Create the features one by one, waiting on each operation. A feature that
# already exists is reported and the loop moves on.
for feature_request in create_feature_requests:
    try:
        created_feature = admin_client.create_feature(feature_request).result()
    except AlreadyExists as already_exists_error:
        print(already_exists_error)
    else:
        print(created_feature)

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/gender"
etag: "AMEw9yOwngTypc4nFLDcuf8S9okBdjJH8pzpbeEXF99KK3naoy6i"

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/car"
etag: "AMEw9yPl6K2HxIDey5TukQ8EhPlwVrOdakJvrUZO2CU9rWugHSkQ"

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/reality"
etag: "AMEw9yOncuRs5wz4lOwchpYKiOOLxiF7i-7E8Ti8qwemP-B0LSeJ"

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/days_birth"
etag: "AMEw9yP_W4SDSMUc6VqWb7dYa8uhRSCO9QmjMesmNQcPjaNrFWZT"

name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/days_employed"
etag: "AMEw9yMErcxgIuHCBYCdvw724IBn5uH8eobgSuoEaiOHnv-5eu6T"

name: "projects/203258207752/locations/us-central1/featu

In [30]:
# Single feature timestamp applied to every imported value, truncated to whole
# seconds. The previously computed `nanos` was never passed to Timestamp, so
# the dead computation has been removed.
# NOTE(review): the API reference says feature_time must not have higher than
# millisecond precision, which is presumably why the sub-second part is
# dropped - confirm before adding it back.
now = time.time()
seconds = int(now)
timestamp = Timestamp(seconds=seconds)

In [31]:
# Import values for every selected feature from the BigQuery table into the
# entity type, stamping all of them with the single `timestamp`.
bigquery_source = io_pb2.BigQuerySource(input_uri=BIGQUERY_SOURCE)
import_target_entity_type = admin_client.entity_type_path(
    PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
)
import_request = featurestore_service_pb2.ImportFeatureValuesRequest(
    entity_type=import_target_entity_type,
    bigquery_source=bigquery_source,
    entity_id_field=ID_COLUMN,
    feature_specs=feature_specs,
    feature_time=timestamp,
    worker_count=IMPORT_WORKER_COUNT,
)

# Kick off the import (takes a couple of minutes); awaited in the next cell.
ingestion_lro = admin_client.import_feature_values(import_request)

In [32]:
# Block until the import operation completes. As the last expression in the
# cell, the operation's result is what gets displayed as the cell output.
ingestion_lro.result()

imported_entity_count: 25134
imported_feature_value_count: 1156164

In [33]:
# Materialize the pager so every featurestore in the project/region is shown.
location_path = admin_client.common_location_path(PROJECT_ID, REGION)
[*admin_client.list_featurestores(parent=location_path)]

[name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714"
 create_time {
   seconds: 1631882268
   nanos: 470732000
 }
 update_time {
   seconds: 1631882268
   nanos: 561040000
 }
 etag: "AMEw9yNSi0qmv4Mrro34GjPY7G3OvhradbrZoG1RIdD68kBkNjrWpZXULqFDQhW079Rc"
 online_serving_config {
   fixed_node_count: 1
 }
 state: STABLE]

In [25]:
from google.protobuf import field_mask_pb2

# Set the online-serving node count on the featurestore this notebook created.
# FEATURESTORE_ID from the creation cell is reused instead of being overwritten
# with the hard-coded ID of one particular past run, so the notebook keeps
# working after a kernel restart. The node count comes from the
# FEATURE_STORE_NODE_COUNT parameter rather than a literal.
featurestore = admin_client.get_featurestore(
    name=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
)
featurestore.online_serving_config.fixed_node_count = FEATURE_STORE_NODE_COUNT

# The field mask limits the update to the node count only.
update_lro = admin_client.update_featurestore(
    featurestore=featurestore,
    update_mask=field_mask_pb2.FieldMask(
        paths=["online_serving_config.fixed_node_count"]
    ),
)

# Wait for the update like every other long-running operation in this
# notebook, so a failure surfaces here instead of being silently dropped.
print(update_lro.result())

In [10]:
# Fetch the featurestore this notebook is working with.
# FEATURE_STORE_ID is kept as an alias of FEATURESTORE_ID (the name used by the
# rest of the notebook) because the cleanup cell references it. It previously
# held a hard-coded ID from one specific run.
FEATURE_STORE_ID = FEATURESTORE_ID
featurestore = admin_client.get_featurestore(
    name=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURE_STORE_ID)
)

In [36]:
# Online read: fetch the two features "age" and "car" for a single entity.
feature_selector = FeatureSelector(
    id_matcher=IdMatcher(ids=["age", "car"])
)

data_client.read_feature_values(
    featurestore_online_service_pb2.ReadFeatureValuesRequest(
        # Read from the entity type configured in the params cell
        # (ENTITY_TYPE_ID) instead of repeating its literal name.
        entity_type=admin_client.entity_type_path(
            PROJECT_ID, REGION, FEATURESTORE_ID, ENTITY_TYPE_ID
        ),
        # Entity whose feature values are read; this is a value of the ID
        # column used as entity_id_field during import.
        entity_id="5094890",
        feature_selector=feature_selector,
    )
)

header {
  entity_type: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users"
  feature_descriptors {
    id: "age"
  }
  feature_descriptors {
    id: "car"
  }
}
entity_view {
  entity_id: "5094890"
  data {
    value {
      double_value: 58.0
      metadata {
        generate_time {
          seconds: 1631882438
        }
      }
    }
  }
  data {
    value {
      double_value: 0.0
      metadata {
        generate_time {
          seconds: 1631882438
        }
      }
    }
  }
}

In [37]:
# Search the location for features matching the query on feature_id.
admin_client.search_features(
    request=featurestore_service_pb2.SearchFeaturesRequest(
        location=BASE_RESOURCE_PATH,
        query="feature_id=age",
    )
)

SearchFeaturesPager<features {
  name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/age"
  description: "age"
  create_time {
    seconds: 1631882372
    nanos: 339132000
  }
  update_time {
    seconds: 1631885042
    nanos: 606486000
  }
}
>

In [38]:
# Search the location for features matching the query on description.
admin_client.search_features(
    request=featurestore_service_pb2.SearchFeaturesRequest(
        location=BASE_RESOURCE_PATH,
        query="description=age",
    )
)

SearchFeaturesPager<features {
  name: "projects/203258207752/locations/us-central1/featurestores/mlops_e2e_20210917123714/entityTypes/users/features/age"
  description: "age"
  create_time {
    seconds: 1631882372
    nanos: 339132000
  }
  update_time {
    seconds: 1631885042
    nanos: 606486000
  }
}
>

In [41]:
# Target the featurestore this notebook created (FEATURESTORE_ID) for cleanup.
# The previous hard-coded ID belonged to one specific past run, so on a re-run
# the cleanup would have pointed at the wrong (or a non-existent) store.
FEATURE_STORE_ID = FEATURESTORE_ID

In [42]:
# Cleanup: request deletion of the featurestore. The returned operation is
# displayed but not awaited.
# NOTE(review): force=True presumably allows deleting a store that still
# contains entity types/features - confirm against the API reference.
delete_request = featurestore_service_pb2.DeleteFeaturestoreRequest(
    name=admin_client.featurestore_path(PROJECT_ID, REGION, FEATURE_STORE_ID),
    force=True,
)
admin_client.delete_featurestore(delete_request)

<google.api_core.operation.Operation at 0x7f843a575750>