In [124]:
from google.cloud import bigquery
import logging

bq_location = 'EU'
bq_project_id = "myfirstproject-226013"
bq_staging_table_uri = "myfirstproject-226013.telco.tmp-table-v5"
BUCKET_NAME = "gs://feature-store-mars21"

query = """
    SELECT customerID as customer,TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)  as timestamp, Churn as label
        FROM `myfirstproject-226013.telco.churn` WHERE 1=1
    """


client = bigquery.Client(project=bq_project_id, location=bq_location,)

overwrite_table = False
job_config = bigquery.QueryJobConfig(
    write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE if overwrite_table else bigquery.job.WriteDisposition.WRITE_EMPTY,
    destination = bq_staging_table_uri)

try:
    query_job = client.query(query = query, 
                             job_config = job_config)
    query_job.result()
    #if .total_rows == 0:
    #    raise Exception("Query return no rows".format(bq_staging_table_uri))

    if query_job.errors: 
        raise Exception() 
except Exception as e:
    logging.error(query_job.errors)
    raise e

table_dataset_metadata={}

table = client.get_table(bq_staging_table_uri)  # Make an API request.
table_dataset_path = "bq://{}".format(bq_staging_table_uri)
table_dataset_metadata['table_name'] = bq_staging_table_uri



if table.num_rows==0:
    raise Exception("BQ table {} has no rows. Ensure thet your query returns results: {}".format(bq_query_data_table, query))

In [125]:
from collections import OrderedDict # in case dict is not created using python>=3.6
schema = OrderedDict((i.name,i.field_type) for i in table.schema)
entity_type_cols = []
pass_through_cols = []
found_timestamp=False
for key, value in schema.items():
    if key=='timestamp':
        found_timestamp=True
        if value!="TIMESTAMP":
            raise ValueError("timestamp column must be of type TIMESTAMP")
    else:
        if found_timestamp==False:
            entity_type_cols.append(key)
        else:
            pass_through_cols.append(key)
        
if found_timestamp==False: # means timestamp column was not found so this remained False
    raise ValueError("timestamp column missing from BQ table. It is required for feature store data retrieval")

In [126]:
entity_type_cols

['customer']

In [127]:
pass_through_cols

['label']

In [128]:
fs_location = 'europe-west4'
fs_project = 'myfirstproject-226013'
fs_featurestore_name = 'telco'

fs_path= 'projects/{fs_project}/locations/{fs_location}/featurestores/{fs_name}'.format(fs_project=fs_project,
                                                   fs_location=fs_location,
                                                   fs_name=fs_featurestore_name)
    
from google.cloud.aiplatform_v1beta1 import FeaturestoreServiceClient

API_ENDPOINT = "{}-aiplatform.googleapis.com".format(fs_location)

admin_client = FeaturestoreServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})

fs_entities = admin_client.list_entity_types(parent=fs_path).entity_types

fs_entities = [i.name.split('/')[-1] for i in fs_entities]

if len(set(entity_type_cols).difference(fs_entities))>0:
    raise ValueError("Table column(s) {} before timestamp column do not match entities in feature store {} ".format(entity_type_cols, fs_entities))

In [129]:
entity_type_cols

['customer']

In [130]:
my_features  = {'customer': ["tenure", "monthly_charges", "internet_service"]}
entities_diff = set(my_features.keys()).difference(entity_type_cols)
if len(entities_diff)>0:
    raise LookupError("\n Entities {} must exist in filtering query columns: {} ".format(entities_diff, query))

In [131]:
error_buffer = ""
for k,v in my_features.items():
    fs_features = admin_client.list_features(parent=fs_path+"/entityTypes/{}".format(k)).features
    fs_features = [i.name.split('/')[-1] for i in fs_features]
    
    missing_features = set(v).difference(fs_features)
    if len(missing_features)>0:
        error_buffer += "\n Features requested for entity [{}] do not exist: {}".format(k, missing_features)
        
if error_buffer!="":
    raise LookupError(error_buffer)

In [133]:
bq_export_table_uri="myfirstproject-226013.telco.features-table-v5"

from google.cloud.aiplatform_v1beta1.types import (featurestore_service as featurestore_service_pb2,
                                                   feature_selector as feature_selector_pb2,
                                                   BigQuerySource, BigQueryDestination)
entity_type_specs_arr=[]

# Select features to read
for ent_type, features_arr in my_features.items():
    entity_type_specs_arr.append(
        featurestore_service_pb2.BatchReadFeatureValuesRequest.EntityTypeSpec(
            # read feature values of features subscriber_type and duration_minutes from "bikes"
            entity_type_id= ent_type, 
            feature_selector= feature_selector_pb2.FeatureSelector(
                id_matcher=feature_selector_pb2.IdMatcher(
                ids=features_arr))
        )
    )
    
# Select columns to pass through

pass_through_fields_arr = []
for ptc in pass_through_cols:
    pass_through_fields_arr.append(
        featurestore_service_pb2.BatchReadFeatureValuesRequest.PassThroughField(
            # read feature values of features subscriber_type and duration_minutes from "bikes"
            field_name=ptc
        )
    )
   
batch_serving_request = featurestore_service_pb2.BatchReadFeatureValuesRequest(
    featurestore=fs_path,
    bigquery_read_instances=BigQuerySource(input_uri = "bq://{}".format(bq_staging_table_uri)),
    #csv_read_instances=io_pb2.CsvSource(
    #    gcs_source=io_pb2.GcsSource(uris=[FEATURE_REQ_CSV_PATH])),
    
    # Output info
    destination=featurestore_service_pb2.FeatureValueDestination(
        bigquery_destination=BigQueryDestination(
            # output to BigQuery table
            output_uri='bq://{}'.format(bq_export_table_uri))),
    #destination=featurestore_service_pb2.FeatureValueDestination(
    #    tfrecord_destination=io_pb2.CsvDestination(
    #        gcs_destination=EXPORT_TF_PATH)),
    
    entity_type_specs=entity_type_specs_arr,
    pass_through_fields=pass_through_fields_arr

    
)

try:
    print(admin_client.batch_read_feature_values(batch_serving_request).result(timeout=600))
except Exception as ex:
    print(ex)

customer

