In [349]:
from google.cloud import bigquery
import time, logging

In [487]:
class BQTableEmpty(Exception):
    """Error signalling that a BigQuery table contains no rows.

    Attributes:
        message -- human-readable description of the failure
    """

    def __init__(self, message="Table has no rows"):
        # Hand the text to Exception first so str(err) and err.args carry it,
        # then expose it under the documented `message` attribute.
        super().__init__(message)
        self.message = message

In [541]:
# --- BigQuery settings ------------------------------------------------------
bq_location = 'US'
bq_project_id = "feature-store-mars21"
bq_dataset_id = "mars"
bq_query_table_id = "tmp-table-v13"
bq_export_table_id = "training-v1"
overwrite_table = True

# Fully qualified `project.dataset.table` ids: the first receives the query
# result, the second is used later as the export destination.
bq_query_data_table = ".".join(
    [bq_project_id, bq_dataset_id, bq_query_table_id])
bq_export_data_table = ".".join(
    [bq_project_id, bq_dataset_id, bq_export_table_id])

client = bigquery.Client(project=bq_project_id, location=bq_location)

# NOTE(review): `Where 1=2` is always false, so this query yields zero rows
# (the recorded output reports "Table has 0 rows") - presumably left in to
# exercise the empty-table path; confirm before reusing.
query = """
SELECT planet as planets, terrestrial_date as timestamp, 5 as pt
    FROM `feature-store-mars21.mars.three_planets_tmp` Where 1=2
"""

# Replace the destination table's contents when overwriting is allowed;
# otherwise only write if the destination table is empty.
if overwrite_table:
    write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
else:
    write_disposition = bigquery.job.WriteDisposition.WRITE_EMPTY

job_config = bigquery.QueryJobConfig(
    write_disposition=write_disposition,
    destination=bq_query_data_table)

# Run the query and wait until the result has been written to the destination
# table configured in `job_config`.
#
# `query_job` is bound before the `try` so the handler can tell "submission
# failed, there is no job to inspect" apart from "the job ran and failed".
# Without this, a failure inside `client.query` would trigger a NameError in
# the handler and hide the real exception.
query_job = None
try:
    query_job = client.query(query=query,
                             job_config=job_config)
    query_job.result()  # blocks until the job has finished

    # Note: a query that returns zero rows is not treated as an error here.
    if query_job.errors:
        raise RuntimeError(
            "BigQuery job {} reported errors: {}".format(
                query_job.job_id, query_job.errors))
except Exception as e:
    job_errors = query_job.errors if query_job is not None else None
    logging.error("BigQuery query failed: %s (job errors: %s)", e, job_errors)
    raise  # re-raise the original exception with its traceback intact
    
    
    
# Fetch the metadata of the table the query wrote to (one API request).
table = client.get_table(bq_query_data_table)

# Report the table properties the following cells rely on.
for _template, _shown in (
        ("Table schema: {}", table.schema),
        ("Table description: {}", table.description),
        ("Table has {} rows", table.num_rows)):
    print(_template.format(_shown))

Table schema: [SchemaField('planets', 'STRING', 'NULLABLE', None, (), ()), SchemaField('timestamp', 'TIMESTAMP', 'NULLABLE', None, (), ()), SchemaField('pt', 'INTEGER', 'NULLABLE', None, (), ())]
Table description: None
Table has 0 rows


In [530]:
#if table.num_rows==0:
#    raise Exception("BQ table {} has no rows".format(bq_query_data_table))

In [531]:
from collections import OrderedDict # keeps column order even where plain dicts are unordered (python<3.6)

# Map each column name to its BigQuery field type, in table column order.
schema = OrderedDict()
for field in table.schema:
    schema[field.name] = field.field_type

In [532]:
# Split the table columns around the `timestamp` column:
#   columns before it -> entity_type_cols
#   columns after it  -> pass_through_cols
# `reading_entity_types` stays True until `timestamp` has been seen.
entity_type_cols = []
pass_through_cols = []
reading_entity_types = True
for col_name, col_type in schema.items():
    if col_name != 'timestamp':
        bucket = entity_type_cols if reading_entity_types else pass_through_cols
        bucket.append(col_name)
        continue

    # Reached the timestamp column: switch buckets and check its type.
    reading_entity_types = False
    if col_type != "TIMESTAMP":
        raise ValueError("timestamp column must be of type TIMESTAMP")

if reading_entity_types:  # the loop never encountered a timestamp column
    raise ValueError("timestamp column missing from BQ table. It is required for feature store data retrieval")

In [533]:
# Display the columns found before `timestamp` (treated as entity types).
entity_type_cols

['planets']

In [534]:
# Display the columns found after `timestamp` (pass-through columns).
pass_through_cols

['pt']

In [535]:
# Validate that every entity-type column of the table exists as an entity type in the feature store

In [536]:
# --- Feature store settings --------------------------------------------------
fs_location = 'us-central1'
fs_project = 'feature-store-mars21'
fs_featurestore_name = 'universe'

# Resource name of the feature store.
fs_path= 'projects/{fs_project}/locations/{fs_location}/featurestores/{fs_name}'.format(fs_project=fs_project,
                                                   fs_location=fs_location,
                                                   fs_name=fs_featurestore_name)

from google.cloud.aiplatform_v1beta1 import FeaturestoreServiceClient

# The endpoint is regional, so it is derived from the feature store location.
API_ENDPOINT = "{}-aiplatform.googleapis.com".format(fs_location)

admin_client = FeaturestoreServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})

# Iterate the returned pager itself so that every page of results is fetched.
# Reading `.entity_types` off the pager only exposes the entity types of the
# first response page, which can make the check below fail spuriously for
# feature stores with many entity types.
# The entity type id is the last segment of the resource name.
fs_entities = [
    entity_type.name.split('/')[-1]
    for entity_type in admin_client.list_entity_types(parent=fs_path)
]

# Every column placed before `timestamp` must be a known entity type.
if len(set(entity_type_cols).difference(fs_entities))>0:
    raise ValueError("Table column(s) {} before timestamp column do not match entities in feature store {} ".format(entity_type_cols, fs_entities))

In [537]:
# Batch-read feature values for the rows of the BigQuery query table and export the result to another BigQuery table

In [538]:
# Feature ids to retrieve, keyed by entity type.
my_features  = {'planets': ["avg_max_temp_5d", "arr_max_temp_3d", "min_temp_std"]}

# Entity types with requested features that are not columns of the query table.
feature_diff = set(my_features) - set(entity_type_cols)
if feature_diff:
    raise LookupError("Features requested for entities {} that does not exist in filtering query columns: {} ".format(feature_diff, query))

In [542]:
# NOTE(review): `featurestore_service_pb2`, `feature_selector_pb2`, `io_pb2`
# and `BigQuerySource` are not imported in any cell shown here - confirm they
# are imported before this cell runs on a fresh kernel.

# Build one EntityTypeSpec per entity type, selecting the requested feature ids.
entity_type_specs_arr=[]
for ent_type, features_arr in my_features.items():
    entity_type_specs_arr.append(
        featurestore_service_pb2.BatchReadFeatureValuesRequest.EntityTypeSpec(
            # read the features listed in `features_arr` from entity type `ent_type`
            entity_type_id= ent_type,
            feature_selector= feature_selector_pb2.FeatureSelector(
                id_matcher=feature_selector_pb2.IdMatcher(
                ids=features_arr))
        )
    )

batch_serving_request = featurestore_service_pb2.BatchReadFeatureValuesRequest(
    featurestore=fs_path,
    # Read instances come from the BigQuery table produced by the query above.
    # NOTE(review): `BigQuerySource` is used without the `io_pb2.` prefix that
    # the destination below uses - confirm which import it resolves to.
    bigquery_read_instances=BigQuerySource(input_uri = "bq://{}".format(bq_query_data_table)),
    # Alternative (disabled): read instances from a CSV file on GCS
    #csv_read_instances=io_pb2.CsvSource(
    #    gcs_source=io_pb2.GcsSource(uris=[FEATURE_REQ_CSV_PATH])),

    # Output info
    destination=featurestore_service_pb2.FeatureValueDestination(
        bigquery_destination=io_pb2.BigQueryDestination(
            # output to BigQuery table
            output_uri='bq://{}'.format(bq_export_data_table))),
    # Alternative (disabled): write the output to GCS instead
    #destination=featurestore_service_pb2.FeatureValueDestination(
    #    tfrecord_destination=io_pb2.CsvDestination(
    #        gcs_destination=EXPORT_TF_PATH)),

    entity_type_specs=entity_type_specs_arr,
)

In [543]:
%%time
try:
    print(admin_client.batch_read_feature_values(batch_serving_request).result())
except Exception as ex:
    print(ex)


CPU times: user 25.8 ms, sys: 13.9 ms, total: 39.7 ms
Wall time: 1min 6s
