Skip to content

Commit

Permalink
Add configuration for bigquery entity staging location (#77)
Browse files Browse the repository at this point in the history
Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
  • Loading branch information
khorshuheng and khorshuheng committed Jun 10, 2021
1 parent baeac51 commit 45413ca
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
27 changes: 18 additions & 9 deletions python/feast_spark/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,16 +165,25 @@ def get_historical_features(

if isinstance(entity_source, pd.DataFrame):
if any(isinstance(source, BigQuerySource) for source in feature_sources):
first_bq_source = [
source
for source in feature_sources
if isinstance(source, BigQuerySource)
][0]
source_ref = table_reference_from_string(
first_bq_source.bigquery_options.table_ref
)
if self.config.exists(opt.BQ_STAGING_PROJECT) and self.config.exists(
opt.BQ_STAGING_DATASET
):
staging_bq_project = self.config.get(opt.BQ_STAGING_PROJECT)
staging_bq_dataset = self.config.get(opt.BQ_STAGING_DATASET)
else:
first_bq_source = [
source
for source in feature_sources
if isinstance(source, BigQuerySource)
][0]
source_ref = table_reference_from_string(
first_bq_source.bigquery_options.table_ref
)
staging_bq_project = source_ref.project
staging_bq_dataset = source_ref.dataset_id

entity_source = stage_entities_to_bq(
entity_source, source_ref.project, source_ref.dataset_id
entity_source, staging_bq_project, staging_bq_dataset
)
else:
entity_source = stage_entities_to_fs(
Expand Down
10 changes: 10 additions & 0 deletions python/feast_spark/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,16 @@ class ConfigOptions(metaclass=ConfigMeta):
#: That may help to control amount of write requests to storage
SPARK_STREAMING_TRIGGERING_INTERVAL: Optional[str] = None

#: GCP project of the BigQuery dataset used to stage the entities during historical
#: feature retrieval. If not set, the GCP project of the feature table batch source
#: will be used instead.
BQ_STAGING_PROJECT: Optional[str] = None

#: BigQuery dataset used to stage the entities during historical feature retrieval.
#: If not set, the BigQuery dataset of the feature table batch source will be used
#: instead.
BQ_STAGING_DATASET: Optional[str] = None

def defaults(self):
return {
k: getattr(self, k)
Expand Down

0 comments on commit 45413ca

Please sign in to comment.