Skip to content

Commit

Permalink
fix: Switch from join_key to join_keys in tests and docs (#2580)
Browse files Browse the repository at this point in the history
* Switch from `join_key` to `join_keys` in tests and docs

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Convert iterator to list so it can be used repeatedly

Signed-off-by: Felix Wang <wangfelix98@gmail.com>

* Format

Signed-off-by: Felix Wang <wangfelix98@gmail.com>
  • Loading branch information
felixwang9817 authored and adchia committed Apr 20, 2022
1 parent 8717bc8 commit 6130b80
Show file tree
Hide file tree
Showing 16 changed files with 85 additions and 33 deletions.
2 changes: 1 addition & 1 deletion docs/getting-started/concepts/entity.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
An entity is a collection of semantically related features. Users define entities to map to the domain of their use case. For example, a ride-hailing service could have customers and drivers as their entities, which group related features that correspond to these customers and drivers.

```python
driver = Entity(name='driver', value_type=ValueType.STRING, join_key='driver_id')
driver = Entity(name='driver', value_type=ValueType.STRING, join_keys=['driver_id'])
```

Entities are typically defined as part of feature views. The entity name is used to reference the entity from a feature view definition, and the join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details.
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/concepts/feature-view.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ It is suggested that you dynamically specify the new FeatureView name using `.wi
from feast import BigQuerySource, Entity, FeatureView, Field, ValueType
from feast.types import Int32

location = Entity(name="location", join_key="location_id", value_type=ValueType.INT64)
location = Entity(name="location", join_keys=["location_id"], value_type=ValueType.INT64)

location_stats_fv= FeatureView(
name="location_stats",
Expand Down
4 changes: 2 additions & 2 deletions docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ driver_hourly_stats = FileSource(
# fetch features.
# Entity has a name used for later reference (in a feature view, eg)
# and join_key to identify physical field name used in storages
driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",)
driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down Expand Up @@ -168,7 +168,7 @@ driver_hourly_stats = FileSource(
# fetch features.
# Entity has a name used for later reference (in a feature view, eg)
# and join_key to identify physical field name used in storages
driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",)
driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/validating-historical-features.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ batch_source = FileSource(


```python
taxi_entity = Entity(name='taxi', join_key='taxi_id')
taxi_entity = Entity(name='taxi', join_keys=['taxi_id'])
```


Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
DUMMY_ENTITY_NAME = "__dummy"
DUMMY_ENTITY_VAL = ""
DUMMY_ENTITY = Entity(
name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.STRING,
name=DUMMY_ENTITY_NAME, join_keys=[DUMMY_ENTITY_ID], value_type=ValueType.STRING,
)


Expand Down
4 changes: 3 additions & 1 deletion sdk/python/feast/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def update_entities_with_inferred_types_from_feature_views(
if not (incomplete_entities_keys & set(view.entities)):
continue # skip if view doesn't contain any entities that need inference

col_names_and_types = view.batch_source.get_table_column_names_and_types(config)
col_names_and_types = list(
view.batch_source.get_table_column_names_and_types(config)
)
for entity_name in view.entities:
if entity_name in incomplete_entities:
entity = incomplete_entities[entity_name]
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/aws/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
# The storage level type for an entity
value_type=ValueType.INT64,
)
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/gcp/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
# The storage level type for an entity
value_type=ValueType.INT64,
)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/templates/local/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

# Define an entity for the driver. You can think of entity as a primary key used to
# fetch features.
driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64,)
driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64,)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/snowflake/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
)

# Indicates a data source from which feature values can be retrieved. Sources are queried when building training
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/tests/example_repos/example_feature_repo_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@

driver = Entity(
name="driver", # The name is derived from this argument, not object name.
join_key="driver_id",
join_keys=["driver_id"],
value_type=ValueType.INT64,
description="driver id",
)

customer = Entity(
name="customer", # The name is derived from this argument, not object name.
join_key="customer_id",
join_keys=["customer_id"],
value_type=ValueType.STRING,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
name="driver_id",
value_type=ValueType.INT64,
description="driver id",
join_key="driver",
join_keys=["driver"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def driver(value_type: ValueType = ValueType.INT64):
name="driver", # The name is derived from this argument, not object name.
value_type=value_type,
description="driver id",
join_key="driver_id",
join_keys=["driver_id"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm
created_timestamp_column="created",
)

driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64)
driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64)
driver_fv = FeatureView(
name="driver_stats",
entities=["driver"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source):
with prep_file_source(
df=dataframe_source, event_timestamp_column="ts_1"
) as file_source:
entity = Entity(name="id", join_key="id_join_key", value_type=ValueType.INT64)
entity = Entity(
name="id", join_keys=["id_join_key"], value_type=ValueType.INT64
)

fv1 = FeatureView(
name="fv1",
Expand Down Expand Up @@ -436,7 +438,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source):
df=dataframe_source, event_timestamp_column="ts_1"
) as file_source:

e = Entity(name="id", join_key="id_join_key", value_type=ValueType.STRING)
e = Entity(name="id", join_keys=["id_join_key"], value_type=ValueType.STRING)

# Create Feature View
fv1 = FeatureView(
Expand Down
66 changes: 57 additions & 9 deletions sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
update_entities_with_inferred_types_from_feature_views,
update_feature_views_with_inferred_features,
)
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import String, UnixTimestamp
from feast.types import Float32, String, UnixTimestamp
from tests.utils.data_source_utils import (
prep_file_source,
simple_bq_source_using_query_arg,
Expand All @@ -53,8 +54,8 @@ def test_update_entities_with_inferred_types_from_feature_views(
name="fv2", entities=["id"], batch_source=file_source_2, ttl=None,
)

actual_1 = Entity(name="id", join_key="id_join_key")
actual_2 = Entity(name="id", join_key="id_join_key")
actual_1 = Entity(name="id", join_keys=["id_join_key"])
actual_2 = Entity(name="id", join_keys=["id_join_key"])

update_entities_with_inferred_types_from_feature_views(
[actual_1], [fv1], RepoConfig(provider="local", project="test")
Expand All @@ -63,16 +64,16 @@ def test_update_entities_with_inferred_types_from_feature_views(
[actual_2], [fv2], RepoConfig(provider="local", project="test")
)
assert actual_1 == Entity(
name="id", join_key="id_join_key", value_type=ValueType.INT64
name="id", join_keys=["id_join_key"], value_type=ValueType.INT64
)
assert actual_2 == Entity(
name="id", join_key="id_join_key", value_type=ValueType.STRING
name="id", join_keys=["id_join_key"], value_type=ValueType.STRING
)

with pytest.raises(RegistryInferenceFailure):
# two viable data types
update_entities_with_inferred_types_from_feature_views(
[Entity(name="id", join_key="id_join_key")],
[Entity(name="id", join_keys=["id_join_key"])],
[fv1, fv2],
RepoConfig(provider="local", project="test"),
)
Expand Down Expand Up @@ -173,9 +174,9 @@ def test_on_demand_features_type_inference():

@on_demand_feature_view(
sources={"date_request": date_request},
features=[
Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP),
Feature(name="string_output", dtype=ValueType.STRING),
schema=[
Field(name="output", dtype=UnixTimestamp),
Field(name="string_output", dtype=String),
],
)
def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -284,3 +285,50 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:

with pytest.raises(SpecifiedFeaturesNotPresentError):
test_view_with_missing_feature.infer_features()


def test_update_feature_views_with_inferred_features():
    """Verify that inference drops entity join-key fields from feature views.

    Two feature views are declared whose schemas list their entities' join-key
    columns next to one real feature. After running
    ``update_feature_views_with_inferred_features`` each view is expected to
    keep only the single non-entity field in both ``schema`` and ``features``.
    """
    source = FileSource(name="test", path="test path")
    first_entity = Entity(name="test1", join_keys=["test_column_1"])
    second_entity = Entity(name="test2", join_keys=["test_column_2"])

    # View keyed by one entity: one feature column plus one join-key column.
    one_entity_schema = [
        Field(name="feature", dtype=Float32),
        Field(name="test_column_1", dtype=String),
    ]
    one_entity_view = FeatureView(
        name="test1",
        entities=[first_entity],
        schema=one_entity_schema,
        source=source,
    )

    # View keyed by two entities: one feature column plus two join-key columns.
    two_entity_schema = [
        Field(name="feature", dtype=Float32),
        Field(name="test_column_1", dtype=String),
        Field(name="test_column_2", dtype=String),
    ]
    two_entity_view = FeatureView(
        name="test2",
        entities=[first_entity, second_entity],
        schema=two_entity_schema,
        source=source,
    )

    # Before inference, the join-key column is still counted in both places.
    assert len(one_entity_view.schema) == 2
    assert len(one_entity_view.features) == 2

    # The entity field should be removed from the view's schema and features.
    single_config = RepoConfig(provider="local", project="test")
    update_feature_views_with_inferred_features(
        [one_entity_view], [first_entity], single_config
    )
    assert len(one_entity_view.schema) == 1
    assert len(one_entity_view.features) == 1

    # The second view has not been processed yet, so it still has all 3 fields.
    assert len(two_entity_view.schema) == 3
    assert len(two_entity_view.features) == 3

    # Both entity fields should be removed from the view's schema and features.
    composite_config = RepoConfig(provider="local", project="test")
    update_feature_views_with_inferred_features(
        [two_entity_view], [first_entity, second_entity], composite_config
    )
    assert len(two_entity_view.schema) == 1
    assert len(two_entity_view.features) == 1

0 comments on commit 6130b80

Please sign in to comment.