Skip to content

Commit

Permalink
fix: Preserve ordering of features in _get_column_names (#2457)
Browse files Browse the repository at this point in the history
* Preserve ordering of features in _get_column_names

* Correction + unit test

Signed-off-by: David Y Liu <davidyliuliu@gmail.com>

* lint corrections

Signed-off-by: David Y Liu <davidyliuliu@gmail.com>

* correction

Signed-off-by: David Y Liu <davidyliuliu@gmail.com>

* My local `make lint` seems to behave differently from the one run in the PR checks, so I am reverting the lint changes

Signed-off-by: David Y Liu <davidyliuliu@gmail.com>
  • Loading branch information
mavysavydav committed Apr 4, 2022
1 parent 4fa73a9 commit 495b435
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
10 changes: 7 additions & 3 deletions sdk/python/feast/infra/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,13 @@ def _get_column_names(

# We need to exclude join keys and timestamp columns from the list of features, after they are mapped to
# their final column names via the `field_mapping` field of the source.
_feature_names = set(feature_names) - set(join_keys)
_feature_names = _feature_names - {event_timestamp_column, created_timestamp_column}
feature_names = list(_feature_names)
feature_names = [
name
for name in feature_names
if name not in join_keys
and name != event_timestamp_column
and name != created_timestamp_column
]
return (
join_keys,
feature_names,
Expand Down
47 changes: 47 additions & 0 deletions sdk/python/tests/unit/infra/test_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2020 The Feast Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import timedelta

from feast import BigQuerySource
from feast.entity import Entity
from feast.feature import Feature
from feast.feature_view import FeatureView
from feast.infra.provider import _get_column_names
from feast.value_type import ValueType


def test_get_column_names_preserves_feature_ordering():
    """Check that `_get_column_names` keeps features in their declared order.

    A feature view is built with ten single-letter features, and the feature
    list returned by `_get_column_names` must match that declaration order
    exactly.
    """
    # Single source of truth for both the declared features and the assertion.
    expected_order = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]

    entity = Entity("my-entity", description="My entity", value_type=ValueType.STRING)
    declared_features = [
        Feature(name=feature_name, dtype=ValueType.STRING)
        for feature_name in expected_order
    ]
    fv = FeatureView(
        name="my-fv",
        entities=["my-entity"],
        ttl=timedelta(days=1),
        # Placeholder table name; this test only inspects column names.
        batch_source=BigQuerySource(table="non-existent-mock"),
        features=declared_features,
    )

    # Only the second element of the returned 4-tuple (the feature names)
    # matters for this test.
    _, feature_list, _, _ = _get_column_names(fv, [entity])
    assert feature_list == expected_order

0 comments on commit 495b435

Please sign in to comment.