From c16d9ade777ddd926c6bb92ad40978f93e275901 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 11:01:46 -0700 Subject: [PATCH 01/30] Refactor go feature server Signed-off-by: Kevin Zhang --- sdk/python/tests/conftest.py | 19 +-- .../integration/e2e/test_go_feature_server.py | 96 ++++---------- sdk/python/tests/utils/feature_utils.py | 120 ++++++++++++++++++ sdk/python/tests/utils/http_utils.py | 12 ++ 4 files changed, 165 insertions(+), 82 deletions(-) create mode 100644 sdk/python/tests/utils/feature_utils.py create mode 100644 sdk/python/tests/utils/http_utils.py diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 06b77f13b3..18c609d16a 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -46,6 +46,8 @@ FileDataSourceCreator, ) +from tests.utils.http_utils import check_port_open, free_port + logger = logging.getLogger(__name__) level = logging.INFO @@ -327,7 +329,7 @@ def feature_server_endpoint(environment): yield environment.feature_store.get_feature_server_endpoint() return - port = _free_port() + port = free_port() proc = Process( target=start_test_local_server, @@ -340,7 +342,7 @@ def feature_server_endpoint(environment): proc.start() # Wait for server to start wait_retry_backoff( - lambda: (None, _check_port_open("localhost", port)), + lambda: (None, check_port_open("localhost", port)), timeout_secs=10, ) @@ -353,23 +355,12 @@ def feature_server_endpoint(environment): wait_retry_backoff( lambda: ( None, - not _check_port_open("localhost", environment.get_local_server_port()), + not check_port_open("localhost", environment.get_local_server_port()), ), timeout_secs=30, ) -def _check_port_open(host, port) -> bool: - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - return sock.connect_ex((host, port)) == 0 - - -def _free_port(): - sock = socket.socket() - sock.bind(("", 0)) - return sock.getsockname()[1] - - @pytest.fixture def universal_data_sources(environment) -> TestData: return construct_universal_test_data(environment) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index dc83246c2d..3b155ae694 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -34,31 +34,10 @@ location, ) +from tests.utils.http_utils import free_port, check_port_open +from tests.utils.feature_utils import generate_expected_logs, get_latest_rows -@pytest.fixture -def initialized_registry(environment, universal_data_sources): - fs = environment.feature_store - - _, _, data_sources = universal_data_sources - feature_views = construct_universal_feature_views(data_sources) - - feature_service = FeatureService( - name="driver_features", - features=[feature_views.driver], - logging_config=LoggingConfig( - destination=environment.data_source_creator.create_logged_features_destination(), - sample_rate=1.0, - ), - ) - feast_objects: List[FeastObject] = [feature_service] - feast_objects.extend(feature_views.values()) - feast_objects.extend([driver(), customer(), location()]) - - fs.apply(feast_objects) - fs.materialize(environment.start_date, environment.end_date) - - -def server_port(environment, server_type: str): +def _server_port(environment, server_type: str): if not environment.test_repo_config.go_feature_serving: pytest.skip("Only for Go path") @@ -106,15 +85,38 @@ def server_port(environment, server_type: str): # wait for graceful stop time.sleep(5) 
+# Go test fixtures + +@pytest.fixture +def initialized_registry(environment, universal_data_sources): + fs = environment.feature_store + + _, _, data_sources = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + feature_service = FeatureService( + name="driver_features", + features=[feature_views.driver], + logging_config=LoggingConfig( + destination=environment.data_source_creator.create_logged_features_destination(), + sample_rate=1.0, + ), + ) + feast_objects: List[FeastObject] = [feature_service] + feast_objects.extend(feature_views.values()) + feast_objects.extend([driver(), customer(), location()]) + + fs.apply(feast_objects) + fs.materialize(environment.start_date, environment.end_date) @pytest.fixture def grpc_server_port(environment, initialized_registry): - yield from server_port(environment, "grpc") + yield from _server_port(environment, "grpc") @pytest.fixture def http_server_port(environment, initialized_registry): - yield from server_port(environment, "http") + yield from _server_port(environment, "http") @pytest.fixture @@ -252,45 +254,3 @@ def retrieve(): persisted_logs = persisted_logs.sort_values(by="driver_id").reset_index(drop=True) persisted_logs = persisted_logs[expected_logs.columns] pd.testing.assert_frame_equal(expected_logs, persisted_logs, check_dtype=False) - - -def free_port(): - sock = socket.socket() - sock.bind(("", 0)) - return sock.getsockname()[1] - - -def check_port_open(host, port) -> bool: - with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: - return sock.connect_ex((host, port)) == 0 - - -def get_latest_rows(df, join_key, entity_values): - rows = df[df[join_key].isin(entity_values)] - return rows.loc[rows.groupby(join_key)["event_timestamp"].idxmax()] - - -def generate_expected_logs( - df: pd.DataFrame, - feature_view: FeatureView, - features: List[str], - join_keys: List[str], - timestamp_column: str, -): - logs = pd.DataFrame() - for join_key in join_keys: - logs[join_key] = df[join_key] - - for feature in features: - col = f"{feature_view.name}__{feature}" - logs[col] = df[feature] - logs[f"{col}__timestamp"] = df[timestamp_column] - logs[f"{col}__status"] = FieldStatus.PRESENT - if feature_view.ttl: - logs[f"{col}__status"] = logs[f"{col}__status"].mask( - df[timestamp_column] - < datetime.utcnow().replace(tzinfo=pytz.UTC) - feature_view.ttl, - FieldStatus.OUTSIDE_MAX_AGE, - ) - - return logs.sort_values(by=join_keys).reset_index(drop=True) diff --git a/sdk/python/tests/utils/feature_utils.py b/sdk/python/tests/utils/feature_utils.py new file mode 100644 index 0000000000..b61fe75298 --- /dev/null +++ b/sdk/python/tests/utils/feature_utils.py @@ -0,0 +1,120 @@ +from typing import List +from datetime import datetime +import pytz + +import contextlib +import datetime +import tempfile +import uuid +from pathlib import Path +from typing import Iterator, Union + +import numpy as np +import pandas as pd +import pyarrow + +from feast import FeatureService, FeatureStore, FeatureView +from feast.errors import FeatureViewNotFoundException +from feast.feature_logging import LOG_DATE_FIELD, LOG_TIMESTAMP_FIELD, REQUEST_ID_FIELD +from feast.protos.feast.serving.ServingService_pb2 import FieldStatus + +""" +Return latest rows in a dataframe based on join key and entity values. 
+""" +def get_latest_rows(df, join_key, entity_values): + rows = df[df[join_key].isin(entity_values)] + return rows.loc[rows.groupby(join_key)["event_timestamp"].idxmax()] + +""" +Given dataframe and feature view, generate the expected logging dataframes that would be otherwise generated by our logging infrastructure. +""" +def generate_expected_logs( + df: pd.DataFrame, + feature_view: FeatureView, + features: List[str], + join_keys: List[str], + timestamp_column: str, +): + logs = pd.DataFrame() + for join_key in join_keys: + logs[join_key] = df[join_key] + + for feature in features: + col = f"{feature_view.name}__{feature}" + logs[col] = df[feature] + logs[f"{col}__timestamp"] = df[timestamp_column] + logs[f"{col}__status"] = FieldStatus.PRESENT + if feature_view.ttl: + logs[f"{col}__status"] = logs[f"{col}__status"].mask( + df[timestamp_column] + < datetime.utcnow().replace(tzinfo=pytz.UTC) - feature_view.ttl, + FieldStatus.OUTSIDE_MAX_AGE, + ) + + return logs.sort_values(by=join_keys).reset_index(drop=True) + + +def prepare_logs( + source_df: pd.DataFrame, feature_service: FeatureService, store: FeatureStore +) -> pd.DataFrame: + num_rows = source_df.shape[0] + + logs_df = pd.DataFrame() + logs_df[REQUEST_ID_FIELD] = [str(uuid.uuid4()) for _ in range(num_rows)] + logs_df[LOG_TIMESTAMP_FIELD] = pd.Series( + np.random.randint(0, 7 * 24 * 3600, num_rows) + ).map(lambda secs: pd.Timestamp.utcnow() - datetime.timedelta(seconds=secs)) + logs_df[LOG_DATE_FIELD] = logs_df[LOG_TIMESTAMP_FIELD].dt.date + + for projection in feature_service.feature_view_projections: + try: + view = store.get_feature_view(projection.name) + except FeatureViewNotFoundException: + view = store.get_on_demand_feature_view(projection.name) + for source in view.source_request_sources.values(): + for field in source.schema: + logs_df[field.name] = source_df[field.name] + else: + for entity_name in view.entities: + entity = store.get_entity(entity_name) + logs_df[entity.join_key] = source_df[entity.join_key] + + for feature in projection.features: + source_field = ( + feature.name + if feature.name in source_df.columns + else f"{projection.name_to_use()}__{feature.name}" + ) + destination_field = f"{projection.name_to_use()}__{feature.name}" + logs_df[destination_field] = source_df[source_field] + logs_df[f"{destination_field}__timestamp"] = source_df[ + "event_timestamp" + ].dt.floor("s") + if logs_df[f"{destination_field}__timestamp"].dt.tz: + logs_df[f"{destination_field}__timestamp"] = logs_df[ + f"{destination_field}__timestamp" + ].dt.tz_convert(None) + logs_df[f"{destination_field}__status"] = FieldStatus.PRESENT + if isinstance(view, FeatureView) and view.ttl: + logs_df[f"{destination_field}__status"] = logs_df[ + f"{destination_field}__status" + ].mask( + logs_df[f"{destination_field}__timestamp"] + < (datetime.datetime.utcnow() - view.ttl), + FieldStatus.OUTSIDE_MAX_AGE, + ) + + return logs_df + + +@contextlib.contextmanager +def to_logs_dataset( + table: pyarrow.Table, pass_as_path: bool +) -> Iterator[Union[pyarrow.Table, Path]]: + if not pass_as_path: + yield table + return + + with tempfile.TemporaryDirectory() as temp_dir: + pyarrow.parquet.write_to_dataset(table, root_path=temp_dir) + yield Path(temp_dir) diff --git a/sdk/python/tests/utils/http_utils.py b/sdk/python/tests/utils/http_utils.py new file mode 100644 index 0000000000..091d88f213 --- /dev/null +++ b/sdk/python/tests/utils/http_utils.py @@ -0,0 +1,12 @@ +import socket +from contextlib import closing + +def free_port(): + sock = socket.socket() 
+ sock.bind(("", 0)) + return sock.getsockname()[1] + + +def check_port_open(host, port) -> bool: + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + return sock.connect_ex((host, port)) == 0 From 562dcb4f3573a46b3e7b21a6aee81d29cabf721b Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 11:06:57 -0700 Subject: [PATCH 02/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/conftest.py | 5 +---- .../integration/e2e/test_go_feature_server.py | 11 ++++++----- sdk/python/tests/utils/feature_utils.py | 15 +++++++++------ sdk/python/tests/utils/http_utils.py | 1 + 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 18c609d16a..eeb6361ca8 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -14,8 +14,6 @@ import logging import multiprocessing import os -import socket -from contextlib import closing from datetime import datetime, timedelta from multiprocessing import Process from sys import platform @@ -45,8 +43,7 @@ from tests.integration.feature_repos.universal.data_sources.file import ( # noqa: E402 FileDataSourceCreator, ) - -from tests.utils.http_utils import check_port_open, free_port +from tests.utils.http_utils import check_port_open, free_port # noqa: E402 logger = logging.getLogger(__name__) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 3b155ae694..5bc28455a3 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -1,7 +1,5 @@ -import socket import threading import time -from contextlib import closing from datetime import datetime from typing import List @@ -11,7 +9,7 @@ import pytz import requests -from feast import FeatureService, FeatureView, ValueType +from feast import FeatureService, ValueType from feast.embedded_go.online_features_service import EmbeddedOnlineFeatureServer from feast.feast_object import FeastObject from feast.feature_logging import LoggingConfig @@ -33,9 +31,9 @@ driver, location, ) - -from tests.utils.http_utils import free_port, check_port_open from tests.utils.feature_utils import generate_expected_logs, get_latest_rows +from tests.utils.http_utils import check_port_open, free_port + def _server_port(environment, server_type: str): if not environment.test_repo_config.go_feature_serving: @@ -85,8 +83,10 @@ def _server_port(environment, server_type: str): # wait for graceful stop time.sleep(5) + # Go test fixtures + @pytest.fixture def initialized_registry(environment, universal_data_sources): fs = environment.feature_store @@ -109,6 +109,7 @@ def initialized_registry(environment, universal_data_sources): fs.apply(feast_objects) fs.materialize(environment.start_date, environment.end_date) + @pytest.fixture def grpc_server_port(environment, initialized_registry): yield from _server_port(environment, "grpc") diff --git a/sdk/python/tests/utils/feature_utils.py b/sdk/python/tests/utils/feature_utils.py index b61fe75298..a3c9624b9e 100644 --- a/sdk/python/tests/utils/feature_utils.py +++ b/sdk/python/tests/utils/feature_utils.py @@ -1,17 +1,14 @@ -from typing import List -from datetime import datetime -import pytz - import contextlib import datetime import tempfile import uuid from pathlib import Path -from typing import Iterator, Union +from typing import Iterator, List, Union import numpy as np import pandas as pd import pyarrow +import pytz 
from feast import FeatureService, FeatureStore, FeatureView from feast.errors import FeatureViewNotFoundException @@ -21,13 +18,18 @@ """ Return latest rows in a dataframe based on join key and entity values. """ + + def get_latest_rows(df, join_key, entity_values): rows = df[df[join_key].isin(entity_values)] return rows.loc[rows.groupby(join_key)["event_timestamp"].idxmax()] + """ Given dataframe and feature view, generate the expected logging dataframes that would be otherwise generated by our logging infrastructure. """ + + def generate_expected_logs( df: pd.DataFrame, feature_view: FeatureView, @@ -47,7 +49,8 @@ def generate_expected_logs( if feature_view.ttl: logs[f"{col}__status"] = logs[f"{col}__status"].mask( df[timestamp_column] - < datetime.utcnow().replace(tzinfo=pytz.UTC) - feature_view.ttl, + < datetime.datetime.utcnow().replace(tzinfo=pytz.UTC) + - feature_view.ttl, FieldStatus.OUTSIDE_MAX_AGE, ) diff --git a/sdk/python/tests/utils/http_utils.py b/sdk/python/tests/utils/http_utils.py index 091d88f213..47c6cb8ac1 100644 --- a/sdk/python/tests/utils/http_utils.py +++ b/sdk/python/tests/utils/http_utils.py @@ -1,6 +1,7 @@ import socket from contextlib import closing + def free_port(): sock = socket.socket() sock.bind(("", 0)) From d9ab14692c9fc3410a7731d70264be35dfb3c23b Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 11:30:08 -0700 Subject: [PATCH 03/30] Fix e2e tests Signed-off-by: Kevin Zhang --- .../integration/e2e/test_go_feature_server.py | 187 +++++++++--------- .../e2e/test_python_feature_server.py | 10 +- .../integration/e2e/test_universal_e2e.py | 139 +------------ .../tests/integration/e2e/test_usage_e2e.py | 2 + .../materialization/test_lambda.py | 145 +------------- sdk/python/tests/utils/e2e_test_utils.py | 140 +++++++++++++ 6 files changed, 253 insertions(+), 370 deletions(-) create mode 100644 sdk/python/tests/utils/e2e_test_utils.py diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 5bc28455a3..502047b953 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -35,97 +35,6 @@ from tests.utils.http_utils import check_port_open, free_port -def _server_port(environment, server_type: str): - if not environment.test_repo_config.go_feature_serving: - pytest.skip("Only for Go path") - - fs = environment.feature_store - - embedded = EmbeddedOnlineFeatureServer( - repo_path=str(fs.repo_path.absolute()), - repo_config=fs.config, - feature_store=fs, - ) - port = free_port() - if server_type == "grpc": - target = embedded.start_grpc_server - elif server_type == "http": - target = embedded.start_http_server - else: - raise ValueError("Server Type must be either 'http' or 'grpc'") - - t = threading.Thread( - target=target, - args=("127.0.0.1", port), - kwargs=dict( - enable_logging=True, - logging_options=FeatureLoggingConfig( - enabled=True, - queue_capacity=100, - write_to_disk_interval_secs=1, - flush_interval_secs=1, - emit_timeout_micro_secs=10000, - ), - ), - ) - t.start() - - wait_retry_backoff( - lambda: (None, check_port_open("127.0.0.1", port)), timeout_secs=15 - ) - - yield port - if server_type == "grpc": - embedded.stop_grpc_server() - else: - embedded.stop_http_server() - - # wait for graceful stop - time.sleep(5) - - -# Go test fixtures - - -@pytest.fixture -def initialized_registry(environment, universal_data_sources): - fs = environment.feature_store - - _, _, data_sources 
= universal_data_sources - feature_views = construct_universal_feature_views(data_sources) - - feature_service = FeatureService( - name="driver_features", - features=[feature_views.driver], - logging_config=LoggingConfig( - destination=environment.data_source_creator.create_logged_features_destination(), - sample_rate=1.0, - ), - ) - feast_objects: List[FeastObject] = [feature_service] - feast_objects.extend(feature_views.values()) - feast_objects.extend([driver(), customer(), location()]) - - fs.apply(feast_objects) - fs.materialize(environment.start_date, environment.end_date) - - -@pytest.fixture -def grpc_server_port(environment, initialized_registry): - yield from _server_port(environment, "grpc") - - -@pytest.fixture -def http_server_port(environment, initialized_registry): - yield from _server_port(environment, "http") - - -@pytest.fixture -def grpc_client(grpc_server_port): - ch = grpc.insecure_channel(f"localhost:{grpc_server_port}") - yield ServingServiceStub(ch) - - @pytest.mark.integration @pytest.mark.goserver def test_go_grpc_server(grpc_client): @@ -255,3 +164,99 @@ def retrieve(): persisted_logs = persisted_logs.sort_values(by="driver_id").reset_index(drop=True) persisted_logs = persisted_logs[expected_logs.columns] pd.testing.assert_frame_equal(expected_logs, persisted_logs, check_dtype=False) + + +""" +Start go feature server either on http or grpc based on the repo configuration for testing. +""" + + +def _server_port(environment, server_type: str): + if not environment.test_repo_config.go_feature_serving: + pytest.skip("Only for Go path") + + fs = environment.feature_store + + embedded = EmbeddedOnlineFeatureServer( + repo_path=str(fs.repo_path.absolute()), + repo_config=fs.config, + feature_store=fs, + ) + port = free_port() + if server_type == "grpc": + target = embedded.start_grpc_server + elif server_type == "http": + target = embedded.start_http_server + else: + raise ValueError("Server Type must be either 'http' or 'grpc'") + + t = threading.Thread( + target=target, + args=("127.0.0.1", port), + kwargs=dict( + enable_logging=True, + logging_options=FeatureLoggingConfig( + enabled=True, + queue_capacity=100, + write_to_disk_interval_secs=1, + flush_interval_secs=1, + emit_timeout_micro_secs=10000, + ), + ), + ) + t.start() + + wait_retry_backoff( + lambda: (None, check_port_open("127.0.0.1", port)), timeout_secs=15 + ) + + yield port + if server_type == "grpc": + embedded.stop_grpc_server() + else: + embedded.stop_http_server() + + # wait for graceful stop + time.sleep(5) + + +# Go test fixtures + + +@pytest.fixture +def initialized_registry(environment, universal_data_sources): + fs = environment.feature_store + + _, _, data_sources = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + feature_service = FeatureService( + name="driver_features", + features=[feature_views.driver], + logging_config=LoggingConfig( + destination=environment.data_source_creator.create_logged_features_destination(), + sample_rate=1.0, + ), + ) + feast_objects: List[FeastObject] = [feature_service] + feast_objects.extend(feature_views.values()) + feast_objects.extend([driver(), customer(), location()]) + + fs.apply(feast_objects) + fs.materialize(environment.start_date, environment.end_date) + + +@pytest.fixture +def grpc_server_port(environment, initialized_registry): + yield from _server_port(environment, "grpc") + + +@pytest.fixture +def http_server_port(environment, initialized_registry): + yield from _server_port(environment, "http") + + 
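+# ServingServiceStub connected to the embedded Go server started by grpc_server_port.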
+@pytest.fixture +def grpc_client(grpc_server_port): + ch = grpc.insecure_channel(f"localhost:{grpc_server_port}") + yield ServingServiceStub(ch) diff --git a/sdk/python/tests/integration/e2e/test_python_feature_server.py b/sdk/python/tests/integration/e2e/test_python_feature_server.py index 97b9693391..9c61f6fa19 100644 --- a/sdk/python/tests/integration/e2e/test_python_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_python_feature_server.py @@ -58,7 +58,9 @@ def test_get_online_features(python_fs_client): @pytest.mark.integration @pytest.mark.universal_online_stores def test_push(python_fs_client): - initial_temp = get_temperatures(python_fs_client, location_ids=[1])[0] + initial_temp = _get_temperatures_from_feature_server( + python_fs_client, location_ids=[1] + )[0] json_data = json.dumps( { "push_source_name": "location_stats_push_source", @@ -77,10 +79,12 @@ def test_push(python_fs_client): # Check new pushed temperature is fetched assert response.status_code == 200 - assert get_temperatures(python_fs_client, location_ids=[1]) == [initial_temp * 100] + assert _get_temperatures_from_feature_server( + python_fs_client, location_ids=[1] + ) == [initial_temp * 100] -def get_temperatures(client, location_ids: List[int]): +def _get_temperatures_from_feature_server(client, location_ids: List[int]): get_request_data = { "features": ["pushable_location_stats:temperature"], "entities": {"location_id": location_ids}, diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index a42a96e594..122d345ce4 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -1,14 +1,10 @@ -import math -from datetime import datetime, timedelta -from typing import Optional +from datetime import timedelta -import pandas as pd import pytest -from pytz import utc -from feast import FeatureStore, FeatureView from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view +from tests.utils.e2e_test_utils import validate_offline_online_store_consistency @pytest.mark.integration @@ -30,133 +26,4 @@ def test_e2e_consistency(environment, e2e_data_sources, infer_features): # we use timestamp from generated dataframe as a split point split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) - run_offline_online_store_consistency_test(fs, fv, split_dt) - - -def check_offline_and_online_features( - fs: FeatureStore, - fv: FeatureView, - driver_id: int, - event_timestamp: datetime, - expected_value: Optional[float], - full_feature_names: bool, - check_offline_store: bool = True, -) -> None: - # Check online store - response_dict = fs.get_online_features( - [f"{fv.name}:value"], - [{"driver_id": driver_id}], - full_feature_names=full_feature_names, - ).to_dict() - - if full_feature_names: - - if expected_value: - assert response_dict[f"{fv.name}__value"][0], f"Response: {response_dict}" - assert ( - abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6 - ), f"Response: {response_dict}, Expected: {expected_value}" - else: - assert response_dict[f"{fv.name}__value"][0] is None - else: - if expected_value: - assert response_dict["value"][0], f"Response: {response_dict}" - assert ( - abs(response_dict["value"][0] - expected_value) < 1e-6 - ), f"Response: {response_dict}, Expected: {expected_value}" - else: - assert response_dict["value"][0] is None - - # Check 
offline store - if check_offline_store: - df = fs.get_historical_features( - entity_df=pd.DataFrame.from_dict( - {"driver_id": [driver_id], "event_timestamp": [event_timestamp]} - ), - features=[f"{fv.name}:value"], - full_feature_names=full_feature_names, - ).to_df() - - if full_feature_names: - if expected_value: - assert ( - abs( - df.to_dict(orient="list")[f"{fv.name}__value"][0] - - expected_value - ) - < 1e-6 - ) - else: - assert not df.to_dict(orient="list")[f"{fv.name}__value"] or math.isnan( - df.to_dict(orient="list")[f"{fv.name}__value"][0] - ) - else: - if expected_value: - assert ( - abs(df.to_dict(orient="list")["value"][0] - expected_value) < 1e-6 - ) - else: - assert not df.to_dict(orient="list")["value"] or math.isnan( - df.to_dict(orient="list")["value"][0] - ) - - -def run_offline_online_store_consistency_test( - fs: FeatureStore, fv: FeatureView, split_dt: datetime -) -> None: - now = datetime.utcnow() - - full_feature_names = True - check_offline_store: bool = True - - # Run materialize() - # use both tz-naive & tz-aware timestamps to test that they're both correctly handled - start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) - end_date = split_dt - fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date) - - # check result of materialize() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=1, - event_timestamp=end_date, - expected_value=0.3, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=2, - event_timestamp=end_date, - expected_value=None, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # check prior value for materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=end_date, - expected_value=4, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # run materialize_incremental() - fs.materialize_incremental(feature_views=[fv.name], end_date=now) - - # check result of materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=now, - expected_value=5, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) + validate_offline_online_store_consistency(fs, fv, split_dt) diff --git a/sdk/python/tests/integration/e2e/test_usage_e2e.py b/sdk/python/tests/integration/e2e/test_usage_e2e.py index 53e4a32a82..5c95bd50b1 100644 --- a/sdk/python/tests/integration/e2e/test_usage_e2e.py +++ b/sdk/python/tests/integration/e2e/test_usage_e2e.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# This file tests our usage tracking system in `usage.py`. 
import os import sys import tempfile diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 4a259fd365..05288dadc8 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -1,13 +1,8 @@ -import math -import time -from datetime import datetime, timedelta -from typing import Optional +from datetime import timedelta -import pandas as pd import pytest -from pytz import utc -from feast import Entity, Feature, FeatureStore, FeatureView, ValueType +from feast import Entity, Feature, FeatureView, ValueType from tests.data.data_creator import create_basic_driver_dataset from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, @@ -19,10 +14,11 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) +from tests.utils.e2e_test_utils import validate_offline_online_store_consistency @pytest.mark.integration -def test_lambda_materialization(): +def test_lambda_materialization_consistency(): lambda_config = IntegrationTestRepoConfig( provider="aws", online_store={"type": "dynamodb", "region": "us-west-2"}, @@ -70,137 +66,6 @@ def test_lambda_materialization(): print(f"Split datetime: {split_dt}") - run_offline_online_store_consistency_test(fs, driver_stats_fv, split_dt) + validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) finally: fs.teardown() - - -def check_offline_and_online_features( - fs: FeatureStore, - fv: FeatureView, - driver_id: int, - event_timestamp: datetime, - expected_value: Optional[float], - full_feature_names: bool, - check_offline_store: bool = True, -) -> None: - # Check online store - response_dict = fs.get_online_features( - [f"{fv.name}:value"], - [{"driver_id": driver_id}], - full_feature_names=full_feature_names, - ).to_dict() - - if full_feature_names: - - if expected_value: - assert response_dict[f"{fv.name}__value"][0], f"Response: {response_dict}" - assert ( - abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6 - ), f"Response: {response_dict}, Expected: {expected_value}" - else: - assert response_dict[f"{fv.name}__value"][0] is None - else: - if expected_value: - assert response_dict["value"][0], f"Response: {response_dict}" - assert ( - abs(response_dict["value"][0] - expected_value) < 1e-6 - ), f"Response: {response_dict}, Expected: {expected_value}" - else: - assert response_dict["value"][0] is None - - # Check offline store - if check_offline_store: - df = fs.get_historical_features( - entity_df=pd.DataFrame.from_dict( - {"driver_id": [driver_id], "event_timestamp": [event_timestamp]} - ), - features=[f"{fv.name}:value"], - full_feature_names=full_feature_names, - ).to_df() - - if full_feature_names: - if expected_value: - assert ( - abs( - df.to_dict(orient="list")[f"{fv.name}__value"][0] - - expected_value - ) - < 1e-6 - ) - else: - assert not df.to_dict(orient="list")[f"{fv.name}__value"] or math.isnan( - df.to_dict(orient="list")[f"{fv.name}__value"][0] - ) - else: - if expected_value: - assert ( - abs(df.to_dict(orient="list")["value"][0] - expected_value) < 1e-6 - ) - else: - assert not df.to_dict(orient="list")["value"] or math.isnan( - df.to_dict(orient="list")["value"][0] - ) - - -def run_offline_online_store_consistency_test( - fs: FeatureStore, fv: FeatureView, split_dt: datetime -) -> None: - now = datetime.utcnow() - - full_feature_names = True - 
check_offline_store: bool = True - - # Run materialize() - # use both tz-naive & tz-aware timestamps to test that they're both correctly handled - start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) - end_date = split_dt - fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date) - - time.sleep(10) - - # check result of materialize() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=1, - event_timestamp=end_date, - expected_value=0.3, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=2, - event_timestamp=end_date, - expected_value=None, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # check prior value for materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=end_date, - expected_value=4, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # run materialize_incremental() - fs.materialize_incremental(feature_views=[fv.name], end_date=now) - - # check result of materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=now, - expected_value=5, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) diff --git a/sdk/python/tests/utils/e2e_test_utils.py b/sdk/python/tests/utils/e2e_test_utils.py new file mode 100644 index 0000000000..6eb69351af --- /dev/null +++ b/sdk/python/tests/utils/e2e_test_utils.py @@ -0,0 +1,140 @@ +import math +import time +from datetime import datetime, timedelta +from typing import Optional + +import pandas as pd +from pytz import utc + +from feast import FeatureStore, FeatureView + + +def check_offline_and_online_features( + fs: FeatureStore, + fv: FeatureView, + driver_id: int, + event_timestamp: datetime, + expected_value: Optional[float], + full_feature_names: bool, + check_offline_store: bool = True, +) -> None: + # Check online store + response_dict = fs.get_online_features( + [f"{fv.name}:value"], + [{"driver_id": driver_id}], + full_feature_names=full_feature_names, + ).to_dict() + + if full_feature_names: + + if expected_value: + assert response_dict[f"{fv.name}__value"][0], f"Response: {response_dict}" + assert ( + abs(response_dict[f"{fv.name}__value"][0] - expected_value) < 1e-6 + ), f"Response: {response_dict}, Expected: {expected_value}" + else: + assert response_dict[f"{fv.name}__value"][0] is None + else: + if expected_value: + assert response_dict["value"][0], f"Response: {response_dict}" + assert ( + abs(response_dict["value"][0] - expected_value) < 1e-6 + ), f"Response: {response_dict}, Expected: {expected_value}" + else: + assert response_dict["value"][0] is None + + # Check offline store + if check_offline_store: + df = fs.get_historical_features( + entity_df=pd.DataFrame.from_dict( + {"driver_id": [driver_id], "event_timestamp": [event_timestamp]} + ), + features=[f"{fv.name}:value"], + full_feature_names=full_feature_names, + ).to_df() + + if full_feature_names: + if expected_value: + assert ( + abs( + df.to_dict(orient="list")[f"{fv.name}__value"][0] + - expected_value + ) + < 1e-6 + ) + else: + assert not df.to_dict(orient="list")[f"{fv.name}__value"] or math.isnan( + df.to_dict(orient="list")[f"{fv.name}__value"][0] + ) + else: + if expected_value: + assert ( + abs(df.to_dict(orient="list")["value"][0] - expected_value) < 1e-6 + ) + else: + assert not 
df.to_dict(orient="list")["value"] or math.isnan( + df.to_dict(orient="list")["value"][0] + ) + + +def validate_offline_online_store_consistency( + fs: FeatureStore, fv: FeatureView, split_dt: datetime +) -> None: + now = datetime.utcnow() + + full_feature_names = True + check_offline_store: bool = True + + # Run materialize() + # use both tz-naive & tz-aware timestamps to test that they're both correctly handled + start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) + end_date = split_dt + fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date) + + time.sleep(10) + + # check result of materialize() + check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=1, + event_timestamp=end_date, + expected_value=0.3, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=2, + event_timestamp=end_date, + expected_value=None, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + # check prior value for materialize_incremental() + check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=3, + event_timestamp=end_date, + expected_value=4, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + # run materialize_incremental() + fs.materialize_incremental(feature_views=[fv.name], end_date=now) + + # check result of materialize_incremental() + check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=3, + event_timestamp=now, + expected_value=5, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) From 209d15bca997ef65791241904fe1338f020cf61c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 12:47:50 -0700 Subject: [PATCH 04/30] Fix Signed-off-by: Kevin Zhang --- .../tests/integration/e2e/test_validation.py | 135 ++-- ...=> test_push_features_to_offline_store.py} | 0 .../test_universal_historical_retrieval.py | 444 +------------- ... 
=> test_push_features_to_online_store.py} | 0 .../integration/registration/test_cli.py | 149 +---- .../registration/test_cli_apply_duplicates.py | 7 + .../registration/test_cli_chdir.py | 3 + .../registration/test_feature_store.py | 128 ++-- .../registration/test_inference.py | 395 +----------- .../integration/registration/test_registry.py | 576 +----------------- .../registration/test_universal_types.py | 182 +++--- .../unit/infra/test_inference_unit_tests.py | 388 ++++++++++++ .../tests/unit/infra/test_local_registry.py | 532 ++++++++++++++++ sdk/python/tests/utils/e2e_test_utils.py | 218 ++++++- .../tests/utils/feature_store_test_utils.py | 399 ++++++++++++ ...feature_utils.py => feature_test_utils.py} | 0 sdk/python/tests/utils/logged_features.py | 81 --- 17 files changed, 1809 insertions(+), 1828 deletions(-) rename sdk/python/tests/integration/offline_store/{test_push_offline.py => test_push_features_to_offline_store.py} (100%) rename sdk/python/tests/integration/online_store/{test_push_online.py => test_push_features_to_online_store.py} (100%) create mode 100644 sdk/python/tests/unit/infra/test_inference_unit_tests.py create mode 100644 sdk/python/tests/unit/infra/test_local_registry.py create mode 100644 sdk/python/tests/utils/feature_store_test_utils.py rename sdk/python/tests/utils/{feature_utils.py => feature_test_utils.py} (100%) delete mode 100644 sdk/python/tests/utils/logged_features.py diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index b8908663b3..f7dcd7b494 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -39,72 +39,6 @@ ] -@ge_profiler -def configurable_profiler(dataset: PandasDataset) -> ExpectationSuite: - from great_expectations.profile.user_configurable_profiler import ( - UserConfigurableProfiler, - ) - - return UserConfigurableProfiler( - profile_dataset=dataset, - ignored_columns=["event_timestamp"], - excluded_expectations=[ - "expect_table_columns_to_match_ordered_list", - "expect_table_row_count_to_be_between", - ], - value_set_threshold="few", - ).build_suite() - - -@ge_profiler(with_feature_metadata=True) -def profiler_with_feature_metadata(dataset: PandasDataset) -> ExpectationSuite: - from great_expectations.profile.user_configurable_profiler import ( - UserConfigurableProfiler, - ) - - # always present - dataset.expect_column_values_to_be_in_set( - "global_stats__avg_ride_length__status", {FieldStatus.PRESENT} - ) - - # present at least in 70% of rows - dataset.expect_column_values_to_be_in_set( - "customer_profile__current_balance__status", {FieldStatus.PRESENT}, mostly=0.7 - ) - - return UserConfigurableProfiler( - profile_dataset=dataset, - ignored_columns=["event_timestamp"] - + [ - c - for c in dataset.columns - if c.endswith("__timestamp") or c.endswith("__status") - ], - excluded_expectations=[ - "expect_table_columns_to_match_ordered_list", - "expect_table_row_count_to_be_between", - ], - value_set_threshold="few", - ).build_suite() - - -@ge_profiler -def profiler_with_unrealistic_expectations(dataset: PandasDataset) -> ExpectationSuite: - # need to create dataframe with corrupted data first - df = pd.DataFrame() - df["current_balance"] = [-100] - df["avg_passenger_count"] = [0] - - other_ds = PandasDataset(df) - other_ds.expect_column_max_to_be_between("current_balance", -1000, -100) - other_ds.expect_column_values_to_be_in_set("avg_passenger_count", value_set={0}) - - # this should pass - 
other_ds.expect_column_min_to_be_between("avg_passenger_count", 0, 1000) - - return other_ds.get_expectation_suite() - - @pytest.mark.integration @pytest.mark.universal_offline_stores def test_historical_retrieval_with_validation(environment, universal_data_sources): @@ -357,3 +291,72 @@ def test_e2e_validation_via_cli(environment, universal_data_sources): p = runner.run(validate_args, cwd=local_repo.repo_path) assert p.returncode == 1, p.stdout.decode() assert "Validation failed" in p.stdout.decode(), p.stderr.decode() + + +# Great expectations profilers created for testing + + +@ge_profiler +def configurable_profiler(dataset: PandasDataset) -> ExpectationSuite: + from great_expectations.profile.user_configurable_profiler import ( + UserConfigurableProfiler, + ) + + return UserConfigurableProfiler( + profile_dataset=dataset, + ignored_columns=["event_timestamp"], + excluded_expectations=[ + "expect_table_columns_to_match_ordered_list", + "expect_table_row_count_to_be_between", + ], + value_set_threshold="few", + ).build_suite() + + +@ge_profiler(with_feature_metadata=True) +def profiler_with_feature_metadata(dataset: PandasDataset) -> ExpectationSuite: + from great_expectations.profile.user_configurable_profiler import ( + UserConfigurableProfiler, + ) + + # always present + dataset.expect_column_values_to_be_in_set( + "global_stats__avg_ride_length__status", {FieldStatus.PRESENT} + ) + + # present at least in 70% of rows + dataset.expect_column_values_to_be_in_set( + "customer_profile__current_balance__status", {FieldStatus.PRESENT}, mostly=0.7 + ) + + return UserConfigurableProfiler( + profile_dataset=dataset, + ignored_columns=["event_timestamp"] + + [ + c + for c in dataset.columns + if c.endswith("__timestamp") or c.endswith("__status") + ], + excluded_expectations=[ + "expect_table_columns_to_match_ordered_list", + "expect_table_row_count_to_be_between", + ], + value_set_threshold="few", + ).build_suite() + + +@ge_profiler +def profiler_with_unrealistic_expectations(dataset: PandasDataset) -> ExpectationSuite: + # need to create dataframe with corrupted data first + df = pd.DataFrame() + df["current_balance"] = [-100] + df["avg_passenger_count"] = [0] + + other_ds = PandasDataset(df) + other_ds.expect_column_max_to_be_between("current_balance", -1000, -100) + other_ds.expect_column_values_to_be_in_set("avg_passenger_count", value_set={0}) + + # this should pass + other_ds.expect_column_min_to_be_between("avg_passenger_count", 0, 1000) + + return other_ds.get_expectation_suite() diff --git a/sdk/python/tests/integration/offline_store/test_push_offline.py b/sdk/python/tests/integration/offline_store/test_push_features_to_offline_store.py similarity index 100% rename from sdk/python/tests/integration/offline_store/test_push_offline.py rename to sdk/python/tests/integration/offline_store/test_push_features_to_offline_store.py diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 87bf59fe9f..524b9b31eb 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -1,20 +1,13 @@ import random import time from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional import numpy as np import pandas as pd import pytest -from pandas.testing import assert_frame_equal as pd_assert_frame_equal -from pytz import utc 
-from feast import utils from feast.entity import Entity -from feast.errors import ( - FeatureNameCollisionError, - RequestDataNotFoundInEntityDfException, -) +from feast.errors import RequestDataNotFoundInEntityDfException from feast.feature_service import FeatureService from feast.feature_view import FeatureView from feast.field import Field @@ -34,263 +27,17 @@ driver, location, ) +from tests.utils.feature_store_test_utils import ( + assert_feature_service_correctness, + assert_feature_service_entity_mapping_correctness, + get_expected_training_df, + get_response_feature_name, + validate_dataframes, +) np.random.seed(0) -def convert_timestamp_records_to_utc( - records: List[Dict[str, Any]], column: str -) -> List[Dict[str, Any]]: - for record in records: - record[column] = utils.make_tzaware(record[column]).astimezone(utc) - return records - - -# Find the latest record in the given time range and filter -def find_asof_record( - records: List[Dict[str, Any]], - ts_key: str, - ts_start: datetime, - ts_end: datetime, - filter_keys: Optional[List[str]] = None, - filter_values: Optional[List[Any]] = None, -) -> Dict[str, Any]: - filter_keys = filter_keys or [] - filter_values = filter_values or [] - assert len(filter_keys) == len(filter_values) - found_record: Dict[str, Any] = {} - for record in records: - if ( - all( - [ - record[filter_key] == filter_value - for filter_key, filter_value in zip(filter_keys, filter_values) - ] - ) - and ts_start <= record[ts_key] <= ts_end - ): - if not found_record or found_record[ts_key] < record[ts_key]: - found_record = record - return found_record - - -def get_expected_training_df( - customer_df: pd.DataFrame, - customer_fv: FeatureView, - driver_df: pd.DataFrame, - driver_fv: FeatureView, - orders_df: pd.DataFrame, - order_fv: FeatureView, - location_df: pd.DataFrame, - location_fv: FeatureView, - global_df: pd.DataFrame, - global_fv: FeatureView, - field_mapping_df: pd.DataFrame, - field_mapping_fv: FeatureView, - entity_df: pd.DataFrame, - event_timestamp: str, - full_feature_names: bool = False, -): - # Convert all pandas dataframes into records with UTC timestamps - customer_records = convert_timestamp_records_to_utc( - customer_df.to_dict("records"), customer_fv.batch_source.timestamp_field - ) - driver_records = convert_timestamp_records_to_utc( - driver_df.to_dict("records"), driver_fv.batch_source.timestamp_field - ) - order_records = convert_timestamp_records_to_utc( - orders_df.to_dict("records"), event_timestamp - ) - location_records = convert_timestamp_records_to_utc( - location_df.to_dict("records"), location_fv.batch_source.timestamp_field - ) - global_records = convert_timestamp_records_to_utc( - global_df.to_dict("records"), global_fv.batch_source.timestamp_field - ) - field_mapping_records = convert_timestamp_records_to_utc( - field_mapping_df.to_dict("records"), - field_mapping_fv.batch_source.timestamp_field, - ) - entity_rows = convert_timestamp_records_to_utc( - entity_df.to_dict("records"), event_timestamp - ) - - # Set sufficiently large ttl that it effectively functions as infinite for the calculations below. 
- default_ttl = timedelta(weeks=52) - - # Manually do point-in-time join of driver, customer, and order records against - # the entity df - for entity_row in entity_rows: - customer_record = find_asof_record( - customer_records, - ts_key=customer_fv.batch_source.timestamp_field, - ts_start=entity_row[event_timestamp] - - get_feature_view_ttl(customer_fv, default_ttl), - ts_end=entity_row[event_timestamp], - filter_keys=["customer_id"], - filter_values=[entity_row["customer_id"]], - ) - driver_record = find_asof_record( - driver_records, - ts_key=driver_fv.batch_source.timestamp_field, - ts_start=entity_row[event_timestamp] - - get_feature_view_ttl(driver_fv, default_ttl), - ts_end=entity_row[event_timestamp], - filter_keys=["driver_id"], - filter_values=[entity_row["driver_id"]], - ) - order_record = find_asof_record( - order_records, - ts_key=customer_fv.batch_source.timestamp_field, - ts_start=entity_row[event_timestamp] - - get_feature_view_ttl(order_fv, default_ttl), - ts_end=entity_row[event_timestamp], - filter_keys=["customer_id", "driver_id"], - filter_values=[entity_row["customer_id"], entity_row["driver_id"]], - ) - origin_record = find_asof_record( - location_records, - ts_key=location_fv.batch_source.timestamp_field, - ts_start=order_record[event_timestamp] - - get_feature_view_ttl(location_fv, default_ttl), - ts_end=order_record[event_timestamp], - filter_keys=["location_id"], - filter_values=[order_record["origin_id"]], - ) - destination_record = find_asof_record( - location_records, - ts_key=location_fv.batch_source.timestamp_field, - ts_start=order_record[event_timestamp] - - get_feature_view_ttl(location_fv, default_ttl), - ts_end=order_record[event_timestamp], - filter_keys=["location_id"], - filter_values=[order_record["destination_id"]], - ) - global_record = find_asof_record( - global_records, - ts_key=global_fv.batch_source.timestamp_field, - ts_start=order_record[event_timestamp] - - get_feature_view_ttl(global_fv, default_ttl), - ts_end=order_record[event_timestamp], - ) - - field_mapping_record = find_asof_record( - field_mapping_records, - ts_key=field_mapping_fv.batch_source.timestamp_field, - ts_start=order_record[event_timestamp] - - get_feature_view_ttl(field_mapping_fv, default_ttl), - ts_end=order_record[event_timestamp], - ) - - entity_row.update( - { - ( - f"customer_profile__{k}" if full_feature_names else k - ): customer_record.get(k, None) - for k in ( - "current_balance", - "avg_passenger_count", - "lifetime_trip_count", - ) - } - ) - entity_row.update( - { - (f"driver_stats__{k}" if full_feature_names else k): driver_record.get( - k, None - ) - for k in ("conv_rate", "avg_daily_trips") - } - ) - entity_row.update( - { - (f"order__{k}" if full_feature_names else k): order_record.get(k, None) - for k in ("order_is_success",) - } - ) - entity_row.update( - { - "origin__temperature": origin_record.get("temperature", None), - "destination__temperature": destination_record.get("temperature", None), - } - ) - entity_row.update( - { - (f"global_stats__{k}" if full_feature_names else k): global_record.get( - k, None - ) - for k in ( - "num_rides", - "avg_ride_length", - ) - } - ) - - # get field_mapping_record by column name, but label by feature name - entity_row.update( - { - ( - f"field_mapping__{feature}" if full_feature_names else feature - ): field_mapping_record.get(column, None) - for ( - column, - feature, - ) in field_mapping_fv.batch_source.field_mapping.items() - } - ) - - # Convert records back to pandas dataframe - expected_df = 
pd.DataFrame(entity_rows) - - # Move "event_timestamp" column to front - current_cols = expected_df.columns.tolist() - current_cols.remove(event_timestamp) - expected_df = expected_df[[event_timestamp] + current_cols] - - # Cast some columns to expected types, since we lose information when converting pandas DFs into Python objects. - if full_feature_names: - expected_column_types = { - "order__order_is_success": "int32", - "driver_stats__conv_rate": "float32", - "customer_profile__current_balance": "float32", - "customer_profile__avg_passenger_count": "float32", - "global_stats__avg_ride_length": "float32", - "field_mapping__feature_name": "int32", - } - else: - expected_column_types = { - "order_is_success": "int32", - "conv_rate": "float32", - "current_balance": "float32", - "avg_passenger_count": "float32", - "avg_ride_length": "float32", - "feature_name": "int32", - } - - for col, typ in expected_column_types.items(): - expected_df[col] = expected_df[col].astype(typ) - - conv_feature_name = "driver_stats__conv_rate" if full_feature_names else "conv_rate" - conv_plus_feature_name = response_feature_name( - "conv_rate_plus_100", full_feature_names - ) - expected_df[conv_plus_feature_name] = expected_df[conv_feature_name] + 100 - expected_df[ - response_feature_name("conv_rate_plus_100_rounded", full_feature_names) - ] = ( - expected_df[conv_plus_feature_name] - .astype("float") - .round() - .astype(pd.Int32Dtype()) - ) - if "val_to_add" in expected_df.columns: - expected_df[ - response_feature_name("conv_rate_plus_val_to_add", full_feature_names) - ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) - - return expected_df - - @pytest.mark.integration @pytest.mark.universal_offline_stores @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}") @@ -396,7 +143,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df_from_df_entities.columns) - assert_frame_equal( + validate_dataframes( expected_df, actual_df_from_df_entities, keys=[event_timestamp, "order_id", "driver_id", "customer_id"], @@ -420,7 +167,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n ) table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas() - assert_frame_equal( + validate_dataframes( expected_df, table_from_df_entities, keys=[event_timestamp, "order_id", "driver_id", "customer_id"], @@ -570,15 +317,15 @@ def test_historical_features_with_entities_from_query( # Not requesting the on demand transform with an entity_df query (can't add request data in them) expected_df_query = full_expected_df.drop( columns=[ - response_feature_name("conv_rate_plus_100", full_feature_names), - response_feature_name("conv_rate_plus_100_rounded", full_feature_names), - response_feature_name("avg_daily_trips", full_feature_names), - response_feature_name("conv_rate", full_feature_names), + get_response_feature_name("conv_rate_plus_100", full_feature_names), + get_response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + get_response_feature_name("avg_daily_trips", full_feature_names), + get_response_feature_name("conv_rate", full_feature_names), "origin__temperature", "destination__temperature", ] ) - assert_frame_equal( + validate_dataframes( expected_df_query, actual_df_from_sql_entities, keys=[event_timestamp, "order_id", "driver_id", "customer_id"], @@ -590,7 
+337,7 @@ def test_historical_features_with_entities_from_query( table_from_sql_entities[col].dtype ) - assert_frame_equal( + validate_dataframes( expected_df_query, table_from_sql_entities, keys=[event_timestamp, "order_id", "driver_id", "customer_id"], @@ -654,22 +401,22 @@ def test_historical_features_persisting( full_feature_names, ).drop( columns=[ - response_feature_name("conv_rate_plus_100", full_feature_names), - response_feature_name("conv_rate_plus_100_rounded", full_feature_names), - response_feature_name("avg_daily_trips", full_feature_names), - response_feature_name("conv_rate", full_feature_names), + get_response_feature_name("conv_rate_plus_100", full_feature_names), + get_response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + get_response_feature_name("avg_daily_trips", full_feature_names), + get_response_feature_name("conv_rate", full_feature_names), "origin__temperature", "destination__temperature", ] ) - assert_frame_equal( + validate_dataframes( expected_df, saved_dataset.to_df(), keys=[event_timestamp, "driver_id", "customer_id"], ) - assert_frame_equal( + validate_dataframes( job.to_df(), saved_dataset.to_df(), keys=[event_timestamp, "driver_id", "customer_id"], @@ -732,16 +479,16 @@ def test_historical_features_with_no_ttl( full_feature_names, ).drop( columns=[ - response_feature_name("conv_rate_plus_100", full_feature_names), - response_feature_name("conv_rate_plus_100_rounded", full_feature_names), - response_feature_name("avg_daily_trips", full_feature_names), - response_feature_name("conv_rate", full_feature_names), + get_response_feature_name("conv_rate_plus_100", full_feature_names), + get_response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + get_response_feature_name("avg_daily_trips", full_feature_names), + get_response_feature_name("conv_rate", full_feature_names), "origin__temperature", "destination__temperature", ] ) - assert_frame_equal( + validate_dataframes( expected_df, job.to_df(), keys=[event_timestamp, "driver_id", "customer_id"], @@ -842,139 +589,4 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df.columns) - assert_frame_equal(expected_df, actual_df, keys=["driver_id"]) - - -def response_feature_name(feature: str, full_feature_names: bool) -> str: - if feature in {"conv_rate", "avg_daily_trips"} and full_feature_names: - return f"driver_stats__{feature}" - - if ( - feature - in { - "conv_rate_plus_100", - "conv_rate_plus_100_rounded", - "conv_rate_plus_val_to_add", - } - and full_feature_names - ): - return f"conv_rate_plus_100__{feature}" - - return feature - - -def get_feature_view_ttl( - feature_view: FeatureView, default_ttl: timedelta -) -> timedelta: - """Returns the ttl of a feature view if it is non-zero. 
Otherwise returns the specified default.""" - return feature_view.ttl if feature_view.ttl else default_ttl - - -def assert_feature_service_correctness( - store, feature_service, full_feature_names, entity_df, expected_df, event_timestamp -): - - job_from_df = store.get_historical_features( - entity_df=entity_df, - features=feature_service, - full_feature_names=full_feature_names, - ) - - actual_df_from_df_entities = job_from_df.to_df() - - expected_df = expected_df[ - [ - event_timestamp, - "order_id", - "driver_id", - "customer_id", - response_feature_name("conv_rate", full_feature_names), - response_feature_name("conv_rate_plus_100", full_feature_names), - "driver_age", - ] - ] - - assert_frame_equal( - expected_df, - actual_df_from_df_entities, - keys=[event_timestamp, "order_id", "driver_id", "customer_id"], - ) - - -def assert_feature_service_entity_mapping_correctness( - store, feature_service, full_feature_names, entity_df, expected_df, event_timestamp -): - if full_feature_names: - job_from_df = store.get_historical_features( - entity_df=entity_df, - features=feature_service, - full_feature_names=full_feature_names, - ) - actual_df_from_df_entities = job_from_df.to_df() - - expected_df: pd.DataFrame = ( - expected_df.sort_values( - by=[ - event_timestamp, - "order_id", - "driver_id", - "customer_id", - "origin_id", - "destination_id", - ] - ) - .drop_duplicates() - .reset_index(drop=True) - ) - expected_df = expected_df[ - [ - event_timestamp, - "order_id", - "driver_id", - "customer_id", - "origin_id", - "destination_id", - "origin__temperature", - "destination__temperature", - ] - ] - - assert_frame_equal( - expected_df, - actual_df_from_df_entities, - keys=[ - event_timestamp, - "order_id", - "driver_id", - "customer_id", - "origin_id", - "destination_id", - ], - ) - else: - # using 2 of the same FeatureView without full_feature_names=True will result in collision - with pytest.raises(FeatureNameCollisionError): - job_from_df = store.get_historical_features( - entity_df=entity_df, - features=feature_service, - full_feature_names=full_feature_names, - ) - - -def assert_frame_equal(expected_df, actual_df, keys): - expected_df: pd.DataFrame = ( - expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True) - ) - - actual_df = ( - actual_df[expected_df.columns] - .sort_values(by=keys) - .drop_duplicates() - .reset_index(drop=True) - ) - - pd_assert_frame_equal( - expected_df, - actual_df, - check_dtype=False, - ) + validate_dataframes(expected_df, actual_df, keys=["driver_id"]) diff --git a/sdk/python/tests/integration/online_store/test_push_online.py b/sdk/python/tests/integration/online_store/test_push_features_to_online_store.py similarity index 100% rename from sdk/python/tests/integration/online_store/test_push_online.py rename to sdk/python/tests/integration/online_store/test_push_features_to_online_store.py diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index 15e5cf09ee..814f0fbba6 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -1,33 +1,20 @@ -import os import tempfile import uuid -from contextlib import contextmanager from pathlib import Path from textwrap import dedent -from typing import List import pytest -import yaml from assertpy import assertpy -from feast import FeatureStore, RepoConfig -from tests.integration.feature_repos.integration_test_repo_config import ( - IntegrationTestRepoConfig, -) +from 
feast import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment -from tests.integration.feature_repos.universal.data_source_creator import ( - DataSourceCreator, -) -from tests.integration.feature_repos.universal.data_sources.bigquery import ( - BigQueryDataSourceCreator, -) -from tests.integration.feature_repos.universal.data_sources.file import ( - FileDataSourceCreator, -) -from tests.integration.feature_repos.universal.data_sources.redshift import ( - RedshiftDataSourceCreator, -) from tests.utils.cli_utils import CliRunner, get_example_repo +from tests.utils.e2e_test_utils import ( + NULLABLE_ONLINE_STORE_CONFIGS, + make_feature_store_yaml, + setup_third_party_provider_repo, + setup_third_party_registry_store_repo, +) from tests.utils.online_read_write_test import basic_rw_test @@ -125,58 +112,6 @@ def test_universal_cli(environment: Environment): runner.run(["teardown"], cwd=repo_path) -def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): - offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project) - - offline_store_config = offline_creator.create_offline_store_config() - online_store = test_repo_config.online_store - - config = RepoConfig( - registry=str(Path(repo_dir_name) / "registry.db"), - project=project, - provider=test_repo_config.provider, - offline_store=offline_store_config, - online_store=online_store, - repo_path=str(Path(repo_dir_name)), - ) - config_dict = config.dict() - if ( - isinstance(config_dict["online_store"], dict) - and "redis_type" in config_dict["online_store"] - ): - if str(config_dict["online_store"]["redis_type"]) == "RedisType.redis_cluster": - config_dict["online_store"]["redis_type"] = "redis_cluster" - elif str(config_dict["online_store"]["redis_type"]) == "RedisType.redis": - config_dict["online_store"]["redis_type"] = "redis" - config_dict["repo_path"] = str(config_dict["repo_path"]) - return yaml.safe_dump(config_dict) - - -NULLABLE_ONLINE_STORE_CONFIGS: List[IntegrationTestRepoConfig] = [ - IntegrationTestRepoConfig( - provider="local", - offline_store_creator=FileDataSourceCreator, - online_store=None, - ), -] - -if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": - NULLABLE_ONLINE_STORE_CONFIGS.extend( - [ - IntegrationTestRepoConfig( - provider="gcp", - offline_store_creator=BigQueryDataSourceCreator, - online_store=None, - ), - IntegrationTestRepoConfig( - provider="aws", - offline_store_creator=RedshiftDataSourceCreator, - online_store=None, - ), - ] - ) - - @pytest.mark.integration @pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) def test_nullable_online_store(test_nullable_online_store) -> None: @@ -233,74 +168,7 @@ def test_odfv_apply(environment) -> None: runner.run(["teardown"], cwd=repo_path) -@contextmanager -def setup_third_party_provider_repo(provider_name: str): - with tempfile.TemporaryDirectory() as repo_dir_name: - - # Construct an example repo in a temporary dir - repo_path = Path(repo_dir_name) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text( - dedent( - f""" - project: foo - registry: data/registry.db - provider: {provider_name} - online_store: - path: data/online_store.db - type: sqlite - offline_store: - type: file - """ - ) - ) - - (repo_path / "foo").mkdir() - repo_example = repo_path / "foo/provider.py" - repo_example.write_text( - (Path(__file__).parents[2] / "foo_provider.py").read_text() - ) - - yield repo_path - - -@contextmanager -def 
setup_third_party_registry_store_repo(registry_store: str): - with tempfile.TemporaryDirectory() as repo_dir_name: - - # Construct an example repo in a temporary dir - repo_path = Path(repo_dir_name) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text( - dedent( - f""" - project: foo - registry: - registry_store_type: {registry_store} - path: foobar://foo.bar - provider: local - online_store: - path: data/online_store.db - type: sqlite - offline_store: - type: file - """ - ) - ) - - (repo_path / "foo").mkdir() - repo_example = repo_path / "foo/registry_store.py" - repo_example.write_text( - (Path(__file__).parents[2] / "foo_registry_store.py").read_text() - ) - - yield repo_path - - +@pytest.mark.integration def test_3rd_party_providers() -> None: """ Test running apply on third party providers @@ -331,6 +199,7 @@ def test_3rd_party_providers() -> None: assertpy.assert_that(return_code).is_equal_to(0) +@pytest.mark.integration def test_3rd_party_registry_store() -> None: """ Test running apply on third party registry stores diff --git a/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py b/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py index bad3b50a80..dd8ba69708 100644 --- a/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py +++ b/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py @@ -2,9 +2,12 @@ from pathlib import Path from textwrap import dedent +import pytest + from tests.utils.cli_utils import CliRunner, get_example_repo +@pytest.mark.integration def test_cli_apply_duplicated_featureview_names() -> None: run_simple_apply_test( example_repo_file_name="example_feature_repo_with_duplicated_featureview_names.py", @@ -12,6 +15,7 @@ def test_cli_apply_duplicated_featureview_names() -> None: ) +@pytest.mark.integration def test_cli_apply_duplicate_data_source_names() -> None: run_simple_apply_test( example_repo_file_name="example_repo_duplicate_data_source_names.py", @@ -47,6 +51,7 @@ def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes): assert rc != 0 and expected_error in output +@pytest.mark.integration def test_cli_apply_imported_featureview() -> None: """ Test apply feature views with duplicated names and single py file in a feature repo using CLI @@ -90,6 +95,7 @@ def test_cli_apply_imported_featureview() -> None: assert b"Created feature service driver_locations_service" in output +@pytest.mark.integration def test_cli_apply_imported_featureview_with_duplication() -> None: """ Test apply feature views with duplicated names and single py file in a feature repo using CLI @@ -143,6 +149,7 @@ def test_cli_apply_imported_featureview_with_duplication() -> None: ) +@pytest.mark.integration def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None: """ Test apply feature views with duplicated names from multiple py files in a feature repo using CLI diff --git a/sdk/python/tests/integration/registration/test_cli_chdir.py b/sdk/python/tests/integration/registration/test_cli_chdir.py index ff26c2f5e2..a3c9209e4d 100644 --- a/sdk/python/tests/integration/registration/test_cli_chdir.py +++ b/sdk/python/tests/integration/registration/test_cli_chdir.py @@ -2,9 +2,12 @@ from datetime import datetime, timedelta from pathlib import Path +import pytest + from tests.utils.cli_utils import CliRunner +@pytest.mark.integration def test_cli_chdir() -> None: """ This test simply makes sure that you can run 'feast --chdir COMMAND' diff --git 
a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index 12e9658649..25f622db72 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -37,61 +37,7 @@ ) -@pytest.fixture -def feature_store_with_local_registry(): - fd, registry_path = mkstemp() - fd, online_store_path = mkstemp() - return FeatureStore( - config=RepoConfig( - registry=registry_path, - project="default", - provider="local", - online_store=SqliteOnlineStoreConfig(path=online_store_path), - ) - ) - - -@pytest.fixture -def feature_store_with_gcs_registry(): - from google.cloud import storage - - storage_client = storage.Client() - bucket_name = f"feast-registry-test-{int(time.time() * 1000)}" - bucket = storage_client.bucket(bucket_name) - bucket = storage_client.create_bucket(bucket) - bucket.add_lifecycle_delete_rule( - age=14 - ) # delete buckets automatically after 14 days - bucket.patch() - bucket.blob("registry.db") - - return FeatureStore( - config=RepoConfig( - registry=f"gs://{bucket_name}/registry.db", - project="default", - provider="gcp", - ) - ) - - -@pytest.fixture -def feature_store_with_s3_registry(): - aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-integration-tests/registries" - ) - return FeatureStore( - config=RepoConfig( - registry=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", - project="default", - provider="aws", - online_store=DynamoDBOnlineStoreConfig( - region=os.getenv("AWS_REGION", "us-west-2") - ), - offline_store=FileOfflineStoreConfig(), - ) - ) - - +@pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], @@ -160,6 +106,7 @@ def test_apply_entity_integration(test_feature_store): test_feature_store.teardown() +@pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], @@ -357,6 +304,7 @@ def test_apply_feature_view_integration(test_feature_store): test_feature_store.teardown() +@pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], @@ -418,19 +366,7 @@ def test_apply_object_and_read(test_feature_store): test_feature_store.teardown() -def test_apply_remote_repo(): - fd, registry_path = mkstemp() - fd, online_store_path = mkstemp() - return FeatureStore( - config=RepoConfig( - registry=registry_path, - project="default", - provider="local", - online_store=SqliteOnlineStoreConfig(path=online_store_path), - ) - ) - - +@pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", [lazy_fixture("feature_store_with_local_registry")], @@ -488,6 +424,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): test_feature_store.teardown() +@pytest.mark.integration def test_apply_conflicting_featureview_names(feature_store_with_local_registry): """Test applying feature views with non-case-insensitively unique names""" driver = Entity(name="driver", join_keys=["driver_id"]) @@ -522,3 +459,58 @@ def test_apply_conflicting_featureview_names(feature_store_with_local_registry): ) feature_store_with_local_registry.teardown() + + +@pytest.fixture +def feature_store_with_local_registry(): + fd, registry_path = mkstemp() + fd, online_store_path = mkstemp() + return FeatureStore( + config=RepoConfig( + registry=registry_path, + project="default", + provider="local", + 
online_store=SqliteOnlineStoreConfig(path=online_store_path), + ) + ) + + +@pytest.fixture +def feature_store_with_gcs_registry(): + from google.cloud import storage + + storage_client = storage.Client() + bucket_name = f"feast-registry-test-{int(time.time() * 1000)}" + bucket = storage_client.bucket(bucket_name) + bucket = storage_client.create_bucket(bucket) + bucket.add_lifecycle_delete_rule( + age=14 + ) # delete buckets automatically after 14 days + bucket.patch() + bucket.blob("registry.db") + + return FeatureStore( + config=RepoConfig( + registry=f"gs://{bucket_name}/registry.db", + project="default", + provider="gcp", + ) + ) + + +@pytest.fixture +def feature_store_with_s3_registry(): + aws_registry_path = os.getenv( + "AWS_REGISTRY_PATH", "s3://feast-integration-tests/registries" + ) + return FeatureStore( + config=RepoConfig( + registry=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", + project="default", + provider="aws", + online_store=DynamoDBOnlineStoreConfig( + region=os.getenv("AWS_REGION", "us-west-2") + ), + offline_store=FileOfflineStoreConfig(), + ) + ) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index f660c46b15..d719b9f971 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -1,36 +1,10 @@ from copy import deepcopy -import pandas as pd import pytest -from feast import ( - BigQuerySource, - Entity, - Feature, - FeatureService, - FileSource, - RedshiftSource, - RepoConfig, - SnowflakeSource, - ValueType, -) -from feast.data_source import RequestSource -from feast.errors import ( - DataSourceNoNameException, - RegistryInferenceFailure, - SpecifiedFeaturesNotPresentError, -) -from feast.feature_view import FeatureView -from feast.field import Field -from feast.inference import ( - update_data_sources_with_inferred_event_timestamp_col, - update_feature_views_with_inferred_features_and_entities, -) -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( - SparkSource, -) -from feast.on_demand_feature_view import on_demand_feature_view -from feast.types import Float32, Float64, Int64, String, UnixTimestamp +from feast import RepoConfig +from feast.errors import RegistryInferenceFailure +from feast.inference import update_data_sources_with_inferred_event_timestamp_col from tests.utils.data_source_utils import ( prep_file_source, simple_bq_source_using_query_arg, @@ -38,42 +12,6 @@ ) -def test_infer_datasource_names_file(): - file_path = "path/to/test.csv" - data_source = FileSource(path=file_path) - assert data_source.name == file_path - - source_name = "my_name" - data_source = FileSource(name=source_name, path=file_path) - assert data_source.name == source_name - - -def test_infer_datasource_names_dwh(): - table = "project.table" - dwh_classes = [BigQuerySource, RedshiftSource, SnowflakeSource, SparkSource] - - for dwh_class in dwh_classes: - data_source = dwh_class(table=table) - assert data_source.name == table - - source_name = "my_name" - data_source_with_table = dwh_class(name=source_name, table=table) - assert data_source_with_table.name == source_name - data_source_with_query = dwh_class( - name=source_name, query=f"SELECT * from {table}" - ) - assert data_source_with_query.name == source_name - - # If we have a query and no name, throw an error - if dwh_class == SparkSource: - with pytest.raises(DataSourceNoNameException): - print(f"Testing dwh 
{dwh_class}") - data_source = dwh_class(query="test_query") - else: - data_source = dwh_class(query="test_query") - assert data_source.name == "" - - @pytest.mark.integration def test_update_file_data_source_with_inferred_event_timestamp_col(simple_dataset_1): df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True) @@ -123,330 +61,3 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so assert actual_event_timestamp_cols == ["event_timestamp"] * len( data_sources_copy.values() ) - - -def test_on_demand_features_type_inference(): - # Create Feature Views - date_request = RequestSource( - name="date_request", - schema=[Field(name="some_date", dtype=UnixTimestamp)], - ) - - @on_demand_feature_view( - sources=[date_request], - schema=[ - Field(name="output", dtype=UnixTimestamp), - Field(name="string_output", dtype=String), - ], - ) - def test_view(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) - return data - - test_view.infer_features() - - @on_demand_feature_view( - # Note: we deliberately use `inputs` instead of `sources` to test that `inputs` - # still works correctly, even though it is deprecated. - # TODO(felixwang9817): Remove references to `inputs` once it is fully deprecated. - inputs={"date_request": date_request}, - features=[ - Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), - Feature(name="object_output", dtype=ValueType.STRING), - ], - ) - def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - data["object_output"] = features_df["some_date"].astype(str) - return data - - with pytest.raises(ValueError, match="Value with native type object"): - invalid_test_view.infer_features() - - @on_demand_feature_view( - # Note: we deliberately use positional arguments here to test that they work correctly, - # even though positional arguments are deprecated in favor of keyword arguments. - # TODO(felixwang9817): Remove positional arguments once they are fully deprecated. - [ - Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), - Feature(name="missing", dtype=ValueType.STRING), - ], - {"date_request": date_request}, - ) - def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - return data - - with pytest.raises(SpecifiedFeaturesNotPresentError): - test_view_with_missing_feature.infer_features() - - -# TODO(kevjumba): remove this in feast 0.24 when deprecating -@pytest.mark.parametrize( - "request_source_schema", - [ - [Field(name="some_date", dtype=UnixTimestamp)], - {"some_date": ValueType.UNIX_TIMESTAMP}, - ], -) -def test_datasource_inference(request_source_schema): - # Create Feature Views - date_request = RequestSource( - name="date_request", - schema=request_source_schema, - ) - - @on_demand_feature_view( - # Note: we deliberately use positional arguments here to test that they work correctly, - # even though positional arguments are deprecated in favor of keyword arguments. - # TODO(felixwang9817): Remove positional arguments once they are fully deprecated. 
- [ - Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), - Feature(name="string_output", dtype=ValueType.STRING), - ], - sources=[date_request], - ) - def test_view(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) - return data - - test_view.infer_features() - - @on_demand_feature_view( - sources=[date_request], - schema=[ - Field(name="output", dtype=UnixTimestamp), - Field(name="object_output", dtype=String), - ], - ) - def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - data["object_output"] = features_df["some_date"].astype(str) - return data - - with pytest.raises(ValueError, match="Value with native type object"): - invalid_test_view.infer_features() - - @on_demand_feature_view( - sources=[date_request], - features=[ - Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), - Feature(name="missing", dtype=ValueType.STRING), - ], - ) - def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["output"] = features_df["some_date"] - return data - - with pytest.raises(SpecifiedFeaturesNotPresentError): - test_view_with_missing_feature.infer_features() - - -def test_feature_view_inference_respects_basic_inference(): - """ - Tests that feature view inference respects the basic inference that occurs during creation. - """ - file_source = FileSource(name="test", path="test path") - entity1 = Entity(name="test1", join_keys=["test_column_1"]) - entity2 = Entity(name="test2", join_keys=["test_column_2"]) - feature_view_1 = FeatureView( - name="test1", - entities=[entity1], - schema=[ - Field(name="feature", dtype=Float32), - Field(name="test_column_1", dtype=String), - ], - source=file_source, - ) - feature_view_2 = FeatureView( - name="test2", - entities=[entity1, entity2], - schema=[ - Field(name="feature", dtype=Float32), - Field(name="test_column_1", dtype=String), - Field(name="test_column_2", dtype=String), - ], - source=file_source, - ) - - assert len(feature_view_1.schema) == 2 - assert len(feature_view_1.features) == 1 - assert len(feature_view_1.entity_columns) == 1 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_1], [entity1], RepoConfig(provider="local", project="test") - ) - assert len(feature_view_1.schema) == 2 - assert len(feature_view_1.features) == 1 - assert len(feature_view_1.entity_columns) == 1 - - assert len(feature_view_2.schema) == 3 - assert len(feature_view_2.features) == 1 - assert len(feature_view_2.entity_columns) == 2 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_2], - [entity1, entity2], - RepoConfig(provider="local", project="test"), - ) - assert len(feature_view_2.schema) == 3 - assert len(feature_view_2.features) == 1 - assert len(feature_view_2.entity_columns) == 2 - - -def test_feature_view_inference_on_entity_columns(simple_dataset_1): - """ - Tests that feature view inference correctly infers entity columns. 
- """ - with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: - entity1 = Entity(name="test1", join_keys=["id_join_key"]) - feature_view_1 = FeatureView( - name="test1", - entities=[entity1], - schema=[Field(name="int64_col", dtype=Int64)], - source=file_source, - ) - - assert len(feature_view_1.schema) == 1 - assert len(feature_view_1.features) == 1 - assert len(feature_view_1.entity_columns) == 0 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_1], [entity1], RepoConfig(provider="local", project="test") - ) - - # The schema is only used as a parameter, as is therefore not updated during inference. - assert len(feature_view_1.schema) == 1 - - # Since there is already a feature specified, additional features are not inferred. - assert len(feature_view_1.features) == 1 - - # The single entity column is inferred correctly. - assert len(feature_view_1.entity_columns) == 1 - - -def test_feature_view_inference_respects_entity_value_type(simple_dataset_1): - """ - Tests that feature view inference still respects an entity's value type. - """ - # TODO(felixwang9817): Remove this test once entity value_type is removed. - with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: - entity1 = Entity( - name="test1", join_keys=["id_join_key"], value_type=ValueType.STRING - ) - feature_view_1 = FeatureView( - name="test1", - entities=[entity1], - schema=[Field(name="int64_col", dtype=Int64)], - source=file_source, - ) - - assert len(feature_view_1.schema) == 1 - assert len(feature_view_1.features) == 1 - assert len(feature_view_1.entity_columns) == 0 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_1], [entity1], RepoConfig(provider="local", project="test") - ) - - # The schema is only used as a parameter, as is therefore not updated during inference. - assert len(feature_view_1.schema) == 1 - - # Since there is already a feature specified, additional features are not inferred. - assert len(feature_view_1.features) == 1 - - # The single entity column is inferred correctly and has type String. - assert len(feature_view_1.entity_columns) == 1 - assert feature_view_1.entity_columns[0].dtype == String - - -def test_feature_view_inference_on_feature_columns(simple_dataset_1): - """ - Tests that feature view inference correctly infers feature columns. - """ - with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: - entity1 = Entity(name="test1", join_keys=["id_join_key"]) - feature_view_1 = FeatureView( - name="test1", - entities=[entity1], - schema=[Field(name="id_join_key", dtype=Int64)], - source=file_source, - ) - - assert len(feature_view_1.schema) == 1 - assert len(feature_view_1.features) == 0 - assert len(feature_view_1.entity_columns) == 1 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_1], [entity1], RepoConfig(provider="local", project="test") - ) - - # The schema is only used as a parameter, as is therefore not updated during inference. - assert len(feature_view_1.schema) == 1 - - # All three feature columns are inferred correctly. 
- assert len(feature_view_1.features) == 3 - print(feature_view_1.features) - feature_column_1 = Field(name="float_col", dtype=Float64) - feature_column_2 = Field(name="int64_col", dtype=Int64) - feature_column_3 = Field(name="string_col", dtype=String) - assert feature_column_1 in feature_view_1.features - assert feature_column_2 in feature_view_1.features - assert feature_column_3 in feature_view_1.features - - # The single entity column remains. - assert len(feature_view_1.entity_columns) == 1 - - -def test_update_feature_services_with_inferred_features(simple_dataset_1): - with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: - entity1 = Entity(name="test1", join_keys=["id_join_key"]) - feature_view_1 = FeatureView( - name="test1", - entities=[entity1], - source=file_source, - ) - feature_view_2 = FeatureView( - name="test2", - entities=[entity1], - source=file_source, - ) - - feature_service = FeatureService( - name="fs_1", features=[feature_view_1[["string_col"]], feature_view_2] - ) - assert len(feature_service.feature_view_projections) == 2 - assert len(feature_service.feature_view_projections[0].features) == 0 - assert len(feature_service.feature_view_projections[0].desired_features) == 1 - assert len(feature_service.feature_view_projections[1].features) == 0 - assert len(feature_service.feature_view_projections[1].desired_features) == 0 - - update_feature_views_with_inferred_features_and_entities( - [feature_view_1, feature_view_2], - [entity1], - RepoConfig(provider="local", project="test"), - ) - feature_service.infer_features( - fvs_to_update={ - feature_view_1.name: feature_view_1, - feature_view_2.name: feature_view_2, - } - ) - - assert len(feature_view_1.schema) == 0 - assert len(feature_view_1.features) == 3 - assert len(feature_view_2.schema) == 0 - assert len(feature_view_2.features) == 3 - assert len(feature_service.feature_view_projections[0].features) == 1 - assert len(feature_service.feature_view_projections[1].features) == 3 - - -# TODO(felixwang9817): Add tests that interact with field mapping. 
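[Note] The hunks above replace the per-file `assert_frame_equal` and `response_feature_name` helpers with shared `validate_dataframes` and `get_response_feature_name` utilities. The destination module's definition is not visible in this excerpt, so the path and exact signatures below are assumptions; judging from the removed bodies above, the extracted helpers are presumably straight moves, along these lines:

# Minimal sketch of the extracted helpers, assuming a straight move of the
# removed bodies into a shared module under tests/utils/ (exact file name
# is not shown in this diff and is hypothetical).
import pandas as pd
from pandas.testing import assert_frame_equal as pd_assert_frame_equal


def get_response_feature_name(feature: str, full_feature_names: bool) -> str:
    # Mirrors the removed response_feature_name: prefix driver and on-demand
    # features only when full feature names are requested.
    if feature in {"conv_rate", "avg_daily_trips"} and full_feature_names:
        return f"driver_stats__{feature}"
    if (
        feature
        in {
            "conv_rate_plus_100",
            "conv_rate_plus_100_rounded",
            "conv_rate_plus_val_to_add",
        }
        and full_feature_names
    ):
        return f"conv_rate_plus_100__{feature}"
    return feature


def validate_dataframes(expected_df: pd.DataFrame, actual_df: pd.DataFrame, keys):
    # Mirrors the removed assert_frame_equal: normalize row order on the join
    # keys, drop duplicates, and compare without strict dtype checking.
    expected_df = (
        expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True)
    )
    actual_df = (
        actual_df[expected_df.columns]
        .sort_values(by=keys)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    pd_assert_frame_equal(expected_df, actual_df, check_dtype=False)

Sorting on the join keys and dropping duplicates before comparing keeps the assertion independent of row order, which varies across offline stores, while `check_dtype=False` tolerates the dtype drift that the explicit casts in the expected-dataframe construction work around.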
diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index e192657074..ceb26714a3 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -14,33 +14,19 @@ import os import time from datetime import timedelta -from tempfile import mkstemp -import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture from feast import FileSource -from feast.aggregation import Aggregation -from feast.data_format import AvroFormat, ParquetFormat -from feast.data_source import KafkaSource +from feast.data_format import ParquetFormat from feast.entity import Entity -from feast.feature import Feature from feast.feature_view import FeatureView from feast.field import Field -from feast.on_demand_feature_view import RequestSource, on_demand_feature_view from feast.registry import Registry from feast.repo_config import RegistryConfig -from feast.stream_feature_view import StreamFeatureView -from feast.types import Array, Bytes, Float32, Int32, Int64, String -from feast.value_type import ValueType - - -@pytest.fixture -def local_registry() -> Registry: - fd, registry_path = mkstemp() - registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) - return Registry(registry_config, None) +from feast.types import Array, Bytes, Int64, String +from tests.utils.e2e_test_utils import validate_registry_data_source_apply @pytest.fixture @@ -74,52 +60,6 @@ def s3_registry() -> Registry: return Registry(registry_config, None) -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -def test_apply_entity_success(test_registry): - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - project = "project" - - # Register Entity - test_registry.apply_entity(entity, project) - - entities = test_registry.list_entities(project) - - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - entity = test_registry.get_entity("driver_car_id", project) - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - test_registry.delete_entity("driver_car_id", project) - entities = test_registry.list_entities(project) - assert len(entities) == 0 - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - @pytest.mark.integration @pytest.mark.parametrize( "test_registry", @@ -163,355 +103,6 @@ def test_apply_entity_integration(test_registry): test_registry._get_registry_proto(project=project) -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -def test_apply_feature_view_success(test_registry): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", 
dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register Feature View - test_registry.apply_feature_view(fv1, project) - - feature_views = test_registry.list_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == String - and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == Array(String) - and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == Array(Bytes) - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = test_registry.get_feature_view("my_feature_view_1", project) - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == String - and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == Array(String) - and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == Array(Bytes) - and feature_view.entities[0] == "fs1_my_entity_1" - ) - - test_registry.delete_feature_view("my_feature_view_1", project) - feature_views = test_registry.list_feature_views(project) - assert len(feature_views) == 0 - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -def test_apply_on_demand_feature_view_success(test_registry): - # Create Feature Views - driver_stats = FileSource( - name="driver_stats_source", - path="data/driver_stats_lat_lon.parquet", - timestamp_field="event_timestamp", - created_timestamp_column="created", - description="A table describing the stats of a driver based on hourly logs", - owner="test2@gmail.com", - ) - - driver_daily_features_view = FeatureView( - name="driver_daily_features", - entities=["driver"], - ttl=timedelta(seconds=8640000000), - schema=[ - Field(name="daily_miles_driven", dtype=Float32), - Field(name="lat", dtype=Float32), - Field(name="lon", dtype=Float32), - Field(name="string_feature", dtype=String), - ], - online=True, - source=driver_stats, - tags={"production": "True"}, - owner="test2@gmail.com", - ) - - @on_demand_feature_view( - sources=[driver_daily_features_view], - schema=[Field(name="first_char", dtype=String)], - ) - def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: - df = pd.DataFrame() - df["first_char"] = inputs["string_feature"].str[:1].astype("string") - return df - - project = "project" - - # Register Feature View - test_registry.apply_feature_view(location_features_from_push, project) - - feature_views = test_registry.list_on_demand_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "location_features_from_push" - and feature_views[0].features[0].name == "first_char" - 
and feature_views[0].features[0].dtype == String - ) - - feature_view = test_registry.get_on_demand_feature_view( - "location_features_from_push", project - ) - assert ( - feature_view.name == "location_features_from_push" - and feature_view.features[0].name == "first_char" - and feature_view.features[0].dtype == String - ) - - test_registry.delete_feature_view("location_features_from_push", project) - feature_views = test_registry.list_on_demand_feature_views(project) - assert len(feature_views) == 0 - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -def test_apply_stream_feature_view_success(test_registry): - # Create Feature Views - def simple_udf(x: int): - return x + 3 - - entity = Entity(name="driver_entity", join_keys=["test_key"]) - - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - watermark_delay_threshold=timedelta(days=1), - ) - - sfv = StreamFeatureView( - name="test kafka stream feature view", - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ), - Aggregation( - column="dummy_field2", - function="count", - time_window=timedelta(days=24), - ), - ], - timestamp_field="event_timestamp", - mode="spark", - source=stream_source, - udf=simple_udf, - tags={}, - ) - - project = "project" - - # Register Feature View - test_registry.apply_feature_view(sfv, project) - - stream_feature_views = test_registry.list_stream_feature_views(project) - - # List Feature Views - assert len(stream_feature_views) == 1 - assert stream_feature_views[0] == sfv - - test_registry.delete_feature_view("test kafka stream feature view", project) - stream_feature_views = test_registry.list_stream_feature_views(project) - assert len(stream_feature_views) == 0 - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -# TODO(kevjumba): remove this in feast 0.24 when deprecating -@pytest.mark.parametrize( - "request_source_schema", - [[Field(name="my_input_1", dtype=Int32)], {"my_input_1": ValueType.INT32}], -) -def test_modify_feature_views_success(test_registry, request_source_schema): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - request_source = RequestSource( - name="request_source", - schema=request_source_schema, - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[Field(name="fs1_my_feature_1", dtype=Int64)], - entities=[entity], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - @on_demand_feature_view( - features=[ - Feature(name="odfv1_my_feature_1", dtype=ValueType.STRING), - 
Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32), - ], - sources=[request_source], - ) - def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("category") - data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") - return data - - project = "project" - - # Register Feature Views - test_registry.apply_feature_view(odfv1, project) - test_registry.apply_feature_view(fv1, project) - - # Modify odfv by changing a single feature dtype - @on_demand_feature_view( - features=[ - Feature(name="odfv1_my_feature_1", dtype=ValueType.FLOAT), - Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32), - ], - sources=[request_source], - ) - def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("float") - data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") - return data - - # Apply the modified odfv - test_registry.apply_feature_view(odfv1, project) - - # Check odfv - on_demand_feature_views = test_registry.list_on_demand_feature_views(project) - - assert ( - len(on_demand_feature_views) == 1 - and on_demand_feature_views[0].name == "odfv1" - and on_demand_feature_views[0].features[0].name == "odfv1_my_feature_1" - and on_demand_feature_views[0].features[0].dtype == Float32 - and on_demand_feature_views[0].features[1].name == "odfv1_my_feature_2" - and on_demand_feature_views[0].features[1].dtype == Int32 - ) - request_schema = on_demand_feature_views[0].get_request_data_schema() - assert ( - list(request_schema.keys())[0] == "my_input_1" - and list(request_schema.values())[0] == ValueType.INT32 - ) - - feature_view = test_registry.get_on_demand_feature_view("odfv1", project) - assert ( - feature_view.name == "odfv1" - and feature_view.features[0].name == "odfv1_my_feature_1" - and feature_view.features[0].dtype == Float32 - and feature_view.features[1].name == "odfv1_my_feature_2" - and feature_view.features[1].dtype == Int32 - ) - request_schema = feature_view.get_request_data_schema() - assert ( - list(request_schema.keys())[0] == "my_input_1" - and list(request_schema.values())[0] == ValueType.INT32 - ) - - # Make sure fv1 is untouched - feature_views = test_registry.list_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = test_registry.get_feature_view("my_feature_view_1", project) - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.entities[0] == "fs1_my_entity_1" - ) - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - @pytest.mark.integration @pytest.mark.parametrize( "test_registry", @@ -595,163 +186,4 @@ def test_apply_feature_view_integration(test_registry): [lazy_fixture("gcs_registry"), lazy_fixture("s3_registry")], ) def test_apply_data_source_integration(test_registry: Registry): - run_test_data_source_apply(test_registry) - - -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) -def 
test_apply_data_source(test_registry: Registry): - run_test_data_source_apply(test_registry) - - -def run_test_data_source_apply(test_registry: Registry): - # Create Feature Views - batch_source = FileSource( - name="test_source", - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register data source and feature view - test_registry.apply_data_source(batch_source, project, commit=False) - test_registry.apply_feature_view(fv1, project, commit=True) - - registry_feature_views = test_registry.list_feature_views(project) - registry_data_sources = test_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_data_source = registry_data_sources[0] - assert registry_data_source == batch_source - - # Check that change to batch source propagates - batch_source.timestamp_field = "new_ts_col" - test_registry.apply_data_source(batch_source, project, commit=False) - test_registry.apply_feature_view(fv1, project, commit=True) - registry_feature_views = test_registry.list_feature_views(project) - registry_data_sources = test_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_batch_source = test_registry.list_data_sources(project)[0] - assert registry_batch_source == batch_source - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -def test_commit(): - fd, registry_path = mkstemp() - registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) - test_registry = Registry(registry_config, None) - - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - project = "project" - - # Register Entity without commiting - test_registry.apply_entity(entity, project, commit=False) - assert test_registry.cached_registry_proto - assert len(test_registry.cached_registry_proto.project_metadata) == 1 - project_metadata = test_registry.cached_registry_proto.project_metadata[0] - project_uuid = project_metadata.project_uuid - assert len(project_uuid) == 36 - assert_project_uuid(project_uuid, test_registry) - - # Retrieving the entity should still succeed - entities = test_registry.list_entities(project, allow_cache=True) - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - assert_project_uuid(project_uuid, test_registry) - - entity = test_registry.get_entity("driver_car_id", project, allow_cache=True) - assert ( - entity.name 
== "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - assert_project_uuid(project_uuid, test_registry) - - # Create new registry that points to the same store - registry_with_same_store = Registry(registry_config, None) - - # Retrieving the entity should fail since the store is empty - entities = registry_with_same_store.list_entities(project) - assert len(entities) == 0 - assert_project_uuid(project_uuid, registry_with_same_store) - - # commit from the original registry - test_registry.commit() - - # Reconstruct the new registry in order to read the newly written store - registry_with_same_store = Registry(registry_config, None) - - # Retrieving the entity should now succeed - entities = registry_with_same_store.list_entities(project) - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - assert_project_uuid(project_uuid, registry_with_same_store) - - entity = test_registry.get_entity("driver_car_id", project) - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -def assert_project_uuid(project_uuid, test_registry): - assert len(test_registry.cached_registry_proto.project_metadata) == 1 - project_metadata = test_registry.cached_registry_proto.project_metadata[0] - assert project_metadata.project_uuid == project_uuid + validate_registry_data_source_apply(test_registry) diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index ad29531e11..1d90eee13e 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -26,97 +26,6 @@ logger = logging.getLogger(__name__) -def populate_test_configs(offline: bool): - feature_dtypes = [ - "int32", - "int64", - "float", - "bool", - "datetime", - ] - configs: List[TypeTestConfig] = [] - for feature_dtype in feature_dtypes: - for feature_is_list in [True, False]: - for has_empty_list in [True, False]: - # For non list features `has_empty_list` does nothing - if feature_is_list is False and has_empty_list is True: - continue - - configs.append( - TypeTestConfig( - feature_dtype=feature_dtype, - feature_is_list=feature_is_list, - has_empty_list=has_empty_list, - ) - ) - return configs - - -@dataclass(frozen=True, repr=True) -class TypeTestConfig: - feature_dtype: str - feature_is_list: bool - has_empty_list: bool - - -OFFLINE_TYPE_TEST_CONFIGS: List[TypeTestConfig] = populate_test_configs(offline=True) -ONLINE_TYPE_TEST_CONFIGS: List[TypeTestConfig] = populate_test_configs(offline=False) - - -@pytest.fixture( - params=OFFLINE_TYPE_TEST_CONFIGS, - ids=[str(c) for c in OFFLINE_TYPE_TEST_CONFIGS], -) -def offline_types_test_fixtures(request, environment): - config: TypeTestConfig = request.param - if ( - environment.test_repo_config.provider == "aws" - and config.feature_is_list is True - ): - pytest.skip("Redshift doesn't support list features") - - return get_fixtures(request, environment) - - -@pytest.fixture( - 
params=ONLINE_TYPE_TEST_CONFIGS, - ids=[str(c) for c in ONLINE_TYPE_TEST_CONFIGS], -) -def online_types_test_fixtures(request, environment): - return get_fixtures(request, environment) - - -def get_fixtures(request, environment): - config: TypeTestConfig = request.param - # Lower case needed because Redshift lower-cases all table names - destination_name = ( - f"feature_type_{config.feature_dtype}{config.feature_is_list}".replace( - ".", "" - ).lower() - ) - config = request.param - df = create_basic_driver_dataset( - Int64, - config.feature_dtype, - config.feature_is_list, - config.has_empty_list, - ) - data_source = environment.data_source_creator.create_data_source( - df, - destination_name=destination_name, - field_mapping={"ts_1": "ts"}, - ) - fv = create_feature_view( - destination_name, - config.feature_dtype, - config.feature_is_list, - config.has_empty_list, - data_source, - ) - - return config, data_source, fv - - @pytest.mark.integration @pytest.mark.universal_offline_stores @pytest.mark.parametrize("entity_type", [Int32, Int64, String]) @@ -397,3 +306,94 @@ def assert_expected_arrow_types( assert arrow_type_checker(pa_type.value_type) else: assert arrow_type_checker(pa_type) + + +def populate_test_configs(offline: bool): + feature_dtypes = [ + "int32", + "int64", + "float", + "bool", + "datetime", + ] + configs: List[TypeTestConfig] = [] + for feature_dtype in feature_dtypes: + for feature_is_list in [True, False]: + for has_empty_list in [True, False]: + # For non list features `has_empty_list` does nothing + if feature_is_list is False and has_empty_list is True: + continue + + configs.append( + TypeTestConfig( + feature_dtype=feature_dtype, + feature_is_list=feature_is_list, + has_empty_list=has_empty_list, + ) + ) + return configs + + +@dataclass(frozen=True, repr=True) +class TypeTestConfig: + feature_dtype: str + feature_is_list: bool + has_empty_list: bool + + +OFFLINE_TYPE_TEST_CONFIGS: List[TypeTestConfig] = populate_test_configs(offline=True) +ONLINE_TYPE_TEST_CONFIGS: List[TypeTestConfig] = populate_test_configs(offline=False) + + +@pytest.fixture( + params=OFFLINE_TYPE_TEST_CONFIGS, + ids=[str(c) for c in OFFLINE_TYPE_TEST_CONFIGS], +) +def offline_types_test_fixtures(request, environment): + config: TypeTestConfig = request.param + if ( + environment.test_repo_config.provider == "aws" + and config.feature_is_list is True + ): + pytest.skip("Redshift doesn't support list features") + + return get_fixtures(request, environment) + + +@pytest.fixture( + params=ONLINE_TYPE_TEST_CONFIGS, + ids=[str(c) for c in ONLINE_TYPE_TEST_CONFIGS], +) +def online_types_test_fixtures(request, environment): + return get_fixtures(request, environment) + + +def get_fixtures(request, environment): + config: TypeTestConfig = request.param + # Lower case needed because Redshift lower-cases all table names + destination_name = ( + f"feature_type_{config.feature_dtype}{config.feature_is_list}".replace( + ".", "" + ).lower() + ) + config = request.param + df = create_basic_driver_dataset( + Int64, + config.feature_dtype, + config.feature_is_list, + config.has_empty_list, + ) + data_source = environment.data_source_creator.create_data_source( + df, + destination_name=destination_name, + field_mapping={"ts_1": "ts"}, + ) + fv = create_feature_view( + destination_name, + config.feature_dtype, + config.feature_is_list, + config.has_empty_list, + data_source, + ) + + return config, data_source, fv diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py 
b/sdk/python/tests/unit/infra/test_inference_unit_tests.py new file mode 100644 index 0000000000..f8cc6689d9 --- /dev/null +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -0,0 +1,388 @@ +import pandas as pd +import pytest + +from feast import ( + BigQuerySource, + Entity, + Feature, + FeatureService, + FileSource, + RedshiftSource, + RepoConfig, + SnowflakeSource, + ValueType, +) +from feast.data_source import RequestSource +from feast.errors import DataSourceNoNameException, SpecifiedFeaturesNotPresentError +from feast.feature_view import FeatureView +from feast.field import Field +from feast.inference import update_feature_views_with_inferred_features_and_entities +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) +from feast.on_demand_feature_view import on_demand_feature_view +from feast.types import Float32, Float64, Int64, String, UnixTimestamp +from tests.utils.data_source_utils import prep_file_source + + +def test_infer_datasource_names_file(): + file_path = "path/to/test.csv" + data_source = FileSource(path=file_path) + assert data_source.name == file_path + + source_name = "my_name" + data_source = FileSource(name=source_name, path=file_path) + assert data_source.name == source_name + + +def test_infer_datasource_names_dwh(): + table = "project.table" + dwh_classes = [BigQuerySource, RedshiftSource, SnowflakeSource, SparkSource] + + for dwh_class in dwh_classes: + data_source = dwh_class(table=table) + assert data_source.name == table + + source_name = "my_name" + data_source_with_table = dwh_class(name=source_name, table=table) + assert data_source_with_table.name == source_name + data_source_with_query = dwh_class( + name=source_name, query=f"SELECT * from {table}" + ) + assert data_source_with_query.name == source_name + + # If we have a query and no name, throw an error + if dwh_class == SparkSource: + with pytest.raises(DataSourceNoNameException): + print(f"Testing dwh {dwh_class}") + data_source = dwh_class(query="test_query") + else: + data_source = dwh_class(query="test_query") + assert data_source.name == "" + + +def test_on_demand_features_type_inference(): + # Create Feature Views + date_request = RequestSource( + name="date_request", + schema=[Field(name="some_date", dtype=UnixTimestamp)], + ) + + @on_demand_feature_view( + sources=[date_request], + schema=[ + Field(name="output", dtype=UnixTimestamp), + Field(name="string_output", dtype=String), + ], + ) + def test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) + return data + + test_view.infer_features() + + @on_demand_feature_view( + # Note: we deliberately use `inputs` instead of `sources` to test that `inputs` + # still works correctly, even though it is deprecated. + # TODO(felixwang9817): Remove references to `inputs` once it is fully deprecated. 
+ inputs={"date_request": date_request}, + features=[ + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="object_output", dtype=ValueType.STRING), + ], + ) + def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["object_output"] = features_df["some_date"].astype(str) + return data + + with pytest.raises(ValueError, match="Value with native type object"): + invalid_test_view.infer_features() + + @on_demand_feature_view( + # Note: we deliberately use positional arguments here to test that they work correctly, + # even though positional arguments are deprecated in favor of keyword arguments. + # TODO(felixwang9817): Remove positional arguments once they are fully deprecated. + [ + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="missing", dtype=ValueType.STRING), + ], + {"date_request": date_request}, + ) + def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data + + with pytest.raises(SpecifiedFeaturesNotPresentError): + test_view_with_missing_feature.infer_features() + + +# TODO(kevjumba): remove this in feast 0.24 when deprecating +@pytest.mark.parametrize( + "request_source_schema", + [ + [Field(name="some_date", dtype=UnixTimestamp)], + {"some_date": ValueType.UNIX_TIMESTAMP}, + ], +) +def test_datasource_inference(request_source_schema): + # Create Feature Views + date_request = RequestSource( + name="date_request", + schema=request_source_schema, + ) + + @on_demand_feature_view( + # Note: we deliberately use positional arguments here to test that they work correctly, + # even though positional arguments are deprecated in favor of keyword arguments. + # TODO(felixwang9817): Remove positional arguments once they are fully deprecated. + [ + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="string_output", dtype=ValueType.STRING), + ], + sources=[date_request], + ) + def test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) + return data + + test_view.infer_features() + + @on_demand_feature_view( + sources=[date_request], + schema=[ + Field(name="output", dtype=UnixTimestamp), + Field(name="object_output", dtype=String), + ], + ) + def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["object_output"] = features_df["some_date"].astype(str) + return data + + with pytest.raises(ValueError, match="Value with native type object"): + invalid_test_view.infer_features() + + @on_demand_feature_view( + sources=[date_request], + features=[ + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="missing", dtype=ValueType.STRING), + ], + ) + def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data + + with pytest.raises(SpecifiedFeaturesNotPresentError): + test_view_with_missing_feature.infer_features() + + +def test_feature_view_inference_respects_basic_inference(): + """ + Tests that feature view inference respects the basic inference that occurs during creation. 
+ """ + file_source = FileSource(name="test", path="test path") + entity1 = Entity(name="test1", join_keys=["test_column_1"]) + entity2 = Entity(name="test2", join_keys=["test_column_2"]) + feature_view_1 = FeatureView( + name="test1", + entities=[entity1], + schema=[ + Field(name="feature", dtype=Float32), + Field(name="test_column_1", dtype=String), + ], + source=file_source, + ) + feature_view_2 = FeatureView( + name="test2", + entities=[entity1, entity2], + schema=[ + Field(name="feature", dtype=Float32), + Field(name="test_column_1", dtype=String), + Field(name="test_column_2", dtype=String), + ], + source=file_source, + ) + + assert len(feature_view_1.schema) == 2 + assert len(feature_view_1.features) == 1 + assert len(feature_view_1.entity_columns) == 1 + + update_feature_views_with_inferred_features_and_entities( + [feature_view_1], [entity1], RepoConfig(provider="local", project="test") + ) + assert len(feature_view_1.schema) == 2 + assert len(feature_view_1.features) == 1 + assert len(feature_view_1.entity_columns) == 1 + + assert len(feature_view_2.schema) == 3 + assert len(feature_view_2.features) == 1 + assert len(feature_view_2.entity_columns) == 2 + + update_feature_views_with_inferred_features_and_entities( + [feature_view_2], + [entity1, entity2], + RepoConfig(provider="local", project="test"), + ) + assert len(feature_view_2.schema) == 3 + assert len(feature_view_2.features) == 1 + assert len(feature_view_2.entity_columns) == 2 + + +def test_feature_view_inference_on_entity_columns(simple_dataset_1): + """ + Tests that feature view inference correctly infers entity columns. + """ + with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: + entity1 = Entity(name="test1", join_keys=["id_join_key"]) + feature_view_1 = FeatureView( + name="test1", + entities=[entity1], + schema=[Field(name="int64_col", dtype=Int64)], + source=file_source, + ) + + assert len(feature_view_1.schema) == 1 + assert len(feature_view_1.features) == 1 + assert len(feature_view_1.entity_columns) == 0 + + update_feature_views_with_inferred_features_and_entities( + [feature_view_1], [entity1], RepoConfig(provider="local", project="test") + ) + + # The schema is only used as a parameter, as is therefore not updated during inference. + assert len(feature_view_1.schema) == 1 + + # Since there is already a feature specified, additional features are not inferred. + assert len(feature_view_1.features) == 1 + + # The single entity column is inferred correctly. + assert len(feature_view_1.entity_columns) == 1 + + +def test_feature_view_inference_respects_entity_value_type(simple_dataset_1): + """ + Tests that feature view inference still respects an entity's value type. + """ + # TODO(felixwang9817): Remove this test once entity value_type is removed. + with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: + entity1 = Entity( + name="test1", join_keys=["id_join_key"], value_type=ValueType.STRING + ) + feature_view_1 = FeatureView( + name="test1", + entities=[entity1], + schema=[Field(name="int64_col", dtype=Int64)], + source=file_source, + ) + + assert len(feature_view_1.schema) == 1 + assert len(feature_view_1.features) == 1 + assert len(feature_view_1.entity_columns) == 0 + + update_feature_views_with_inferred_features_and_entities( + [feature_view_1], [entity1], RepoConfig(provider="local", project="test") + ) + + # The schema is only used as a parameter, as is therefore not updated during inference. 
+        assert len(feature_view_1.schema) == 1
+
+        # Since there is already a feature specified, additional features are not inferred.
+        assert len(feature_view_1.features) == 1
+
+        # The single entity column is inferred correctly and has type String.
+        assert len(feature_view_1.entity_columns) == 1
+        assert feature_view_1.entity_columns[0].dtype == String
+
+
+def test_feature_view_inference_on_feature_columns(simple_dataset_1):
+    """
+    Tests that feature view inference correctly infers feature columns.
+    """
+    with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source:
+        entity1 = Entity(name="test1", join_keys=["id_join_key"])
+        feature_view_1 = FeatureView(
+            name="test1",
+            entities=[entity1],
+            schema=[Field(name="id_join_key", dtype=Int64)],
+            source=file_source,
+        )
+
+        assert len(feature_view_1.schema) == 1
+        assert len(feature_view_1.features) == 0
+        assert len(feature_view_1.entity_columns) == 1
+
+        update_feature_views_with_inferred_features_and_entities(
+            [feature_view_1], [entity1], RepoConfig(provider="local", project="test")
+        )
+
+        # The schema is only used as a parameter, and is therefore not updated during inference.
+        assert len(feature_view_1.schema) == 1
+
+        # All three feature columns are inferred correctly.
+        assert len(feature_view_1.features) == 3
+        print(feature_view_1.features)
+        feature_column_1 = Field(name="float_col", dtype=Float64)
+        feature_column_2 = Field(name="int64_col", dtype=Int64)
+        feature_column_3 = Field(name="string_col", dtype=String)
+        assert feature_column_1 in feature_view_1.features
+        assert feature_column_2 in feature_view_1.features
+        assert feature_column_3 in feature_view_1.features
+
+        # The single entity column remains.
+        assert len(feature_view_1.entity_columns) == 1
+
+
+def test_update_feature_services_with_inferred_features(simple_dataset_1):
+    with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source:
+        entity1 = Entity(name="test1", join_keys=["id_join_key"])
+        feature_view_1 = FeatureView(
+            name="test1",
+            entities=[entity1],
+            source=file_source,
+        )
+        feature_view_2 = FeatureView(
+            name="test2",
+            entities=[entity1],
+            source=file_source,
+        )
+
+        feature_service = FeatureService(
+            name="fs_1", features=[feature_view_1[["string_col"]], feature_view_2]
+        )
+        assert len(feature_service.feature_view_projections) == 2
+        assert len(feature_service.feature_view_projections[0].features) == 0
+        assert len(feature_service.feature_view_projections[0].desired_features) == 1
+        assert len(feature_service.feature_view_projections[1].features) == 0
+        assert len(feature_service.feature_view_projections[1].desired_features) == 0
+
+        update_feature_views_with_inferred_features_and_entities(
+            [feature_view_1, feature_view_2],
+            [entity1],
+            RepoConfig(provider="local", project="test"),
+        )
+        feature_service.infer_features(
+            fvs_to_update={
+                feature_view_1.name: feature_view_1,
+                feature_view_2.name: feature_view_2,
+            }
+        )
+
+        assert len(feature_view_1.schema) == 0
+        assert len(feature_view_1.features) == 3
+        assert len(feature_view_2.schema) == 0
+        assert len(feature_view_2.features) == 3
+        assert len(feature_service.feature_view_projections[0].features) == 1
+        assert len(feature_service.feature_view_projections[1].features) == 3
+
+
+# TODO(felixwang9817): Add tests that interact with field mapping.
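The inference tests above all exercise the same rule: update_feature_views_with_inferred_features_and_entities fills a feature view's entity_columns from its entities' join keys and its features from the remaining source columns, while the user-supplied schema is left untouched. A minimal, self-contained sketch of that column split follows; the helper split_inferred_columns and its arguments are hypothetical illustrations, not part of Feast's API.

    from typing import Dict, List, Tuple


    def split_inferred_columns(
        table_columns: Dict[str, str],   # column name -> dtype, as read from the source
        join_keys: List[str],            # join keys of the feature view's entities
        timestamp_columns: List[str],    # event/created timestamp columns to skip
    ) -> Tuple[List[str], List[str]]:
        # Columns matching a join key become entity columns; everything else
        # (except timestamps) becomes a feature column.
        entity_columns = [col for col in table_columns if col in join_keys]
        feature_columns = [
            col
            for col in table_columns
            if col not in join_keys and col not in timestamp_columns
        ]
        return entity_columns, feature_columns


    # Mirrors what the simple_dataset_1 tests assert: one inferred entity
    # column and three inferred feature columns.
    columns = {
        "id_join_key": "int64",
        "float_col": "double",
        "int64_col": "int64",
        "string_col": "string",
        "ts_1": "timestamp",
    }
    print(split_inferred_columns(columns, ["id_join_key"], ["ts_1"]))
    # (['id_join_key'], ['float_col', 'int64_col', 'string_col'])
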
diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py new file mode 100644 index 0000000000..b35da9e4ff --- /dev/null +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -0,0 +1,532 @@ +# Copyright 2021 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import timedelta +from tempfile import mkstemp + +import pandas as pd +import pytest +from pytest_lazyfixture import lazy_fixture + +from feast import FileSource +from feast.aggregation import Aggregation +from feast.data_format import AvroFormat, ParquetFormat +from feast.data_source import KafkaSource +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_view import FeatureView +from feast.field import Field +from feast.on_demand_feature_view import RequestSource, on_demand_feature_view +from feast.registry import Registry +from feast.repo_config import RegistryConfig +from feast.stream_feature_view import StreamFeatureView +from feast.types import Array, Bytes, Float32, Int32, Int64, String +from feast.value_type import ValueType +from tests.utils.e2e_test_utils import ( + validate_project_uuid, + validate_registry_data_source_apply, +) + + +@pytest.fixture +def local_registry() -> Registry: + fd, registry_path = mkstemp() + registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) + return Registry(registry_config, None) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +def test_apply_entity_success(test_registry): + entity = Entity( + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, + ) + + project = "project" + + # Register Entity + test_registry.apply_entity(entity, project) + + entities = test_registry.list_entities(project) + + entity = entities[0] + assert ( + len(entities) == 1 + and entity.name == "driver_car_id" + and entity.description == "Car driver id" + and "team" in entity.tags + and entity.tags["team"] == "matchmaking" + ) + + entity = test_registry.get_entity("driver_car_id", project) + assert ( + entity.name == "driver_car_id" + and entity.description == "Car driver id" + and "team" in entity.tags + and entity.tags["team"] == "matchmaking" + ) + + test_registry.delete_entity("driver_car_id", project) + entities = test_registry.list_entities(project) + assert len(entities) == 0 + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +def test_apply_feature_view_success(test_registry): + # Create Feature Views + batch_source = FileSource( + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + 
name="my_feature_view_1", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + ], + entities=[entity], + tags={"team": "matchmaking"}, + batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + project = "project" + + # Register Feature View + test_registry.apply_feature_view(fv1, project) + + feature_views = test_registry.list_feature_views(project) + + # List Feature Views + assert ( + len(feature_views) == 1 + and feature_views[0].name == "my_feature_view_1" + and feature_views[0].features[0].name == "fs1_my_feature_1" + and feature_views[0].features[0].dtype == Int64 + and feature_views[0].features[1].name == "fs1_my_feature_2" + and feature_views[0].features[1].dtype == String + and feature_views[0].features[2].name == "fs1_my_feature_3" + and feature_views[0].features[2].dtype == Array(String) + and feature_views[0].features[3].name == "fs1_my_feature_4" + and feature_views[0].features[3].dtype == Array(Bytes) + and feature_views[0].entities[0] == "fs1_my_entity_1" + ) + + feature_view = test_registry.get_feature_view("my_feature_view_1", project) + assert ( + feature_view.name == "my_feature_view_1" + and feature_view.features[0].name == "fs1_my_feature_1" + and feature_view.features[0].dtype == Int64 + and feature_view.features[1].name == "fs1_my_feature_2" + and feature_view.features[1].dtype == String + and feature_view.features[2].name == "fs1_my_feature_3" + and feature_view.features[2].dtype == Array(String) + and feature_view.features[3].name == "fs1_my_feature_4" + and feature_view.features[3].dtype == Array(Bytes) + and feature_view.entities[0] == "fs1_my_entity_1" + ) + + test_registry.delete_feature_view("my_feature_view_1", project) + feature_views = test_registry.list_feature_views(project) + assert len(feature_views) == 0 + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +def test_apply_on_demand_feature_view_success(test_registry): + # Create Feature Views + driver_stats = FileSource( + name="driver_stats_source", + path="data/driver_stats_lat_lon.parquet", + timestamp_field="event_timestamp", + created_timestamp_column="created", + description="A table describing the stats of a driver based on hourly logs", + owner="test2@gmail.com", + ) + + driver_daily_features_view = FeatureView( + name="driver_daily_features", + entities=["driver"], + ttl=timedelta(seconds=8640000000), + schema=[ + Field(name="daily_miles_driven", dtype=Float32), + Field(name="lat", dtype=Float32), + Field(name="lon", dtype=Float32), + Field(name="string_feature", dtype=String), + ], + online=True, + source=driver_stats, + tags={"production": "True"}, + owner="test2@gmail.com", + ) + + @on_demand_feature_view( + sources=[driver_daily_features_view], + schema=[Field(name="first_char", dtype=String)], + ) + def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_feature"].str[:1].astype("string") + return df + + project = "project" + + # Register Feature View + test_registry.apply_feature_view(location_features_from_push, project) + + feature_views = test_registry.list_on_demand_feature_views(project) + + # List 
Feature Views + assert ( + len(feature_views) == 1 + and feature_views[0].name == "location_features_from_push" + and feature_views[0].features[0].name == "first_char" + and feature_views[0].features[0].dtype == String + ) + + feature_view = test_registry.get_on_demand_feature_view( + "location_features_from_push", project + ) + assert ( + feature_view.name == "location_features_from_push" + and feature_view.features[0].name == "first_char" + and feature_view.features[0].dtype == String + ) + + test_registry.delete_feature_view("location_features_from_push", project) + feature_views = test_registry.list_on_demand_feature_views(project) + assert len(feature_views) == 0 + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +def test_apply_stream_feature_view_success(test_registry): + # Create Feature Views + def simple_udf(x: int): + return x + 3 + + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + watermark_delay_threshold=timedelta(days=1), + ) + + sfv = StreamFeatureView( + name="test kafka stream feature view", + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + column="dummy_field", + function="max", + time_window=timedelta(days=1), + ), + Aggregation( + column="dummy_field2", + function="count", + time_window=timedelta(days=24), + ), + ], + timestamp_field="event_timestamp", + mode="spark", + source=stream_source, + udf=simple_udf, + tags={}, + ) + + project = "project" + + # Register Feature View + test_registry.apply_feature_view(sfv, project) + + stream_feature_views = test_registry.list_stream_feature_views(project) + + # List Feature Views + assert len(stream_feature_views) == 1 + assert stream_feature_views[0] == sfv + + test_registry.delete_feature_view("test kafka stream feature view", project) + stream_feature_views = test_registry.list_stream_feature_views(project) + assert len(stream_feature_views) == 0 + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +# TODO(kevjumba): remove this in feast 0.24 when deprecating +@pytest.mark.parametrize( + "request_source_schema", + [[Field(name="my_input_1", dtype=Int32)], {"my_input_1": ValueType.INT32}], +) +def test_modify_feature_views_success(test_registry, request_source_schema): + # Create Feature Views + batch_source = FileSource( + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + request_source = RequestSource( + name="request_source", + schema=request_source_schema, + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[Field(name="fs1_my_feature_1", dtype=Int64)], + entities=[entity], + tags={"team": "matchmaking"}, + 
batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + @on_demand_feature_view( + features=[ + Feature(name="odfv1_my_feature_1", dtype=ValueType.STRING), + Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32), + ], + sources=[request_source], + ) + def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("category") + data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") + return data + + project = "project" + + # Register Feature Views + test_registry.apply_feature_view(odfv1, project) + test_registry.apply_feature_view(fv1, project) + + # Modify odfv by changing a single feature dtype + @on_demand_feature_view( + features=[ + Feature(name="odfv1_my_feature_1", dtype=ValueType.FLOAT), + Feature(name="odfv1_my_feature_2", dtype=ValueType.INT32), + ], + sources=[request_source], + ) + def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("float") + data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") + return data + + # Apply the modified odfv + test_registry.apply_feature_view(odfv1, project) + + # Check odfv + on_demand_feature_views = test_registry.list_on_demand_feature_views(project) + + assert ( + len(on_demand_feature_views) == 1 + and on_demand_feature_views[0].name == "odfv1" + and on_demand_feature_views[0].features[0].name == "odfv1_my_feature_1" + and on_demand_feature_views[0].features[0].dtype == Float32 + and on_demand_feature_views[0].features[1].name == "odfv1_my_feature_2" + and on_demand_feature_views[0].features[1].dtype == Int32 + ) + request_schema = on_demand_feature_views[0].get_request_data_schema() + assert ( + list(request_schema.keys())[0] == "my_input_1" + and list(request_schema.values())[0] == ValueType.INT32 + ) + + feature_view = test_registry.get_on_demand_feature_view("odfv1", project) + assert ( + feature_view.name == "odfv1" + and feature_view.features[0].name == "odfv1_my_feature_1" + and feature_view.features[0].dtype == Float32 + and feature_view.features[1].name == "odfv1_my_feature_2" + and feature_view.features[1].dtype == Int32 + ) + request_schema = feature_view.get_request_data_schema() + assert ( + list(request_schema.keys())[0] == "my_input_1" + and list(request_schema.values())[0] == ValueType.INT32 + ) + + # Make sure fv1 is untouched + feature_views = test_registry.list_feature_views(project) + + # List Feature Views + assert ( + len(feature_views) == 1 + and feature_views[0].name == "my_feature_view_1" + and feature_views[0].features[0].name == "fs1_my_feature_1" + and feature_views[0].features[0].dtype == Int64 + and feature_views[0].entities[0] == "fs1_my_entity_1" + ) + + feature_view = test_registry.get_feature_view("my_feature_view_1", project) + assert ( + feature_view.name == "my_feature_view_1" + and feature_view.features[0].name == "fs1_my_feature_1" + and feature_view.features[0].dtype == Int64 + and feature_view.entities[0] == "fs1_my_entity_1" + ) + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +@pytest.mark.parametrize( + "test_registry", + [lazy_fixture("local_registry")], +) +def test_apply_data_source(test_registry: Registry): + validate_registry_data_source_apply(test_registry) + + +def test_commit(): + fd, registry_path = mkstemp() + registry_config 
= RegistryConfig(path=registry_path, cache_ttl_seconds=600)
+    test_registry = Registry(registry_config, None)
+
+    entity = Entity(
+        name="driver_car_id",
+        description="Car driver id",
+        tags={"team": "matchmaking"},
+    )
+
+    project = "project"
+
+    # Register Entity without committing
+    test_registry.apply_entity(entity, project, commit=False)
+    assert test_registry.cached_registry_proto
+    assert len(test_registry.cached_registry_proto.project_metadata) == 1
+    project_metadata = test_registry.cached_registry_proto.project_metadata[0]
+    project_uuid = project_metadata.project_uuid
+    assert len(project_uuid) == 36
+    validate_project_uuid(project_uuid, test_registry)
+
+    # Retrieving the entity should still succeed
+    entities = test_registry.list_entities(project, allow_cache=True)
+    entity = entities[0]
+    assert (
+        len(entities) == 1
+        and entity.name == "driver_car_id"
+        and entity.description == "Car driver id"
+        and "team" in entity.tags
+        and entity.tags["team"] == "matchmaking"
+    )
+    validate_project_uuid(project_uuid, test_registry)
+
+    entity = test_registry.get_entity("driver_car_id", project, allow_cache=True)
+    assert (
+        entity.name == "driver_car_id"
+        and entity.description == "Car driver id"
+        and "team" in entity.tags
+        and entity.tags["team"] == "matchmaking"
+    )
+    validate_project_uuid(project_uuid, test_registry)
+
+    # Create new registry that points to the same store
+    registry_with_same_store = Registry(registry_config, None)
+
+    # Retrieving the entity should fail since the store is empty
+    entities = registry_with_same_store.list_entities(project)
+    assert len(entities) == 0
+    validate_project_uuid(project_uuid, registry_with_same_store)
+
+    # Commit from the original registry
+    test_registry.commit()
+
+    # Reconstruct the new registry in order to read the newly written store
+    registry_with_same_store = Registry(registry_config, None)
+
+    # Retrieving the entity should now succeed
+    entities = registry_with_same_store.list_entities(project)
+    entity = entities[0]
+    assert (
+        len(entities) == 1
+        and entity.name == "driver_car_id"
+        and entity.description == "Car driver id"
+        and "team" in entity.tags
+        and entity.tags["team"] == "matchmaking"
+    )
+    validate_project_uuid(project_uuid, registry_with_same_store)
+
+    entity = test_registry.get_entity("driver_car_id", project)
+    assert (
+        entity.name == "driver_car_id"
+        and entity.description == "Car driver id"
+        and "team" in entity.tags
+        and entity.tags["team"] == "matchmaking"
+    )
+
+    test_registry.teardown()
+
+    # Will try to reload registry, which will fail because the file has been deleted
+    with pytest.raises(FileNotFoundError):
+        test_registry._get_registry_proto(project=project)
diff --git a/sdk/python/tests/utils/e2e_test_utils.py b/sdk/python/tests/utils/e2e_test_utils.py
index 6eb69351af..7d898f9505 100644
--- a/sdk/python/tests/utils/e2e_test_utils.py
+++ b/sdk/python/tests/utils/e2e_test_utils.py
@@ -1,12 +1,39 @@
 import math
+import os
+import tempfile
 import time
+from contextlib import contextmanager
 from datetime import datetime, timedelta
-from typing import Optional
+from pathlib import Path
+from textwrap import dedent
+from typing import List, Optional
 
 import pandas as pd
+import pytest
+import yaml
 from pytz import utc
 
-from feast import FeatureStore, FeatureView
+from feast import FeatureStore, FeatureView, FileSource, RepoConfig
+from feast.data_format import ParquetFormat
+from feast.entity import Entity
+from feast.field import Field
+from feast.registry import Registry
+from feast.types
import Array, Bytes, Int64, String +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.universal.data_source_creator import ( + DataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.bigquery import ( + BigQueryDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.file import ( + FileDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.redshift import ( + RedshiftDataSourceCreator, +) def check_offline_and_online_features( @@ -138,3 +165,190 @@ def validate_offline_online_store_consistency( full_feature_names=full_feature_names, check_offline_store=check_offline_store, ) + + +def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): + offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project) + + offline_store_config = offline_creator.create_offline_store_config() + online_store = test_repo_config.online_store + + config = RepoConfig( + registry=str(Path(repo_dir_name) / "registry.db"), + project=project, + provider=test_repo_config.provider, + offline_store=offline_store_config, + online_store=online_store, + repo_path=str(Path(repo_dir_name)), + ) + config_dict = config.dict() + if ( + isinstance(config_dict["online_store"], dict) + and "redis_type" in config_dict["online_store"] + ): + if str(config_dict["online_store"]["redis_type"]) == "RedisType.redis_cluster": + config_dict["online_store"]["redis_type"] = "redis_cluster" + elif str(config_dict["online_store"]["redis_type"]) == "RedisType.redis": + config_dict["online_store"]["redis_type"] = "redis" + config_dict["repo_path"] = str(config_dict["repo_path"]) + return yaml.safe_dump(config_dict) + + +NULLABLE_ONLINE_STORE_CONFIGS: List[IntegrationTestRepoConfig] = [ + IntegrationTestRepoConfig( + provider="local", + offline_store_creator=FileDataSourceCreator, + online_store=None, + ), +] + +if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": + NULLABLE_ONLINE_STORE_CONFIGS.extend( + [ + IntegrationTestRepoConfig( + provider="gcp", + offline_store_creator=BigQueryDataSourceCreator, + online_store=None, + ), + IntegrationTestRepoConfig( + provider="aws", + offline_store_creator=RedshiftDataSourceCreator, + online_store=None, + ), + ] + ) + + +@contextmanager +def setup_third_party_provider_repo(provider_name: str): + with tempfile.TemporaryDirectory() as repo_dir_name: + + # Construct an example repo in a temporary dir + repo_path = Path(repo_dir_name) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text( + dedent( + f""" + project: foo + registry: data/registry.db + provider: {provider_name} + online_store: + path: data/online_store.db + type: sqlite + offline_store: + type: file + """ + ) + ) + + (repo_path / "foo").mkdir() + repo_example = repo_path / "foo/provider.py" + repo_example.write_text( + (Path(__file__).parents[2] / "foo_provider.py").read_text() + ) + + yield repo_path + + +@contextmanager +def setup_third_party_registry_store_repo(registry_store: str): + with tempfile.TemporaryDirectory() as repo_dir_name: + + # Construct an example repo in a temporary dir + repo_path = Path(repo_dir_name) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text( + dedent( + f""" + project: foo + registry: + registry_store_type: {registry_store} + path: foobar://foo.bar + provider: local + online_store: + path: data/online_store.db + type: sqlite + offline_store: + 
type: file + """ + ) + ) + + (repo_path / "foo").mkdir() + repo_example = repo_path / "foo/registry_store.py" + repo_example.write_text( + (Path(__file__).parents[2] / "foo_registry_store.py").read_text() + ) + + yield repo_path + + +def validate_registry_data_source_apply(test_registry: Registry): + # Create Feature Views + batch_source = FileSource( + name="test_source", + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + ], + entities=[entity], + tags={"team": "matchmaking"}, + batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + project = "project" + + # Register data source and feature view + test_registry.apply_data_source(batch_source, project, commit=False) + test_registry.apply_feature_view(fv1, project, commit=True) + + registry_feature_views = test_registry.list_feature_views(project) + registry_data_sources = test_registry.list_data_sources(project) + assert len(registry_feature_views) == 1 + assert len(registry_data_sources) == 1 + registry_feature_view = registry_feature_views[0] + assert registry_feature_view.batch_source == batch_source + registry_data_source = registry_data_sources[0] + assert registry_data_source == batch_source + + # Check that change to batch source propagates + batch_source.timestamp_field = "new_ts_col" + test_registry.apply_data_source(batch_source, project, commit=False) + test_registry.apply_feature_view(fv1, project, commit=True) + registry_feature_views = test_registry.list_feature_views(project) + registry_data_sources = test_registry.list_data_sources(project) + assert len(registry_feature_views) == 1 + assert len(registry_data_sources) == 1 + registry_feature_view = registry_feature_views[0] + assert registry_feature_view.batch_source == batch_source + registry_batch_source = test_registry.list_data_sources(project)[0] + assert registry_batch_source == batch_source + + test_registry.teardown() + + # Will try to reload registry, which will fail because the file has been deleted + with pytest.raises(FileNotFoundError): + test_registry._get_registry_proto(project=project) + + +def validate_project_uuid(project_uuid, test_registry): + assert len(test_registry.cached_registry_proto.project_metadata) == 1 + project_metadata = test_registry.cached_registry_proto.project_metadata[0] + assert project_metadata.project_uuid == project_uuid diff --git a/sdk/python/tests/utils/feature_store_test_utils.py b/sdk/python/tests/utils/feature_store_test_utils.py new file mode 100644 index 0000000000..67866ffcaa --- /dev/null +++ b/sdk/python/tests/utils/feature_store_test_utils.py @@ -0,0 +1,399 @@ +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal as pd_assert_frame_equal +from pytz import utc + +from feast import utils +from feast.errors import FeatureNameCollisionError +from feast.feature_view import FeatureView + + +def convert_timestamp_records_to_utc( + records: List[Dict[str, Any]], column: str +) -> List[Dict[str, Any]]: + for record in records: + record[column] = utils.make_tzaware(record[column]).astimezone(utc) + return 
records + + +# Find the latest record in the given time range and filter +def find_latest_record( + records: List[Dict[str, Any]], + ts_key: str, + ts_start: datetime, + ts_end: datetime, + filter_keys: Optional[List[str]] = None, + filter_values: Optional[List[Any]] = None, +) -> Dict[str, Any]: + filter_keys = filter_keys or [] + filter_values = filter_values or [] + assert len(filter_keys) == len(filter_values) + found_record: Dict[str, Any] = {} + for record in records: + if ( + all( + [ + record[filter_key] == filter_value + for filter_key, filter_value in zip(filter_keys, filter_values) + ] + ) + and ts_start <= record[ts_key] <= ts_end + ): + if not found_record or found_record[ts_key] < record[ts_key]: + found_record = record + return found_record + + +def get_expected_training_df( + customer_df: pd.DataFrame, + customer_fv: FeatureView, + driver_df: pd.DataFrame, + driver_fv: FeatureView, + orders_df: pd.DataFrame, + order_fv: FeatureView, + location_df: pd.DataFrame, + location_fv: FeatureView, + global_df: pd.DataFrame, + global_fv: FeatureView, + field_mapping_df: pd.DataFrame, + field_mapping_fv: FeatureView, + entity_df: pd.DataFrame, + event_timestamp: str, + full_feature_names: bool = False, +): + # Convert all pandas dataframes into records with UTC timestamps + customer_records = convert_timestamp_records_to_utc( + customer_df.to_dict("records"), customer_fv.batch_source.timestamp_field + ) + driver_records = convert_timestamp_records_to_utc( + driver_df.to_dict("records"), driver_fv.batch_source.timestamp_field + ) + order_records = convert_timestamp_records_to_utc( + orders_df.to_dict("records"), event_timestamp + ) + location_records = convert_timestamp_records_to_utc( + location_df.to_dict("records"), location_fv.batch_source.timestamp_field + ) + global_records = convert_timestamp_records_to_utc( + global_df.to_dict("records"), global_fv.batch_source.timestamp_field + ) + field_mapping_records = convert_timestamp_records_to_utc( + field_mapping_df.to_dict("records"), + field_mapping_fv.batch_source.timestamp_field, + ) + entity_rows = convert_timestamp_records_to_utc( + entity_df.to_dict("records"), event_timestamp + ) + + # Set sufficiently large ttl that it effectively functions as infinite for the calculations below. 
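+    # (find_latest_record above treats the lookup window as inclusive on both
+    # ends: a record qualifies when ts_start <= record[ts_key] <= ts_end.)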
+ default_ttl = timedelta(weeks=52) + + # Manually do point-in-time join of driver, customer, and order records against + # the entity df + for entity_row in entity_rows: + customer_record = find_latest_record( + customer_records, + ts_key=customer_fv.batch_source.timestamp_field, + ts_start=entity_row[event_timestamp] + - _get_feature_view_ttl(customer_fv, default_ttl), + ts_end=entity_row[event_timestamp], + filter_keys=["customer_id"], + filter_values=[entity_row["customer_id"]], + ) + driver_record = find_latest_record( + driver_records, + ts_key=driver_fv.batch_source.timestamp_field, + ts_start=entity_row[event_timestamp] + - _get_feature_view_ttl(driver_fv, default_ttl), + ts_end=entity_row[event_timestamp], + filter_keys=["driver_id"], + filter_values=[entity_row["driver_id"]], + ) + order_record = find_latest_record( + order_records, + ts_key=customer_fv.batch_source.timestamp_field, + ts_start=entity_row[event_timestamp] + - _get_feature_view_ttl(order_fv, default_ttl), + ts_end=entity_row[event_timestamp], + filter_keys=["customer_id", "driver_id"], + filter_values=[entity_row["customer_id"], entity_row["driver_id"]], + ) + origin_record = find_latest_record( + location_records, + ts_key=location_fv.batch_source.timestamp_field, + ts_start=order_record[event_timestamp] + - _get_feature_view_ttl(location_fv, default_ttl), + ts_end=order_record[event_timestamp], + filter_keys=["location_id"], + filter_values=[order_record["origin_id"]], + ) + destination_record = find_latest_record( + location_records, + ts_key=location_fv.batch_source.timestamp_field, + ts_start=order_record[event_timestamp] + - _get_feature_view_ttl(location_fv, default_ttl), + ts_end=order_record[event_timestamp], + filter_keys=["location_id"], + filter_values=[order_record["destination_id"]], + ) + global_record = find_latest_record( + global_records, + ts_key=global_fv.batch_source.timestamp_field, + ts_start=order_record[event_timestamp] + - _get_feature_view_ttl(global_fv, default_ttl), + ts_end=order_record[event_timestamp], + ) + + field_mapping_record = find_latest_record( + field_mapping_records, + ts_key=field_mapping_fv.batch_source.timestamp_field, + ts_start=order_record[event_timestamp] + - _get_feature_view_ttl(field_mapping_fv, default_ttl), + ts_end=order_record[event_timestamp], + ) + + entity_row.update( + { + ( + f"customer_profile__{k}" if full_feature_names else k + ): customer_record.get(k, None) + for k in ( + "current_balance", + "avg_passenger_count", + "lifetime_trip_count", + ) + } + ) + entity_row.update( + { + (f"driver_stats__{k}" if full_feature_names else k): driver_record.get( + k, None + ) + for k in ("conv_rate", "avg_daily_trips") + } + ) + entity_row.update( + { + (f"order__{k}" if full_feature_names else k): order_record.get(k, None) + for k in ("order_is_success",) + } + ) + entity_row.update( + { + "origin__temperature": origin_record.get("temperature", None), + "destination__temperature": destination_record.get("temperature", None), + } + ) + entity_row.update( + { + (f"global_stats__{k}" if full_feature_names else k): global_record.get( + k, None + ) + for k in ( + "num_rides", + "avg_ride_length", + ) + } + ) + + # get field_mapping_record by column name, but label by feature name + entity_row.update( + { + ( + f"field_mapping__{feature}" if full_feature_names else feature + ): field_mapping_record.get(column, None) + for ( + column, + feature, + ) in field_mapping_fv.batch_source.field_mapping.items() + } + ) + + # Convert records back to pandas dataframe + 
expected_df = pd.DataFrame(entity_rows) + + # Move "event_timestamp" column to front + current_cols = expected_df.columns.tolist() + current_cols.remove(event_timestamp) + expected_df = expected_df[[event_timestamp] + current_cols] + + # Cast some columns to expected types, since we lose information when converting pandas DFs into Python objects. + if full_feature_names: + expected_column_types = { + "order__order_is_success": "int32", + "driver_stats__conv_rate": "float32", + "customer_profile__current_balance": "float32", + "customer_profile__avg_passenger_count": "float32", + "global_stats__avg_ride_length": "float32", + "field_mapping__feature_name": "int32", + } + else: + expected_column_types = { + "order_is_success": "int32", + "conv_rate": "float32", + "current_balance": "float32", + "avg_passenger_count": "float32", + "avg_ride_length": "float32", + "feature_name": "int32", + } + + for col, typ in expected_column_types.items(): + expected_df[col] = expected_df[col].astype(typ) + + conv_feature_name = "driver_stats__conv_rate" if full_feature_names else "conv_rate" + conv_plus_feature_name = get_response_feature_name( + "conv_rate_plus_100", full_feature_names + ) + expected_df[conv_plus_feature_name] = expected_df[conv_feature_name] + 100 + expected_df[ + get_response_feature_name("conv_rate_plus_100_rounded", full_feature_names) + ] = ( + expected_df[conv_plus_feature_name] + .astype("float") + .round() + .astype(pd.Int32Dtype()) + ) + if "val_to_add" in expected_df.columns: + expected_df[ + get_response_feature_name("conv_rate_plus_val_to_add", full_feature_names) + ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) + + return expected_df + + +def get_response_feature_name(feature: str, full_feature_names: bool) -> str: + if feature in {"conv_rate", "avg_daily_trips"} and full_feature_names: + return f"driver_stats__{feature}" + + if ( + feature + in { + "conv_rate_plus_100", + "conv_rate_plus_100_rounded", + "conv_rate_plus_val_to_add", + } + and full_feature_names + ): + return f"conv_rate_plus_100__{feature}" + + return feature + + +def assert_feature_service_correctness( + store, feature_service, full_feature_names, entity_df, expected_df, event_timestamp +): + + job_from_df = store.get_historical_features( + entity_df=entity_df, + features=feature_service, + full_feature_names=full_feature_names, + ) + + actual_df_from_df_entities = job_from_df.to_df() + + expected_df = expected_df[ + [ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + get_response_feature_name("conv_rate", full_feature_names), + get_response_feature_name("conv_rate_plus_100", full_feature_names), + "driver_age", + ] + ] + + validate_dataframes( + expected_df, + actual_df_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], + ) + + +def assert_feature_service_entity_mapping_correctness( + store, feature_service, full_feature_names, entity_df, expected_df, event_timestamp +): + if full_feature_names: + job_from_df = store.get_historical_features( + entity_df=entity_df, + features=feature_service, + full_feature_names=full_feature_names, + ) + actual_df_from_df_entities = job_from_df.to_df() + + expected_df: pd.DataFrame = ( + expected_df.sort_values( + by=[ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + "origin_id", + "destination_id", + ] + ) + .drop_duplicates() + .reset_index(drop=True) + ) + expected_df = expected_df[ + [ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + "origin_id", + "destination_id", + 
"origin__temperature", + "destination__temperature", + ] + ] + + validate_dataframes( + expected_df, + actual_df_from_df_entities, + keys=[ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + "origin_id", + "destination_id", + ], + ) + else: + # using 2 of the same FeatureView without full_feature_names=True will result in collision + with pytest.raises(FeatureNameCollisionError): + job_from_df = store.get_historical_features( + entity_df=entity_df, + features=feature_service, + full_feature_names=full_feature_names, + ) + + +def validate_dataframes(expected_df, actual_df, keys): + expected_df: pd.DataFrame = ( + expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True) + ) + + actual_df = ( + actual_df[expected_df.columns] + .sort_values(by=keys) + .drop_duplicates() + .reset_index(drop=True) + ) + + pd_assert_frame_equal( + expected_df, + actual_df, + check_dtype=False, + ) + + +def _get_feature_view_ttl( + feature_view: FeatureView, default_ttl: timedelta +) -> timedelta: + """Returns the ttl of a feature view if it is non-zero. Otherwise returns the specified default.""" + return feature_view.ttl if feature_view.ttl else default_ttl diff --git a/sdk/python/tests/utils/feature_utils.py b/sdk/python/tests/utils/feature_test_utils.py similarity index 100% rename from sdk/python/tests/utils/feature_utils.py rename to sdk/python/tests/utils/feature_test_utils.py diff --git a/sdk/python/tests/utils/logged_features.py b/sdk/python/tests/utils/logged_features.py deleted file mode 100644 index dc844a60b4..0000000000 --- a/sdk/python/tests/utils/logged_features.py +++ /dev/null @@ -1,81 +0,0 @@ -import contextlib -import datetime -import tempfile -import uuid -from pathlib import Path -from typing import Iterator, Union - -import numpy as np -import pandas as pd -import pyarrow - -from feast import FeatureService, FeatureStore, FeatureView -from feast.errors import FeatureViewNotFoundException -from feast.feature_logging import LOG_DATE_FIELD, LOG_TIMESTAMP_FIELD, REQUEST_ID_FIELD -from feast.protos.feast.serving.ServingService_pb2 import FieldStatus - - -def prepare_logs( - source_df: pd.DataFrame, feature_service: FeatureService, store: FeatureStore -) -> pd.DataFrame: - num_rows = source_df.shape[0] - - logs_df = pd.DataFrame() - logs_df[REQUEST_ID_FIELD] = [str(uuid.uuid4()) for _ in range(num_rows)] - logs_df[LOG_TIMESTAMP_FIELD] = pd.Series( - np.random.randint(0, 7 * 24 * 3600, num_rows) - ).map(lambda secs: pd.Timestamp.utcnow() - datetime.timedelta(seconds=secs)) - logs_df[LOG_DATE_FIELD] = logs_df[LOG_TIMESTAMP_FIELD].dt.date - - for projection in feature_service.feature_view_projections: - try: - view = store.get_feature_view(projection.name) - except FeatureViewNotFoundException: - view = store.get_on_demand_feature_view(projection.name) - for source in view.source_request_sources.values(): - for field in source.schema: - logs_df[field.name] = source_df[field.name] - else: - for entity_name in view.entities: - entity = store.get_entity(entity_name) - logs_df[entity.join_key] = source_df[entity.join_key] - - for feature in projection.features: - source_field = ( - feature.name - if feature.name in source_df.columns - else f"{projection.name_to_use()}__{feature.name}" - ) - destination_field = f"{projection.name_to_use()}__{feature.name}" - logs_df[destination_field] = source_df[source_field] - logs_df[f"{destination_field}__timestamp"] = source_df[ - "event_timestamp" - ].dt.floor("s") - if logs_df[f"{destination_field}__timestamp"].dt.tz: - 
logs_df[f"{destination_field}__timestamp"] = logs_df[ - f"{destination_field}__timestamp" - ].dt.tz_convert(None) - logs_df[f"{destination_field}__status"] = FieldStatus.PRESENT - if isinstance(view, FeatureView) and view.ttl: - logs_df[f"{destination_field}__status"] = logs_df[ - f"{destination_field}__status" - ].mask( - logs_df[f"{destination_field}__timestamp"] - < (datetime.datetime.utcnow() - view.ttl), - FieldStatus.OUTSIDE_MAX_AGE, - ) - - return logs_df - - -@contextlib.contextmanager -def to_logs_dataset( - table: pyarrow.Table, pass_as_path: bool -) -> Iterator[Union[pyarrow.Table, Path]]: - if not pass_as_path: - yield table - return - - with tempfile.TemporaryDirectory() as temp_dir: - pyarrow.parquet.write_to_dataset(table, root_path=temp_dir) - yield Path(temp_dir) From af96f09e84daa3595f7858aa7348aa1a6c9335d3 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 13:46:36 -0700 Subject: [PATCH 05/30] Verify tests Signed-off-by: Kevin Zhang --- .../integration/e2e/test_go_feature_server.py | 2 +- .../tests/integration/e2e/test_validation.py | 2 +- .../offline_store/test_feature_logging.py | 2 +- .../online_store/test_feature_service_read.py | 2 +- .../integration/registration/test_cli.py | 2 +- .../integration/scaffolding/test_init.py | 4 + .../scaffolding/test_partial_apply.py | 2 +- .../test_dynamodb_online_store.py | 40 ++--- .../infra}/online_store/test_e2e_local.py | 163 ++++-------------- .../infra}/scaffolding/test_repo_config.py | 0 .../scaffolding/test_repo_operations.py | 0 .../tests/unit/infra/test_local_registry.py | 11 +- sdk/python/tests/unit/test_feature_service.py | 15 +- sdk/python/tests/unit/test_proto_json.py | 10 +- .../tests/utils/feature_store_test_utils.py | 99 ++++++++++- ...est.py => online_read_write_test_utils.py} | 0 sdk/python/tests/utils/online_store_utils.py | 10 +- ...ark.py => online_write_benchmark_utils.py} | 0 sdk/python/tests/utils/test_wrapper_utils.py | 14 ++ 19 files changed, 193 insertions(+), 185 deletions(-) rename sdk/python/tests/{integration => unit/infra}/online_store/test_e2e_local.py (54%) rename sdk/python/tests/{integration => unit/infra}/scaffolding/test_repo_config.py (100%) rename sdk/python/tests/{integration => unit/infra}/scaffolding/test_repo_operations.py (100%) rename sdk/python/tests/utils/{online_read_write_test.py => online_read_write_test_utils.py} (100%) rename sdk/python/tests/utils/{online_write_benchmark.py => online_write_benchmark_utils.py} (100%) create mode 100644 sdk/python/tests/utils/test_wrapper_utils.py diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 502047b953..fb28462864 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -31,7 +31,7 @@ driver, location, ) -from tests.utils.feature_utils import generate_expected_logs, get_latest_rows +from tests.utils.feature_test_utils import generate_expected_logs, get_latest_rows from tests.utils.http_utils import check_port_open, free_port diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index f7dcd7b494..982b28547a 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -27,7 +27,7 @@ location, ) from tests.utils.cli_utils import CliRunner -from tests.utils.logged_features import prepare_logs +from tests.utils.feature_test_utils import 
prepare_logs _features = [ "customer_profile:current_balance", diff --git a/sdk/python/tests/integration/offline_store/test_feature_logging.py b/sdk/python/tests/integration/offline_store/test_feature_logging.py index 5d74ee284c..d28f92ce6d 100644 --- a/sdk/python/tests/integration/offline_store/test_feature_logging.py +++ b/sdk/python/tests/integration/offline_store/test_feature_logging.py @@ -22,7 +22,7 @@ location, ) from tests.integration.feature_repos.universal.feature_views import conv_rate_plus_100 -from tests.utils.logged_features import prepare_logs, to_logs_dataset +from tests.utils.feature_test_utils import prepare_logs, to_logs_dataset @pytest.mark.integration diff --git a/sdk/python/tests/integration/online_store/test_feature_service_read.py b/sdk/python/tests/integration/online_store/test_feature_service_read.py index 33c318b9ed..d7b9c07664 100644 --- a/sdk/python/tests/integration/online_store/test_feature_service_read.py +++ b/sdk/python/tests/integration/online_store/test_feature_service_read.py @@ -1,7 +1,7 @@ import pytest from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.online_read_write_test import basic_rw_test +from tests.utils.online_read_write_test_utils import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index 814f0fbba6..e4f78c95e3 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -15,7 +15,7 @@ setup_third_party_provider_repo, setup_third_party_registry_store_repo, ) -from tests.utils.online_read_write_test import basic_rw_test +from tests.utils.online_read_write_test_utils import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/integration/scaffolding/test_init.py b/sdk/python/tests/integration/scaffolding/test_init.py index 1cada91ea0..619e7e8625 100644 --- a/sdk/python/tests/integration/scaffolding/test_init.py +++ b/sdk/python/tests/integration/scaffolding/test_init.py @@ -3,9 +3,12 @@ from pathlib import Path from textwrap import dedent +import pytest + from tests.utils.cli_utils import CliRunner +@pytest.mark.integration def test_repo_init() -> None: """ This test simply makes sure that you can run `feast apply && feast materialize` on @@ -28,6 +31,7 @@ def test_repo_init() -> None: assert result.returncode == 0 +@pytest.mark.integration def test_repo_init_with_underscore_in_project_name() -> None: """ Test `feast init` with underscore in the project name diff --git a/sdk/python/tests/integration/scaffolding/test_partial_apply.py b/sdk/python/tests/integration/scaffolding/test_partial_apply.py index e5a7206b96..810da0776b 100644 --- a/sdk/python/tests/integration/scaffolding/test_partial_apply.py +++ b/sdk/python/tests/integration/scaffolding/test_partial_apply.py @@ -5,7 +5,7 @@ from feast import BigQuerySource, Entity, FeatureView, Field from feast.types import Float32, String from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.online_read_write_test import basic_rw_test +from tests.utils.online_read_write_test_utils import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 07e22017b5..1aff82abcc 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ 
b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -15,9 +15,9 @@ from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig from tests.utils.online_store_utils import ( - _create_n_customer_test_samples, - _create_test_table, - _insert_data_test_table, + create_n_customer_test_samples, + create_test_table, + insert_data_test_table, ) REGISTRY = "s3://test_registry/registry.db" @@ -165,9 +165,9 @@ def test_dynamodb_online_store_online_read( ): """Test DynamoDBOnlineStore online_read method.""" db_table_name = f"{TABLE_NAME}_online_read_{n_samples}" - _create_test_table(PROJECT, db_table_name, REGION) - data = _create_n_customer_test_samples(n=n_samples) - _insert_data_test_table(data, PROJECT, db_table_name, REGION) + create_test_table(PROJECT, db_table_name, REGION) + data = create_n_customer_test_samples(n=n_samples) + insert_data_test_table(data, PROJECT, db_table_name, REGION) entity_keys, features, *rest = zip(*data) returned_items = dynamodb_online_store.online_read( @@ -186,8 +186,8 @@ def test_dynamodb_online_store_online_write_batch( ): """Test DynamoDBOnlineStore online_write_batch method.""" db_table_name = f"{TABLE_NAME}_online_write_batch_{n_samples}" - _create_test_table(PROJECT, db_table_name, REGION) - data = _create_n_customer_test_samples() + create_test_table(PROJECT, db_table_name, REGION) + data = create_n_customer_test_samples() entity_keys, features, *rest = zip(*data) dynamodb_online_store.online_write_batch( @@ -211,10 +211,10 @@ def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore update method.""" # create dummy table to keep db_table_keep_name = f"{TABLE_NAME}_keep_update" - _create_test_table(PROJECT, db_table_keep_name, REGION) + create_test_table(PROJECT, db_table_keep_name, REGION) # create dummy table to delete db_table_delete_name = f"{TABLE_NAME}_delete_update" - _create_test_table(PROJECT, db_table_delete_name, REGION) + create_test_table(PROJECT, db_table_delete_name, REGION) dynamodb_online_store.update( config=repo_config, @@ -240,8 +240,8 @@ def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore teardown method.""" db_table_delete_name_one = f"{TABLE_NAME}_delete_teardown_1" db_table_delete_name_two = f"{TABLE_NAME}_delete_teardown_2" - _create_test_table(PROJECT, db_table_delete_name_one, REGION) - _create_test_table(PROJECT, db_table_delete_name_two, REGION) + create_test_table(PROJECT, db_table_delete_name_one, REGION) + create_test_table(PROJECT, db_table_delete_name_two, REGION) dynamodb_online_store.teardown( config=repo_config, @@ -267,9 +267,9 @@ def test_dynamodb_online_store_online_read_unknown_entity( ): """Test DynamoDBOnlineStore online_read method.""" n_samples = 2 - _create_test_table(PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION) - data = _create_n_customer_test_samples(n=n_samples) - _insert_data_test_table( + create_test_table(PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION) + data = create_n_customer_test_samples(n=n_samples) + insert_data_test_table( data, PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION ) @@ -304,8 +304,8 @@ def test_dynamodb_online_store_online_read_unknown_entity( def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): """Test DynamoDBOnline Store deduplicate write batch request items.""" dynamodb_tbl = f"{TABLE_NAME}_batch_non_duplicates" - _create_test_table(PROJECT, dynamodb_tbl, 
REGION) - data = _create_n_customer_test_samples() + create_test_table(PROJECT, dynamodb_tbl, REGION) + data = create_n_customer_test_samples() data_duplicate = deepcopy(data) dynamodb_resource = boto3.resource("dynamodb", region_name=REGION) table_instance = dynamodb_resource.Table(f"{PROJECT}.{dynamodb_tbl}") @@ -330,9 +330,9 @@ def test_dynamodb_online_store_online_read_unknown_entity_end_of_batch( """ batch_size = repo_config.online_store.batch_size n_samples = batch_size - _create_test_table(PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION) - data = _create_n_customer_test_samples(n=n_samples) - _insert_data_test_table( + create_test_table(PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION) + data = create_n_customer_test_samples(n=n_samples) + insert_data_test_table( data, PROJECT, f"{TABLE_NAME}_unknown_entity_{n_samples}", REGION ) diff --git a/sdk/python/tests/integration/online_store/test_e2e_local.py b/sdk/python/tests/unit/infra/online_store/test_e2e_local.py similarity index 54% rename from sdk/python/tests/integration/online_store/test_e2e_local.py rename to sdk/python/tests/unit/infra/online_store/test_e2e_local.py index 34758a50d0..0c4cf36563 100644 --- a/sdk/python/tests/integration/online_store/test_e2e_local.py +++ b/sdk/python/tests/unit/infra/online_store/test_e2e_local.py @@ -4,7 +4,6 @@ from pathlib import Path import pandas as pd -from pytz import utc from feast.driver_test_data import ( create_driver_hourly_stats_df, @@ -12,135 +11,7 @@ ) from feast.feature_store import FeatureStore from tests.utils.cli_utils import CliRunner, get_example_repo - - -def _get_last_feature_row(df: pd.DataFrame, driver_id, max_date: datetime): - """Manually extract last feature value from a dataframe for a given driver_id with up to `max_date` date""" - filtered = df[ - (df["driver_id"] == driver_id) - & (df["event_timestamp"] < max_date.replace(tzinfo=utc)) - ] - max_ts = filtered.loc[filtered["event_timestamp"].idxmax()]["event_timestamp"] - filtered_by_ts = filtered[filtered["event_timestamp"] == max_ts] - return filtered_by_ts.loc[filtered_by_ts["created"].idxmax()] - - -def _assert_online_features( - store: FeatureStore, driver_df: pd.DataFrame, max_date: datetime -): - """Assert that features in online store are up to date with `max_date` date.""" - # Read features back - response = store.get_online_features( - features=[ - "driver_hourly_stats:conv_rate", - "driver_hourly_stats:avg_daily_trips", - "global_daily_stats:num_rides", - "global_daily_stats:avg_ride_length", - ], - entity_rows=[{"driver_id": 1001}], - full_feature_names=True, - ) - - # Float features should still be floats. - assert ( - response.proto.results[ - list(response.proto.metadata.feature_names.val).index( - "driver_hourly_stats__conv_rate" - ) - ] - .values[0] - .float_val - > 0 - ), response.to_dict() - - result = response.to_dict() - assert len(result) == 5 - assert "driver_hourly_stats__avg_daily_trips" in result - assert "driver_hourly_stats__conv_rate" in result - assert ( - abs( - result["driver_hourly_stats__conv_rate"][0] - - _get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] - ) - < 0.01 - ) - assert "global_daily_stats__num_rides" in result - assert "global_daily_stats__avg_ride_length" in result - - # Test the ODFV if it exists. 
- odfvs = store.list_on_demand_feature_views() - if odfvs and odfvs[0].name == "conv_rate_plus_100": - response = store.get_online_features( - features=[ - "conv_rate_plus_100:conv_rate_plus_100", - "conv_rate_plus_100:conv_rate_plus_val_to_add", - ], - entity_rows=[{"driver_id": 1001, "val_to_add": 100}], - full_feature_names=True, - ) - - # Check that float64 feature is stored correctly in proto format. - assert ( - response.proto.results[ - list(response.proto.metadata.feature_names.val).index( - "conv_rate_plus_100__conv_rate_plus_100" - ) - ] - .values[0] - .double_val - > 0 - ) - - result = response.to_dict() - assert len(result) == 3 - assert "conv_rate_plus_100__conv_rate_plus_100" in result - assert "conv_rate_plus_100__conv_rate_plus_val_to_add" in result - assert ( - abs( - result["conv_rate_plus_100__conv_rate_plus_100"][0] - - (_get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] + 100) - ) - < 0.01 - ) - assert ( - abs( - result["conv_rate_plus_100__conv_rate_plus_val_to_add"][0] - - (_get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] + 100) - ) - < 0.01 - ) - - -def _test_materialize_and_online_retrieval( - runner: CliRunner, - store: FeatureStore, - start_date: datetime, - end_date: datetime, - driver_df: pd.DataFrame, -): - assert store.repo_path is not None - - # Test `feast materialize` and online retrieval. - r = runner.run( - [ - "materialize", - start_date.isoformat(), - (end_date - timedelta(days=7)).isoformat(), - ], - cwd=Path(store.repo_path), - ) - - assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}" - _assert_online_features(store, driver_df, end_date - timedelta(days=7)) - - # Test `feast materialize-incremental` and online retrieval. - r = runner.run( - ["materialize-incremental", end_date.isoformat()], - cwd=Path(store.repo_path), - ) - - assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}" - _assert_online_features(store, driver_df, end_date) +from tests.utils.feature_store_test_utils import validate_online_features def test_e2e_local() -> None: @@ -217,3 +88,35 @@ def test_e2e_local() -> None: assert returncode != 0 assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output) + + +def _test_materialize_and_online_retrieval( + runner: CliRunner, + store: FeatureStore, + start_date: datetime, + end_date: datetime, + driver_df: pd.DataFrame, +): + assert store.repo_path is not None + + # Test `feast materialize` and online retrieval. + r = runner.run( + [ + "materialize", + start_date.isoformat(), + (end_date - timedelta(days=7)).isoformat(), + ], + cwd=Path(store.repo_path), + ) + + assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}" + validate_online_features(store, driver_df, end_date - timedelta(days=7)) + + # Test `feast materialize-incremental` and online retrieval. 
+ r = runner.run( + ["materialize-incremental", end_date.isoformat()], + cwd=Path(store.repo_path), + ) + + assert r.returncode == 0, f"stdout: {r.stdout}\n stderr: {r.stderr}" + validate_online_features(store, driver_df, end_date) diff --git a/sdk/python/tests/integration/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py similarity index 100% rename from sdk/python/tests/integration/scaffolding/test_repo_config.py rename to sdk/python/tests/unit/infra/scaffolding/test_repo_config.py diff --git a/sdk/python/tests/integration/scaffolding/test_repo_operations.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_operations.py similarity index 100% rename from sdk/python/tests/integration/scaffolding/test_repo_operations.py rename to sdk/python/tests/unit/infra/scaffolding/test_repo_operations.py diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index b35da9e4ff..88ab7d899d 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -32,10 +32,7 @@ from feast.stream_feature_view import StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType -from tests.utils.e2e_test_utils import ( - validate_project_uuid, - validate_registry_data_source_apply, -) +from tests.utils.e2e_test_utils import validate_registry_data_source_apply @pytest.fixture @@ -530,3 +527,9 @@ def test_commit(): # Will try to reload registry, which will fail because the file has been deleted with pytest.raises(FileNotFoundError): test_registry._get_registry_proto(project=project) + + +def validate_project_uuid(project_uuid, test_registry): + assert len(test_registry.cached_registry_proto.project_metadata) == 1 + project_metadata = test_registry.cached_registry_proto.project_metadata[0] + assert project_metadata.project_uuid == project_uuid diff --git a/sdk/python/tests/unit/test_feature_service.py b/sdk/python/tests/unit/test_feature_service.py index fc4fd70bcb..2bd6a5b127 100644 --- a/sdk/python/tests/unit/test_feature_service.py +++ b/sdk/python/tests/unit/test_feature_service.py @@ -5,6 +5,7 @@ from feast.field import Field from feast.infra.offline_stores.file_source import FileSource from feast.types import Float32 +from tests.utils.test_wrapper_utils import no_warnings def test_feature_service_with_description(): @@ -16,7 +17,6 @@ def test_feature_service_with_description(): def test_feature_service_without_description(): feature_service = FeatureService(name="my-feature-service", features=[]) - # assert feature_service.to_proto().spec.description == "" @@ -75,19 +75,6 @@ def test_feature_view_kw_args_warning(): service = FeatureService(features=[], tags={"tag_1": "tag"}, description="desc") -def no_warnings(func): - def wrapper_no_warnings(*args, **kwargs): - with pytest.warns(None) as warnings: - func(*args, **kwargs) - - if len(warnings) > 0: - raise AssertionError( - "Warnings were raised: " + ", ".join([str(w) for w in warnings]) - ) - - return wrapper_no_warnings - - @no_warnings def test_feature_view_kw_args_normal(): file_source = FileSource(name="my-file-source", path="test.parquet") diff --git a/sdk/python/tests/unit/test_proto_json.py b/sdk/python/tests/unit/test_proto_json.py index 235ebc7f93..b5e01744e4 100644 --- a/sdk/python/tests/unit/test_proto_json.py +++ b/sdk/python/tests/unit/test_proto_json.py @@ -12,11 +12,6 @@ FeatureVector = 
GetOnlineFeaturesResponse.FeatureVector -@pytest.fixture(scope="module") -def proto_json_patch(): - proto_json.patch() - - def test_feature_vector_values(proto_json_patch): # FeatureVector contains "repeated values" proto field. # We want to test that feast.types.Value can take different types in JSON @@ -106,3 +101,8 @@ def test_feature_list(proto_json_patch): assertpy.assert_that(feature_list_json).is_equal_to( ["feature-a", "feature-b", "feature-c"] ) + + +@pytest.fixture(scope="module") +def proto_json_patch(): + proto_json.patch() diff --git a/sdk/python/tests/utils/feature_store_test_utils.py b/sdk/python/tests/utils/feature_store_test_utils.py index 67866ffcaa..acc08ec121 100644 --- a/sdk/python/tests/utils/feature_store_test_utils.py +++ b/sdk/python/tests/utils/feature_store_test_utils.py @@ -6,7 +6,7 @@ from pandas.testing import assert_frame_equal as pd_assert_frame_equal from pytz import utc -from feast import utils +from feast import FeatureStore, utils from feast.errors import FeatureNameCollisionError from feast.feature_view import FeatureView @@ -397,3 +397,100 @@ def _get_feature_view_ttl( ) -> timedelta: """Returns the ttl of a feature view if it is non-zero. Otherwise returns the specified default.""" return feature_view.ttl if feature_view.ttl else default_ttl + + +def validate_online_features( + store: FeatureStore, driver_df: pd.DataFrame, max_date: datetime +): + """Assert that features in online store are up to date with `max_date` date.""" + # Read features back + response = store.get_online_features( + features=[ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:avg_daily_trips", + "global_daily_stats:num_rides", + "global_daily_stats:avg_ride_length", + ], + entity_rows=[{"driver_id": 1001}], + full_feature_names=True, + ) + + # Float features should still be floats. + assert ( + response.proto.results[ + list(response.proto.metadata.feature_names.val).index( + "driver_hourly_stats__conv_rate" + ) + ] + .values[0] + .float_val + > 0 + ), response.to_dict() + + result = response.to_dict() + assert len(result) == 5 + assert "driver_hourly_stats__avg_daily_trips" in result + assert "driver_hourly_stats__conv_rate" in result + assert ( + abs( + result["driver_hourly_stats__conv_rate"][0] + - get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] + ) + < 0.01 + ) + assert "global_daily_stats__num_rides" in result + assert "global_daily_stats__avg_ride_length" in result + + # Test the ODFV if it exists. + odfvs = store.list_on_demand_feature_views() + if odfvs and odfvs[0].name == "conv_rate_plus_100": + response = store.get_online_features( + features=[ + "conv_rate_plus_100:conv_rate_plus_100", + "conv_rate_plus_100:conv_rate_plus_val_to_add", + ], + entity_rows=[{"driver_id": 1001, "val_to_add": 100}], + full_feature_names=True, + ) + + # Check that float64 feature is stored correctly in proto format. 
+ assert ( + response.proto.results[ + list(response.proto.metadata.feature_names.val).index( + "conv_rate_plus_100__conv_rate_plus_100" + ) + ] + .values[0] + .double_val + > 0 + ) + + result = response.to_dict() + assert len(result) == 3 + assert "conv_rate_plus_100__conv_rate_plus_100" in result + assert "conv_rate_plus_100__conv_rate_plus_val_to_add" in result + assert ( + abs( + result["conv_rate_plus_100__conv_rate_plus_100"][0] + - (get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] + 100) + ) + < 0.01 + ) + assert ( + abs( + result["conv_rate_plus_100__conv_rate_plus_val_to_add"][0] + - (get_last_feature_row(driver_df, 1001, max_date)["conv_rate"] + 100) + ) + < 0.01 + ) + + +def get_last_feature_row(df: pd.DataFrame, driver_id, max_date: datetime): + """Manually extract last feature value from a dataframe for a given driver_id with up to `max_date` date""" + filtered = df[ + (df["driver_id"] == driver_id) + & (df["event_timestamp"] < max_date.replace(tzinfo=utc)) + ] + max_ts = filtered.loc[filtered["event_timestamp"].idxmax()]["event_timestamp"] + filtered_by_ts = filtered[filtered["event_timestamp"] == max_ts] + return filtered_by_ts.loc[filtered_by_ts["created"].idxmax()] diff --git a/sdk/python/tests/utils/online_read_write_test.py b/sdk/python/tests/utils/online_read_write_test_utils.py similarity index 100% rename from sdk/python/tests/utils/online_read_write_test.py rename to sdk/python/tests/utils/online_read_write_test_utils.py diff --git a/sdk/python/tests/utils/online_store_utils.py b/sdk/python/tests/utils/online_store_utils.py index 9cd7663869..4d21309218 100644 --- a/sdk/python/tests/utils/online_store_utils.py +++ b/sdk/python/tests/utils/online_store_utils.py @@ -8,7 +8,7 @@ from feast.protos.feast.types.Value_pb2 import Value as ValueProto -def _create_n_customer_test_samples(n=10): +def create_n_customer_test_samples(n=10): return [ ( EntityKeyProto( @@ -26,7 +26,7 @@ def _create_n_customer_test_samples(n=10): ] -def _create_test_table(project, tbl_name, region): +def create_test_table(project, tbl_name, region): client = boto3.client("dynamodb", region_name=region) client.create_table( TableName=f"{project}.{tbl_name}", @@ -36,12 +36,12 @@ def _create_test_table(project, tbl_name, region): ) -def _delete_test_table(project, tbl_name, region): +def delete_test_table(project, tbl_name, region): client = boto3.client("dynamodb", region_name=region) client.delete_table(TableName=f"{project}.{tbl_name}") -def _insert_data_test_table(data, project, tbl_name, region): +def insert_data_test_table(data, project, tbl_name, region): dynamodb_resource = boto3.resource("dynamodb", region_name=region) table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}") for entity_key, features, timestamp, created_ts in data: @@ -53,4 +53,4 @@ def _insert_data_test_table(data, project, tbl_name, region): "event_ts": str(utils.make_tzaware(timestamp)), "values": {k: v.SerializeToString() for k, v in features.items()}, } - ) + ) \ No newline at end of file diff --git a/sdk/python/tests/utils/online_write_benchmark.py b/sdk/python/tests/utils/online_write_benchmark_utils.py similarity index 100% rename from sdk/python/tests/utils/online_write_benchmark.py rename to sdk/python/tests/utils/online_write_benchmark_utils.py diff --git a/sdk/python/tests/utils/test_wrapper_utils.py b/sdk/python/tests/utils/test_wrapper_utils.py new file mode 100644 index 0000000000..efee675790 --- /dev/null +++ b/sdk/python/tests/utils/test_wrapper_utils.py @@ -0,0 +1,14 @@ +import pytest + + 
+def no_warnings(func):
+    def wrapper_no_warnings(*args, **kwargs):
+        with pytest.warns(None) as warnings:
+            func(*args, **kwargs)
+
+        if len(warnings) > 0:
+            raise AssertionError(
+                "Warnings were raised: " + ", ".join([str(w) for w in warnings])
+            )
+
+    return wrapper_no_warnings

From 374d0afd8a0b4ceb4798199fbdfd77da5a2cc87b Mon Sep 17 00:00:00 2001
From: Kevin Zhang
Date: Wed, 27 Jul 2022 13:49:46 -0700
Subject: [PATCH 06/30] Fix lint

Signed-off-by: Kevin Zhang
---
 sdk/python/tests/utils/online_store_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/tests/utils/online_store_utils.py b/sdk/python/tests/utils/online_store_utils.py
index 4d21309218..20bac122b3 100644
--- a/sdk/python/tests/utils/online_store_utils.py
+++ b/sdk/python/tests/utils/online_store_utils.py
@@ -53,4 +53,4 @@ def insert_data_test_table(data, project, tbl_name, region):
             "event_ts": str(utils.make_tzaware(timestamp)),
             "values": {k: v.SerializeToString() for k, v in features.items()},
         }
-    )
\ No newline at end of file
+    )

From 826524bde0db95b2f454e215a7d15fa2d5c876a1 Mon Sep 17 00:00:00 2001
From: Kevin Zhang
Date: Wed, 27 Jul 2022 14:25:41 -0700
Subject: [PATCH 07/30] Address review

Signed-off-by: Kevin Zhang
---
 sdk/python/tests/utils/feature_test_utils.py | 32 ++++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/sdk/python/tests/utils/feature_test_utils.py b/sdk/python/tests/utils/feature_test_utils.py
index a3c9624b9e..5c7d5524c8 100644
--- a/sdk/python/tests/utils/feature_test_utils.py
+++ b/sdk/python/tests/utils/feature_test_utils.py
@@ -15,19 +15,21 @@
 from feast.feature_logging import LOG_DATE_FIELD, LOG_TIMESTAMP_FIELD, REQUEST_ID_FIELD
 from feast.protos.feast.serving.ServingService_pb2 import FieldStatus
 
-"""
-Return latest rows in a dataframe based on join key and entity values.
-"""
-
 def get_latest_rows(df, join_key, entity_values):
-    rows = df[df[join_key].isin(entity_values)]
-    return rows.loc[rows.groupby(join_key)["event_timestamp"].idxmax()]
+    """
+    Return latest rows in a dataframe based on join key and entity values.
+    Args:
+        df: Dataframe of feature values.
+        join_key: Join key for the feature values in the dataframe.
+        entity_values: Entity values for the feature values in the dataframe.
-"""
-Given dataframe and feature view, generate the expected logging dataframes that would be otherwise generated by our logging infrastructure.
-"""
+    Returns:
+        The most recent row in the dataframe for each entity value.
+    """
+    rows = df[df[join_key].isin(entity_values)]
+    return rows.loc[rows.groupby(join_key)["event_timestamp"].idxmax()]
 
 
 def generate_expected_logs(
@@ -37,6 +39,18 @@ def generate_expected_logs(
     features: List[str],
     join_keys: List[str],
     timestamp_column: str,
 ):
+    """
+    Given a dataframe and feature view, generate the expected logging dataframes that would otherwise be generated by our logging infrastructure.
+    Args:
+        df: Dataframe of feature values returned by `get_online_features`.
+        feature_view: The feature view from which the features were retrieved.
+        features: The list of features defined as part of this base feature view.
+        join_keys: Join keys for the retrieved features.
+        timestamp_column: Timestamp column of the dataframe.
+
+    Returns:
+        Dataframe containing the expected logs.
+ """ logs = pd.DataFrame() for join_key in join_keys: logs[join_key] = df[join_key] From 11a059cd9a7195ee55c65023b1bc606fbf912171 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 14:48:53 -0700 Subject: [PATCH 08/30] Fix Signed-off-by: Kevin Zhang --- .../integration/registration/test_cli.py | 71 ++++++++++++++++- sdk/python/tests/utils/e2e_test_utils.py | 77 ------------------- 2 files changed, 69 insertions(+), 79 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index e4f78c95e3..fe74c2069f 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -1,5 +1,6 @@ import tempfile import uuid +from contextlib import contextmanager from pathlib import Path from textwrap import dedent @@ -12,8 +13,6 @@ from tests.utils.e2e_test_utils import ( NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, - setup_third_party_provider_repo, - setup_third_party_registry_store_repo, ) from tests.utils.online_read_write_test_utils import basic_rw_test @@ -232,3 +231,71 @@ def test_3rd_party_registry_store() -> None: ) as repo_path: return_code, output = runner.run_with_output(["apply"], cwd=repo_path) assertpy.assert_that(return_code).is_equal_to(0) + + +@contextmanager +def setup_third_party_provider_repo(provider_name: str): + with tempfile.TemporaryDirectory() as repo_dir_name: + + # Construct an example repo in a temporary dir + repo_path = Path(repo_dir_name) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text( + dedent( + f""" + project: foo + registry: data/registry.db + provider: {provider_name} + online_store: + path: data/online_store.db + type: sqlite + offline_store: + type: file + """ + ) + ) + + (repo_path / "foo").mkdir() + repo_example = repo_path / "foo/provider.py" + repo_example.write_text( + (Path(__file__).parents[2] / "foo_provider.py").read_text() + ) + + yield repo_path + + +@contextmanager +def setup_third_party_registry_store_repo(registry_store: str): + with tempfile.TemporaryDirectory() as repo_dir_name: + + # Construct an example repo in a temporary dir + repo_path = Path(repo_dir_name) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text( + dedent( + f""" + project: foo + registry: + registry_store_type: {registry_store} + path: foobar://foo.bar + provider: local + online_store: + path: data/online_store.db + type: sqlite + offline_store: + type: file + """ + ) + ) + + (repo_path / "foo").mkdir() + repo_example = repo_path / "foo/registry_store.py" + repo_example.write_text( + (Path(__file__).parents[2] / "foo_registry_store.py").read_text() + ) + + yield repo_path diff --git a/sdk/python/tests/utils/e2e_test_utils.py b/sdk/python/tests/utils/e2e_test_utils.py index 7d898f9505..7b841afff2 100644 --- a/sdk/python/tests/utils/e2e_test_utils.py +++ b/sdk/python/tests/utils/e2e_test_utils.py @@ -1,11 +1,8 @@ import math import os -import tempfile import time -from contextlib import contextmanager from datetime import datetime, timedelta from pathlib import Path -from textwrap import dedent from typing import List, Optional import pandas as pd @@ -219,74 +216,6 @@ def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): ) -@contextmanager -def setup_third_party_provider_repo(provider_name: str): - with tempfile.TemporaryDirectory() as repo_dir_name: - - # Construct an example repo in a temporary dir - repo_path = Path(repo_dir_name) - - 
repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text( - dedent( - f""" - project: foo - registry: data/registry.db - provider: {provider_name} - online_store: - path: data/online_store.db - type: sqlite - offline_store: - type: file - """ - ) - ) - - (repo_path / "foo").mkdir() - repo_example = repo_path / "foo/provider.py" - repo_example.write_text( - (Path(__file__).parents[2] / "foo_provider.py").read_text() - ) - - yield repo_path - - -@contextmanager -def setup_third_party_registry_store_repo(registry_store: str): - with tempfile.TemporaryDirectory() as repo_dir_name: - - # Construct an example repo in a temporary dir - repo_path = Path(repo_dir_name) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text( - dedent( - f""" - project: foo - registry: - registry_store_type: {registry_store} - path: foobar://foo.bar - provider: local - online_store: - path: data/online_store.db - type: sqlite - offline_store: - type: file - """ - ) - ) - - (repo_path / "foo").mkdir() - repo_example = repo_path / "foo/registry_store.py" - repo_example.write_text( - (Path(__file__).parents[2] / "foo_registry_store.py").read_text() - ) - - yield repo_path - - def validate_registry_data_source_apply(test_registry: Registry): # Create Feature Views batch_source = FileSource( @@ -346,9 +275,3 @@ def validate_registry_data_source_apply(test_registry: Registry): # Will try to reload registry, which will fail because the file has been deleted with pytest.raises(FileNotFoundError): test_registry._get_registry_proto(project=project) - - -def validate_project_uuid(project_uuid, test_registry): - assert len(test_registry.cached_registry_proto.project_metadata) == 1 - project_metadata = test_registry.cached_registry_proto.project_metadata[0] - assert project_metadata.project_uuid == project_uuid From 56c04f0c7e2b0bc713c6381766b6d5e84e369a9f Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 15:15:02 -0700 Subject: [PATCH 09/30] Address review Signed-off-by: Kevin Zhang --- .../e2e}/test_e2e_local.py | 2 + sdk/python/tests/utils/e2e_test_utils.py | 128 +++++++++--------- 2 files changed, 66 insertions(+), 64 deletions(-) rename sdk/python/tests/{unit/infra/online_store => integration/e2e}/test_e2e_local.py (99%) diff --git a/sdk/python/tests/unit/infra/online_store/test_e2e_local.py b/sdk/python/tests/integration/e2e/test_e2e_local.py similarity index 99% rename from sdk/python/tests/unit/infra/online_store/test_e2e_local.py rename to sdk/python/tests/integration/e2e/test_e2e_local.py index 0c4cf36563..262e7687c1 100644 --- a/sdk/python/tests/unit/infra/online_store/test_e2e_local.py +++ b/sdk/python/tests/integration/e2e/test_e2e_local.py @@ -4,6 +4,7 @@ from pathlib import Path import pandas as pd +import pytest from feast.driver_test_data import ( create_driver_hourly_stats_df, @@ -14,6 +15,7 @@ from tests.utils.feature_store_test_utils import validate_online_features +@pytest.mark.integration def test_e2e_local() -> None: """ Tests the end-to-end workflow of apply, materialize, and online retrieval. 
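An aside on the `@pytest.mark.integration` markers added in this patch: they only take effect if the marker is registered and test runs filter on it. Below is a minimal sketch of how such a marker is typically wired up in a conftest.py; the hook and `addinivalue_line` call are standard pytest API, but the description string is illustrative rather than copied from the Feast test suite:

    # conftest.py (sketch): register the custom marker so pytest does not warn
    # about unknown markers, and so CI can select or skip these tests with
    # `pytest -m integration` or `pytest -m "not integration"`.
    def pytest_configure(config):
        config.addinivalue_line(
            "markers", "integration: test requires external infrastructure"
        )
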
diff --git a/sdk/python/tests/utils/e2e_test_utils.py b/sdk/python/tests/utils/e2e_test_utils.py index 7b841afff2..b2eb78f3c8 100644 --- a/sdk/python/tests/utils/e2e_test_utils.py +++ b/sdk/python/tests/utils/e2e_test_utils.py @@ -33,7 +33,70 @@ ) -def check_offline_and_online_features( +def validate_offline_online_store_consistency( + fs: FeatureStore, fv: FeatureView, split_dt: datetime +) -> None: + now = datetime.utcnow() + + full_feature_names = True + check_offline_store: bool = True + + # Run materialize() + # use both tz-naive & tz-aware timestamps to test that they're both correctly handled + start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) + end_date = split_dt + fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date) + + time.sleep(10) + + # check result of materialize() + _check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=1, + event_timestamp=end_date, + expected_value=0.3, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + _check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=2, + event_timestamp=end_date, + expected_value=None, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + # check prior value for materialize_incremental() + _check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=3, + event_timestamp=end_date, + expected_value=4, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + # run materialize_incremental() + fs.materialize_incremental(feature_views=[fv.name], end_date=now) + + # check result of materialize_incremental() + _check_offline_and_online_features( + fs=fs, + fv=fv, + driver_id=3, + event_timestamp=now, + expected_value=5, + full_feature_names=full_feature_names, + check_offline_store=check_offline_store, + ) + + +def _check_offline_and_online_features( fs: FeatureStore, fv: FeatureView, driver_id: int, @@ -101,69 +164,6 @@ def check_offline_and_online_features( ) -def validate_offline_online_store_consistency( - fs: FeatureStore, fv: FeatureView, split_dt: datetime -) -> None: - now = datetime.utcnow() - - full_feature_names = True - check_offline_store: bool = True - - # Run materialize() - # use both tz-naive & tz-aware timestamps to test that they're both correctly handled - start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) - end_date = split_dt - fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date) - - time.sleep(10) - - # check result of materialize() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=1, - event_timestamp=end_date, - expected_value=0.3, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=2, - event_timestamp=end_date, - expected_value=None, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # check prior value for materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=end_date, - expected_value=4, - full_feature_names=full_feature_names, - check_offline_store=check_offline_store, - ) - - # run materialize_incremental() - fs.materialize_incremental(feature_views=[fv.name], end_date=now) - - # check result of materialize_incremental() - check_offline_and_online_features( - fs=fs, - fv=fv, - driver_id=3, - event_timestamp=now, - expected_value=5, - full_feature_names=full_feature_names, - 
check_offline_store=check_offline_store, - ) - - def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project) From 7b6ee9ff58380318ee8a3548d33777b019a9cfea Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Wed, 27 Jul 2022 15:42:30 -0700 Subject: [PATCH 10/30] Fix Signed-off-by: Kevin Zhang --- sdk/python/tests/utils/feature_test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/utils/feature_test_utils.py b/sdk/python/tests/utils/feature_test_utils.py index 5c7d5524c8..dc18b8d631 100644 --- a/sdk/python/tests/utils/feature_test_utils.py +++ b/sdk/python/tests/utils/feature_test_utils.py @@ -16,7 +16,7 @@ from feast.protos.feast.serving.ServingService_pb2 import FieldStatus -def get_latest_rows(df, join_key, entity_values): +def get_latest_rows(df: pd.DataFrame, join_key: str, entity_values: List[str]) -> pd.DataFrame: """ Return latest rows in a dataframe based on join key and entity values. @@ -38,7 +38,7 @@ def generate_expected_logs( features: List[str], join_keys: List[str], timestamp_column: str, -): +) -> pd.DataFrame: """ Given dataframe and feature view, generate the expected logging dataframes that would be otherwise generated by our logging infrastructure. Args: From 48354990fc5ec0987997d9a29a90cadb02f4fd0d Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 09:29:05 -0700 Subject: [PATCH 11/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/utils/feature_test_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/utils/feature_test_utils.py b/sdk/python/tests/utils/feature_test_utils.py index dc18b8d631..ec0d92814c 100644 --- a/sdk/python/tests/utils/feature_test_utils.py +++ b/sdk/python/tests/utils/feature_test_utils.py @@ -16,7 +16,9 @@ from feast.protos.feast.serving.ServingService_pb2 import FieldStatus -def get_latest_rows(df: pd.DataFrame, join_key: str, entity_values: List[str]) -> pd.DataFrame: +def get_latest_rows( + df: pd.DataFrame, join_key: str, entity_values: List[str] +) -> pd.DataFrame: """ Return latest rows in a dataframe based on join key and entity values. 
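The `get_latest_rows` helper that the two patches above annotate and reformat is a plain pandas groupby/idxmax lookup. Here is a self-contained sketch of what it computes, using made-up driver rows; only the `event_timestamp` column name is taken from the helper itself:

    import pandas as pd

    df = pd.DataFrame(
        {
            "driver_id": [1001, 1001, 1002],
            "event_timestamp": pd.to_datetime(
                ["2022-07-01", "2022-07-02", "2022-07-01"]
            ),
            "conv_rate": [0.1, 0.3, 0.5],
        }
    )

    # Same steps as get_latest_rows(df, "driver_id", [1001, 1002]):
    rows = df[df["driver_id"].isin([1001, 1002])]
    # idxmax picks, per driver_id group, the index of the newest event_timestamp;
    # .loc then keeps exactly one (the latest) row per driver.
    latest = rows.loc[rows.groupby("driver_id")["event_timestamp"].idxmax()]
    print(latest)  # one row per driver_id, the most recent by event_timestamp
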
From 44164f8921dc453045a417504bff4a35f5778989 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 11:28:34 -0700 Subject: [PATCH 12/30] address review Signed-off-by: Kevin Zhang --- .../integration/e2e/test_go_feature_server.py | 3 +- .../integration/e2e/test_universal_e2e.py | 44 +++ .../registration/test_feature_store.py | 259 +---------------- .../registration/test_universal_cli.py | 138 +++++++++ .../scaffolding/test_partial_apply.py | 48 ---- .../registration => unit/cli}/test_cli.py | 131 --------- .../cli}/test_cli_apply_duplicates.py | 7 - .../cli}/test_cli_chdir.py | 3 - .../unit/infra/test_inference_unit_tests.py | 18 +- .../tests/unit/infra/test_local_registry.py | 2 +- .../local_feast_tests}/test_e2e_local.py | 0 .../test_feature_service_apply.py | 3 - .../test_feature_service_read.py | 0 .../local_feast_tests}/test_init.py | 4 - .../test_local_feature_store.py | 266 ++++++++++++++++++ .../test_stream_feature_view_apply.py | 4 - ...ttp_utils.py => http_server_test_utils.py} | 0 ...test_wrapper_utils.py => test_wrappers.py} | 0 18 files changed, 458 insertions(+), 472 deletions(-) create mode 100644 sdk/python/tests/integration/registration/test_universal_cli.py delete mode 100644 sdk/python/tests/integration/scaffolding/test_partial_apply.py rename sdk/python/tests/{integration/registration => unit/cli}/test_cli.py (51%) rename sdk/python/tests/{integration/registration => unit/cli}/test_cli_apply_duplicates.py (97%) rename sdk/python/tests/{integration/registration => unit/cli}/test_cli_chdir.py (97%) rename sdk/python/tests/{integration/e2e => unit/local_feast_tests}/test_e2e_local.py (100%) rename sdk/python/tests/{integration/registration => unit/local_feast_tests}/test_feature_service_apply.py (95%) rename sdk/python/tests/{integration/online_store => unit/local_feast_tests}/test_feature_service_read.py (100%) rename sdk/python/tests/{integration/scaffolding => unit/local_feast_tests}/test_init.py (97%) create mode 100644 sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py rename sdk/python/tests/{integration/registration => unit/local_feast_tests}/test_stream_feature_view_apply.py (98%) rename sdk/python/tests/utils/{http_utils.py => http_server_test_utils.py} (100%) rename sdk/python/tests/utils/{test_wrapper_utils.py => test_wrappers.py} (100%) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index fb28462864..3e48c4bef8 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -9,10 +9,10 @@ import pytz import requests -from feast import FeatureService, ValueType from feast.embedded_go.online_features_service import EmbeddedOnlineFeatureServer from feast.feast_object import FeastObject from feast.feature_logging import LoggingConfig +from feast.feature_service import FeatureService from feast.infra.feature_servers.base_config import FeatureLoggingConfig from feast.protos.feast.serving.ServingService_pb2 import ( FieldStatus, @@ -22,6 +22,7 @@ from feast.protos.feast.serving.ServingService_pb2_grpc import ServingServiceStub from feast.protos.feast.types.Value_pb2 import RepeatedValue from feast.type_map import python_values_to_proto_values +from feast.value_type import ValueType from feast.wait import wait_retry_backoff from tests.integration.feature_repos.repo_configuration import ( construct_universal_feature_views, diff --git 
a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 122d345ce4..4ba93dfef0 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -2,9 +2,13 @@ import pytest +from feast import BigQuerySource, Entity, FeatureView, Field +from feast.types import Float32, String from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view +from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.e2e_test_utils import validate_offline_online_store_consistency +from tests.utils.online_read_write_test_utils import basic_rw_test @pytest.mark.integration @@ -27,3 +31,43 @@ def test_e2e_consistency(environment, e2e_data_sources, infer_features): split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) validate_offline_online_store_consistency(fs, fv, split_dt) + + +@pytest.mark.integration +def test_partial() -> None: + """ + Add another table to existing repo using partial apply API. Make sure both the table + applied via CLI apply and the new table are passing RW test. + """ + + runner = CliRunner() + with runner.local_repo( + get_example_repo("example_feature_repo_1.py"), "bigquery" + ) as store: + driver = Entity(name="driver", join_keys=["test"]) + + driver_locations_source = BigQuerySource( + table="feast-oss.public.drivers", + timestamp_field="event_timestamp", + created_timestamp_column="created_timestamp", + ) + + driver_locations_100 = FeatureView( + name="driver_locations_100", + entities=[driver], + ttl=timedelta(days=1), + schema=[ + Field(name="lat", dtype=Float32), + Field(name="lon", dtype=String), + Field(name="name", dtype=String), + Field(name="test", dtype=String), + ], + online=True, + batch_source=driver_locations_source, + tags={}, + ) + + store.apply([driver_locations_100]) + + basic_rw_test(store, view_name="driver_locations") + basic_rw_test(store, view_name="driver_locations_100") diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index 25f622db72..f8c266e361 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -13,8 +13,7 @@ # limitations under the License. 
import os import time -from datetime import datetime, timedelta -from tempfile import mkstemp +from datetime import timedelta import pytest from pytest_lazyfixture import lazy_fixture @@ -27,7 +26,6 @@ from feast.field import Field from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig -from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig from feast.types import Array, Bytes, Float64, Int64, String from tests.utils.data_source_utils import ( @@ -37,35 +35,6 @@ ) -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [lazy_fixture("feature_store_with_local_registry")], -) -def test_apply_entity_success(test_feature_store): - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - # Register Entity - test_feature_store.apply(entity) - - entities = test_feature_store.list_entities() - - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - test_feature_store.teardown() - - @pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", @@ -106,61 +75,6 @@ def test_apply_entity_integration(test_feature_store): test_feature_store.teardown() -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [lazy_fixture("feature_store_with_local_registry")], -) -def test_apply_feature_view_success(test_feature_store): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - date_partition_column="date_partition_col", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["entity_id"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - Field(name="entity_id", dtype=Int64), - ], - entities=[entity], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - # Register Feature View - test_feature_store.apply([entity, fv1]) - - feature_views = test_feature_store.list_feature_views() - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == String - and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == Array(String) - and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == Array(Bytes) - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - test_feature_store.teardown() - - @pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", @@ -304,177 +218,6 @@ def test_apply_feature_view_integration(test_feature_store): test_feature_store.teardown() -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [lazy_fixture("feature_store_with_local_registry")], -) -def test_apply_object_and_read(test_feature_store): - assert 
isinstance(test_feature_store, FeatureStore) - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - e1 = Entity(name="fs1_my_entity_1", description="something") - - e2 = Entity(name="fs1_my_entity_2", description="something") - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - Field(name="fs1_my_entity_1", dtype=Int64), - ], - entities=[e1], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - fv2 = FeatureView( - name="my_feature_view_2", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - Field(name="fs1_my_entity_2", dtype=Int64), - ], - entities=[e2], - tags={"team": "matchmaking"}, - batch_source=batch_source, - ttl=timedelta(minutes=5), - ) - - # Register Feature View - test_feature_store.apply([fv1, e1, fv2, e2]) - - fv1_actual = test_feature_store.get_feature_view("my_feature_view_1") - e1_actual = test_feature_store.get_entity("fs1_my_entity_1") - - assert e1 == e1_actual - assert fv2 != fv1_actual - assert e2 != e1_actual - - test_feature_store.teardown() - - -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [lazy_fixture("feature_store_with_local_registry")], -) -@pytest.mark.parametrize("dataframe_source", [lazy_fixture("simple_dataset_1")]) -def test_reapply_feature_view_success(test_feature_store, dataframe_source): - with prep_file_source(df=dataframe_source, timestamp_field="ts_1") as file_source: - - e = Entity(name="id", join_keys=["id_join_key"]) - - # Create Feature View - fv1 = FeatureView( - name="my_feature_view_1", - schema=[Field(name="string_col", dtype=String)], - entities=[e], - batch_source=file_source, - ttl=timedelta(minutes=5), - ) - - # Register Feature View - test_feature_store.apply([fv1, e]) - - # Check Feature View - fv_stored = test_feature_store.get_feature_view(fv1.name) - assert len(fv_stored.materialization_intervals) == 0 - - # Run materialization - test_feature_store.materialize(datetime(2020, 1, 1), datetime(2021, 1, 1)) - - # Check Feature View - fv_stored = test_feature_store.get_feature_view(fv1.name) - assert len(fv_stored.materialization_intervals) == 1 - - # Apply again - test_feature_store.apply([fv1]) - - # Check Feature View - fv_stored = test_feature_store.get_feature_view(fv1.name) - assert len(fv_stored.materialization_intervals) == 1 - - # Change and apply Feature View - fv1 = FeatureView( - name="my_feature_view_1", - schema=[Field(name="int64_col", dtype=Int64)], - entities=[e], - batch_source=file_source, - ttl=timedelta(minutes=5), - ) - test_feature_store.apply([fv1]) - - # Check Feature View - fv_stored = test_feature_store.get_feature_view(fv1.name) - assert len(fv_stored.materialization_intervals) == 0 - - test_feature_store.teardown() - - -@pytest.mark.integration -def test_apply_conflicting_featureview_names(feature_store_with_local_registry): - """Test applying feature views with non-case-insensitively unique names""" - driver = Entity(name="driver", join_keys=["driver_id"]) - customer = Entity(name="customer", join_keys=["customer_id"]) - - 
driver_stats = FeatureView( - name="driver_hourly_stats", - entities=[driver], - ttl=timedelta(seconds=10), - online=False, - batch_source=FileSource(path="driver_stats.parquet"), - tags={}, - ) - - customer_stats = FeatureView( - name="DRIVER_HOURLY_STATS", - entities=[customer], - ttl=timedelta(seconds=10), - online=False, - batch_source=FileSource(path="customer_stats.parquet"), - tags={}, - ) - try: - feature_store_with_local_registry.apply([driver_stats, customer_stats]) - error = None - except ValueError as e: - error = e - assert ( - isinstance(error, ValueError) - and "Please ensure that all feature view names are case-insensitively unique" - in error.args[0] - ) - - feature_store_with_local_registry.teardown() - - -@pytest.fixture -def feature_store_with_local_registry(): - fd, registry_path = mkstemp() - fd, online_store_path = mkstemp() - return FeatureStore( - config=RepoConfig( - registry=registry_path, - project="default", - provider="local", - online_store=SqliteOnlineStoreConfig(path=online_store_path), - ) - ) - - @pytest.fixture def feature_store_with_gcs_registry(): from google.cloud import storage diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py new file mode 100644 index 0000000000..20ec962e6f --- /dev/null +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -0,0 +1,138 @@ +import tempfile +import uuid +from pathlib import Path +from textwrap import dedent + +import pytest +from assertpy import assertpy + +from feast import FeatureStore +from tests.integration.feature_repos.repo_configuration import Environment +from tests.utils.cli_utils import CliRunner, get_example_repo +from tests.utils.e2e_test_utils import make_feature_store_yaml +from tests.utils.online_read_write_test_utils import basic_rw_test + + +@pytest.mark.integration +@pytest.mark.universal_offline_stores +def test_universal_cli(environment: Environment): + project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}" + runner = CliRunner() + + with tempfile.TemporaryDirectory() as repo_dir_name: + try: + repo_path = Path(repo_dir_name) + feature_store_yaml = make_feature_store_yaml( + project, environment.test_repo_config, repo_path + ) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text(dedent(feature_store_yaml)) + + repo_example = repo_path / "example.py" + repo_example.write_text(get_example_repo("example_feature_repo_1.py")) + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + + # Store registry contents, to be compared later. + fs = FeatureStore(repo_path=str(repo_path)) + registry_dict = fs.registry.to_dict(project=project) + # Save only the specs, not the metadata. 
+ registry_specs = { + key: [fco["spec"] if "spec" in fco else fco for fco in value] + for key, value in registry_dict.items() + } + + # entity & feature view list commands should succeed + result = runner.run(["entities", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run(["feature-views", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run(["feature-services", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run(["data-sources", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + + # entity & feature view describe commands should succeed when objects exist + result = runner.run(["entities", "describe", "driver"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run( + ["feature-views", "describe", "driver_locations"], cwd=repo_path + ) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run( + ["feature-services", "describe", "driver_locations_service"], + cwd=repo_path, + ) + assertpy.assert_that(result.returncode).is_equal_to(0) + assertpy.assert_that(fs.list_feature_views()).is_length(4) + result = runner.run( + ["data-sources", "describe", "customer_profile_source"], + cwd=repo_path, + ) + assertpy.assert_that(result.returncode).is_equal_to(0) + assertpy.assert_that(fs.list_data_sources()).is_length(4) + + # entity & feature view describe commands should fail when objects don't exist + result = runner.run(["entities", "describe", "foo"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(1) + result = runner.run(["feature-views", "describe", "foo"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(1) + result = runner.run(["feature-services", "describe", "foo"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(1) + result = runner.run(["data-sources", "describe", "foo"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(1) + + # Doing another apply should be a no op, and should not cause errors + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + basic_rw_test( + FeatureStore(repo_path=str(repo_path), config=None), + view_name="driver_locations", + ) + + # Confirm that registry contents have not changed. 
+ registry_dict = fs.registry.to_dict(project=project) + assertpy.assert_that(registry_specs).is_equal_to( + { + key: [fco["spec"] if "spec" in fco else fco for fco in value] + for key, value in registry_dict.items() + } + ) + + result = runner.run(["teardown"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + finally: + runner.run(["teardown"], cwd=repo_path) + + +@pytest.mark.integration +@pytest.mark.universal_offline_stores +def test_odfv_apply(environment) -> None: + project = f"test_odfv_apply{str(uuid.uuid4()).replace('-', '')[:8]}" + runner = CliRunner() + + with tempfile.TemporaryDirectory() as repo_dir_name: + try: + repo_path = Path(repo_dir_name) + feature_store_yaml = make_feature_store_yaml( + project, environment.test_repo_config, repo_path + ) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text(dedent(feature_store_yaml)) + + repo_example = repo_path / "example.py" + repo_example.write_text(get_example_repo("on_demand_feature_view_repo.py")) + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + + # entity & feature view list commands should succeed + result = runner.run(["entities", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + result = runner.run(["on-demand-feature-views", "list"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + finally: + runner.run(["teardown"], cwd=repo_path) diff --git a/sdk/python/tests/integration/scaffolding/test_partial_apply.py b/sdk/python/tests/integration/scaffolding/test_partial_apply.py deleted file mode 100644 index 810da0776b..0000000000 --- a/sdk/python/tests/integration/scaffolding/test_partial_apply.py +++ /dev/null @@ -1,48 +0,0 @@ -from datetime import timedelta - -import pytest - -from feast import BigQuerySource, Entity, FeatureView, Field -from feast.types import Float32, String -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.online_read_write_test_utils import basic_rw_test - - -@pytest.mark.integration -def test_partial() -> None: - """ - Add another table to existing repo using partial apply API. Make sure both the table - applied via CLI apply and the new table are passing RW test. 
- """ - - runner = CliRunner() - with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "bigquery" - ) as store: - driver = Entity(name="driver", join_keys=["test"]) - - driver_locations_source = BigQuerySource( - table="feast-oss.public.drivers", - timestamp_field="event_timestamp", - created_timestamp_column="created_timestamp", - ) - - driver_locations_100 = FeatureView( - name="driver_locations_100", - entities=[driver], - ttl=timedelta(days=1), - schema=[ - Field(name="lat", dtype=Float32), - Field(name="lon", dtype=String), - Field(name="name", dtype=String), - Field(name="test", dtype=String), - ], - online=True, - batch_source=driver_locations_source, - tags={}, - ) - - store.apply([driver_locations_100]) - - basic_rw_test(store, view_name="driver_locations") - basic_rw_test(store, view_name="driver_locations_100") diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py similarity index 51% rename from sdk/python/tests/integration/registration/test_cli.py rename to sdk/python/tests/unit/cli/test_cli.py index fe74c2069f..f20eea77f7 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -7,111 +7,13 @@ import pytest from assertpy import assertpy -from feast import FeatureStore -from tests.integration.feature_repos.repo_configuration import Environment from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.e2e_test_utils import ( NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, ) -from tests.utils.online_read_write_test_utils import basic_rw_test -@pytest.mark.integration -@pytest.mark.universal_offline_stores -def test_universal_cli(environment: Environment): - project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}" - runner = CliRunner() - - with tempfile.TemporaryDirectory() as repo_dir_name: - try: - repo_path = Path(repo_dir_name) - feature_store_yaml = make_feature_store_yaml( - project, environment.test_repo_config, repo_path - ) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text(dedent(feature_store_yaml)) - - repo_example = repo_path / "example.py" - repo_example.write_text(get_example_repo("example_feature_repo_1.py")) - result = runner.run(["apply"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - - # Store registry contents, to be compared later. - fs = FeatureStore(repo_path=str(repo_path)) - registry_dict = fs.registry.to_dict(project=project) - # Save only the specs, not the metadata. 
- registry_specs = { - key: [fco["spec"] if "spec" in fco else fco for fco in value] - for key, value in registry_dict.items() - } - - # entity & feature view list commands should succeed - result = runner.run(["entities", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run(["feature-views", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run(["feature-services", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run(["data-sources", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - - # entity & feature view describe commands should succeed when objects exist - result = runner.run(["entities", "describe", "driver"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run( - ["feature-views", "describe", "driver_locations"], cwd=repo_path - ) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run( - ["feature-services", "describe", "driver_locations_service"], - cwd=repo_path, - ) - assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_feature_views()).is_length(4) - result = runner.run( - ["data-sources", "describe", "customer_profile_source"], - cwd=repo_path, - ) - assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_data_sources()).is_length(4) - - # entity & feature view describe commands should fail when objects don't exist - result = runner.run(["entities", "describe", "foo"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(1) - result = runner.run(["feature-views", "describe", "foo"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(1) - result = runner.run(["feature-services", "describe", "foo"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(1) - result = runner.run(["data-sources", "describe", "foo"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(1) - - # Doing another apply should be a no op, and should not cause errors - result = runner.run(["apply"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - basic_rw_test( - FeatureStore(repo_path=str(repo_path), config=None), - view_name="driver_locations", - ) - - # Confirm that registry contents have not changed. 
- registry_dict = fs.registry.to_dict(project=project) - assertpy.assert_that(registry_specs).is_equal_to( - { - key: [fco["spec"] if "spec" in fco else fco for fco in value] - for key, value in registry_dict.items() - } - ) - - result = runner.run(["teardown"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - finally: - runner.run(["teardown"], cwd=repo_path) - - -@pytest.mark.integration @pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) def test_nullable_online_store(test_nullable_online_store) -> None: project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}" @@ -136,38 +38,6 @@ def test_nullable_online_store(test_nullable_online_store) -> None: runner.run(["teardown"], cwd=repo_path) -@pytest.mark.integration -@pytest.mark.universal_offline_stores -def test_odfv_apply(environment) -> None: - project = f"test_odfv_apply{str(uuid.uuid4()).replace('-', '')[:8]}" - runner = CliRunner() - - with tempfile.TemporaryDirectory() as repo_dir_name: - try: - repo_path = Path(repo_dir_name) - feature_store_yaml = make_feature_store_yaml( - project, environment.test_repo_config, repo_path - ) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text(dedent(feature_store_yaml)) - - repo_example = repo_path / "example.py" - repo_example.write_text(get_example_repo("on_demand_feature_view_repo.py")) - result = runner.run(["apply"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - - # entity & feature view list commands should succeed - result = runner.run(["entities", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - result = runner.run(["on-demand-feature-views", "list"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - finally: - runner.run(["teardown"], cwd=repo_path) - - -@pytest.mark.integration def test_3rd_party_providers() -> None: """ Test running apply on third party providers @@ -198,7 +68,6 @@ def test_3rd_party_providers() -> None: assertpy.assert_that(return_code).is_equal_to(0) -@pytest.mark.integration def test_3rd_party_registry_store() -> None: """ Test running apply on third party registry stores diff --git a/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py similarity index 97% rename from sdk/python/tests/integration/registration/test_cli_apply_duplicates.py rename to sdk/python/tests/unit/cli/test_cli_apply_duplicates.py index dd8ba69708..bad3b50a80 100644 --- a/sdk/python/tests/integration/registration/test_cli_apply_duplicates.py +++ b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py @@ -2,12 +2,9 @@ from pathlib import Path from textwrap import dedent -import pytest - from tests.utils.cli_utils import CliRunner, get_example_repo -@pytest.mark.integration def test_cli_apply_duplicated_featureview_names() -> None: run_simple_apply_test( example_repo_file_name="example_feature_repo_with_duplicated_featureview_names.py", @@ -15,7 +12,6 @@ def test_cli_apply_duplicated_featureview_names() -> None: ) -@pytest.mark.integration def test_cli_apply_duplicate_data_source_names() -> None: run_simple_apply_test( example_repo_file_name="example_repo_duplicate_data_source_names.py", @@ -51,7 +47,6 @@ def run_simple_apply_test(example_repo_file_name: str, expected_error: bytes): assert rc != 0 and expected_error in output -@pytest.mark.integration def test_cli_apply_imported_featureview() -> None: """ Test apply feature views 
with duplicated names and single py file in a feature repo using CLI @@ -95,7 +90,6 @@ def test_cli_apply_imported_featureview() -> None: assert b"Created feature service driver_locations_service" in output -@pytest.mark.integration def test_cli_apply_imported_featureview_with_duplication() -> None: """ Test apply feature views with duplicated names and single py file in a feature repo using CLI @@ -149,7 +143,6 @@ def test_cli_apply_imported_featureview_with_duplication() -> None: ) -@pytest.mark.integration def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None: """ Test apply feature views with duplicated names from multiple py files in a feature repo using CLI diff --git a/sdk/python/tests/integration/registration/test_cli_chdir.py b/sdk/python/tests/unit/cli/test_cli_chdir.py similarity index 97% rename from sdk/python/tests/integration/registration/test_cli_chdir.py rename to sdk/python/tests/unit/cli/test_cli_chdir.py index a3c9209e4d..ff26c2f5e2 100644 --- a/sdk/python/tests/integration/registration/test_cli_chdir.py +++ b/sdk/python/tests/unit/cli/test_cli_chdir.py @@ -2,12 +2,9 @@ from datetime import datetime, timedelta from pathlib import Path -import pytest - from tests.utils.cli_utils import CliRunner -@pytest.mark.integration def test_cli_chdir() -> None: """ This test simply makes sure that you can run 'feast --chdir COMMAND' diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py index f8cc6689d9..aa4fc10371 100644 --- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -1,19 +1,12 @@ import pandas as pd import pytest -from feast import ( - BigQuerySource, - Entity, - Feature, - FeatureService, - FileSource, - RedshiftSource, - RepoConfig, - SnowflakeSource, - ValueType, -) +from feast import BigQuerySource, FileSource, RedshiftSource, SnowflakeSource from feast.data_source import RequestSource +from feast.entity import Entity from feast.errors import DataSourceNoNameException, SpecifiedFeaturesNotPresentError +from feast.feature import Feature +from feast.feature_service import FeatureService from feast.feature_view import FeatureView from feast.field import Field from feast.inference import update_feature_views_with_inferred_features_and_entities @@ -21,7 +14,8 @@ SparkSource, ) from feast.on_demand_feature_view import on_demand_feature_view -from feast.types import Float32, Float64, Int64, String, UnixTimestamp +from feast.repo_config import RepoConfig +from feast.types import Float32, Float64, Int64, String, UnixTimestamp, ValueType from tests.utils.data_source_utils import prep_file_source diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index 88ab7d899d..0d9f8df282 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -1,4 +1,4 @@ -# Copyright 2021 The Feast Authors +# Copyright 2022 The Feast Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
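Note on the import changes in test_inference_unit_tests.py above: every hunk follows the same pattern, replacing an import from the `feast` package root with an import from the submodule that defines the symbol. The patch itself does not state the motivation, so the snippet below is only a minimal sketch of the mechanical rewrite; the module paths are taken directly from the hunks above.

    # Before: all symbols imported from the package root.
    from feast import Entity, Feature, FeatureService, RepoConfig, ValueType

    # After: each symbol imported from the submodule that defines it.
    from feast.entity import Entity
    from feast.feature import Feature
    from feast.feature_service import FeatureService
    from feast.repo_config import RepoConfig
    from feast.types import ValueType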
diff --git a/sdk/python/tests/integration/e2e/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py similarity index 100% rename from sdk/python/tests/integration/e2e/test_e2e_local.py rename to sdk/python/tests/unit/local_feast_tests/test_e2e_local.py diff --git a/sdk/python/tests/integration/registration/test_feature_service_apply.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py similarity index 95% rename from sdk/python/tests/integration/registration/test_feature_service_apply.py rename to sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py index 7824f6333e..363f1f5ae9 100644 --- a/sdk/python/tests/integration/registration/test_feature_service_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py @@ -1,10 +1,7 @@ -import pytest - from feast import FeatureService from tests.utils.cli_utils import CliRunner, get_example_repo -@pytest.mark.integration def test_read_pre_applied() -> None: """ Read feature values from the FeatureStore using a FeatureService. diff --git a/sdk/python/tests/integration/online_store/test_feature_service_read.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py similarity index 100% rename from sdk/python/tests/integration/online_store/test_feature_service_read.py rename to sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py diff --git a/sdk/python/tests/integration/scaffolding/test_init.py b/sdk/python/tests/unit/local_feast_tests/test_init.py similarity index 97% rename from sdk/python/tests/integration/scaffolding/test_init.py rename to sdk/python/tests/unit/local_feast_tests/test_init.py index 619e7e8625..1cada91ea0 100644 --- a/sdk/python/tests/integration/scaffolding/test_init.py +++ b/sdk/python/tests/unit/local_feast_tests/test_init.py @@ -3,12 +3,9 @@ from pathlib import Path from textwrap import dedent -import pytest - from tests.utils.cli_utils import CliRunner -@pytest.mark.integration def test_repo_init() -> None: """ This test simply makes sure that you can run `feast apply && feast materialize` on @@ -31,7 +28,6 @@ def test_repo_init() -> None: assert result.returncode == 0 -@pytest.mark.integration def test_repo_init_with_underscore_in_project_name() -> None: """ Test `feast init` with underscore in the project name diff --git a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py new file mode 100644 index 0000000000..4ae42eacf1 --- /dev/null +++ b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py @@ -0,0 +1,266 @@ +from datetime import datetime, timedelta +from tempfile import mkstemp + +import pytest +from pytest_lazyfixture import lazy_fixture + +from feast import FileSource +from feast.data_format import ParquetFormat +from feast.entity import Entity +from feast.feature_store import FeatureStore +from feast.feature_view import FeatureView +from feast.field import Field +from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig +from feast.repo_config import RepoConfig +from feast.types import Array, Bytes, Int64, String +from tests.utils.data_source_utils import prep_file_source + + +@pytest.mark.parametrize( + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], +) +def test_apply_entity_success(test_feature_store): + entity = Entity( + name="driver_car_id", + description="Car driver id", + tags={"team": "matchmaking"}, + ) + + # Register Entity + 
test_feature_store.apply(entity) + + entities = test_feature_store.list_entities() + + entity = entities[0] + assert ( + len(entities) == 1 + and entity.name == "driver_car_id" + and entity.description == "Car driver id" + and "team" in entity.tags + and entity.tags["team"] == "matchmaking" + ) + + test_feature_store.teardown() + + +@pytest.mark.parametrize( + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], +) +def test_apply_feature_view_success(test_feature_store): + # Create Feature Views + batch_source = FileSource( + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + date_partition_column="date_partition_col", + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["entity_id"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + Field(name="entity_id", dtype=Int64), + ], + entities=[entity], + tags={"team": "matchmaking"}, + batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + # Register Feature View + test_feature_store.apply([entity, fv1]) + + feature_views = test_feature_store.list_feature_views() + + # List Feature Views + assert ( + len(feature_views) == 1 + and feature_views[0].name == "my_feature_view_1" + and feature_views[0].features[0].name == "fs1_my_feature_1" + and feature_views[0].features[0].dtype == Int64 + and feature_views[0].features[1].name == "fs1_my_feature_2" + and feature_views[0].features[1].dtype == String + and feature_views[0].features[2].name == "fs1_my_feature_3" + and feature_views[0].features[2].dtype == Array(String) + and feature_views[0].features[3].name == "fs1_my_feature_4" + and feature_views[0].features[3].dtype == Array(Bytes) + and feature_views[0].entities[0] == "fs1_my_entity_1" + ) + + test_feature_store.teardown() + + +@pytest.mark.parametrize( + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], +) +def test_apply_object_and_read(test_feature_store): + assert isinstance(test_feature_store, FeatureStore) + # Create Feature Views + batch_source = FileSource( + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + e1 = Entity(name="fs1_my_entity_1", description="something") + + e2 = Entity(name="fs1_my_entity_2", description="something") + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + Field(name="fs1_my_entity_1", dtype=Int64), + ], + entities=[e1], + tags={"team": "matchmaking"}, + batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + fv2 = FeatureView( + name="my_feature_view_2", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + Field(name="fs1_my_entity_2", dtype=Int64), + ], + entities=[e2], + tags={"team": "matchmaking"}, + batch_source=batch_source, + ttl=timedelta(minutes=5), + ) + + # Register Feature View + test_feature_store.apply([fv1, e1, fv2, e2]) + + fv1_actual = 
test_feature_store.get_feature_view("my_feature_view_1") + e1_actual = test_feature_store.get_entity("fs1_my_entity_1") + + assert e1 == e1_actual + assert fv2 != fv1_actual + assert e2 != e1_actual + + test_feature_store.teardown() + + +@pytest.mark.parametrize( + "test_feature_store", + [lazy_fixture("feature_store_with_local_registry")], +) +@pytest.mark.parametrize("dataframe_source", [lazy_fixture("simple_dataset_1")]) +def test_reapply_feature_view_success(test_feature_store, dataframe_source): + with prep_file_source(df=dataframe_source, timestamp_field="ts_1") as file_source: + + e = Entity(name="id", join_keys=["id_join_key"]) + + # Create Feature View + fv1 = FeatureView( + name="my_feature_view_1", + schema=[Field(name="string_col", dtype=String)], + entities=[e], + batch_source=file_source, + ttl=timedelta(minutes=5), + ) + + # Register Feature View + test_feature_store.apply([fv1, e]) + + # Check Feature View + fv_stored = test_feature_store.get_feature_view(fv1.name) + assert len(fv_stored.materialization_intervals) == 0 + + # Run materialization + test_feature_store.materialize(datetime(2020, 1, 1), datetime(2021, 1, 1)) + + # Check Feature View + fv_stored = test_feature_store.get_feature_view(fv1.name) + assert len(fv_stored.materialization_intervals) == 1 + + # Apply again + test_feature_store.apply([fv1]) + + # Check Feature View + fv_stored = test_feature_store.get_feature_view(fv1.name) + assert len(fv_stored.materialization_intervals) == 1 + + # Change and apply Feature View + fv1 = FeatureView( + name="my_feature_view_1", + schema=[Field(name="int64_col", dtype=Int64)], + entities=[e], + batch_source=file_source, + ttl=timedelta(minutes=5), + ) + test_feature_store.apply([fv1]) + + # Check Feature View + fv_stored = test_feature_store.get_feature_view(fv1.name) + assert len(fv_stored.materialization_intervals) == 0 + + test_feature_store.teardown() + + +def test_apply_conflicting_featureview_names(feature_store_with_local_registry): + """Test applying feature views with non-case-insensitively unique names""" + driver = Entity(name="driver", join_keys=["driver_id"]) + customer = Entity(name="customer", join_keys=["customer_id"]) + + driver_stats = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(seconds=10), + online=False, + batch_source=FileSource(path="driver_stats.parquet"), + tags={}, + ) + + customer_stats = FeatureView( + name="DRIVER_HOURLY_STATS", + entities=[customer], + ttl=timedelta(seconds=10), + online=False, + batch_source=FileSource(path="customer_stats.parquet"), + tags={}, + ) + try: + feature_store_with_local_registry.apply([driver_stats, customer_stats]) + error = None + except ValueError as e: + error = e + assert ( + isinstance(error, ValueError) + and "Please ensure that all feature view names are case-insensitively unique" + in error.args[0] + ) + + feature_store_with_local_registry.teardown() + + +@pytest.fixture +def feature_store_with_local_registry(): + fd, registry_path = mkstemp() + fd, online_store_path = mkstemp() + return FeatureStore( + config=RepoConfig( + registry=registry_path, + project="default", + provider="local", + online_store=SqliteOnlineStoreConfig(path=online_store_path), + ) + ) diff --git a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py similarity index 98% rename from sdk/python/tests/integration/registration/test_stream_feature_view_apply.py rename to 
sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 22a8d8a699..685c23018f 100644 --- a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -1,7 +1,5 @@ from datetime import timedelta -import pytest - from feast.aggregation import Aggregation from feast.data_format import AvroFormat from feast.data_source import KafkaSource @@ -13,7 +11,6 @@ from tests.utils.data_source_utils import prep_file_source -@pytest.mark.integration def test_apply_stream_feature_view(simple_dataset_1) -> None: """ Test apply of StreamFeatureView. @@ -80,7 +77,6 @@ def simple_sfv(df): assert features["dummy_field"] == [None] -@pytest.mark.integration def test_stream_feature_view_udf(simple_dataset_1) -> None: """ Test apply of StreamFeatureView udfs are serialized correctly and usable. diff --git a/sdk/python/tests/utils/http_utils.py b/sdk/python/tests/utils/http_server_test_utils.py similarity index 100% rename from sdk/python/tests/utils/http_utils.py rename to sdk/python/tests/utils/http_server_test_utils.py diff --git a/sdk/python/tests/utils/test_wrapper_utils.py b/sdk/python/tests/utils/test_wrappers.py similarity index 100% rename from sdk/python/tests/utils/test_wrapper_utils.py rename to sdk/python/tests/utils/test_wrappers.py From 375d65d6de7381d48f251a726dbf909a11ada4f5 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 11:32:23 -0700 Subject: [PATCH 13/30] Fix Signed-off-by: Kevin Zhang --- sdk/python/tests/conftest.py | 2 +- sdk/python/tests/integration/e2e/test_go_feature_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index eeb6361ca8..c6b38ed57b 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -43,7 +43,7 @@ from tests.integration.feature_repos.universal.data_sources.file import ( # noqa: E402 FileDataSourceCreator, ) -from tests.utils.http_utils import check_port_open, free_port # noqa: E402 +from tests.utils.http_server_test_utils import check_port_open, free_port # noqa: E402 logger = logging.getLogger(__name__) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 3e48c4bef8..1a0814c990 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -33,7 +33,7 @@ location, ) from tests.utils.feature_test_utils import generate_expected_logs, get_latest_rows -from tests.utils.http_utils import check_port_open, free_port +from tests.utils.http_server_test_utils import check_port_open, free_port @pytest.mark.integration From 0d49d71efd0015dffbbaaa637804b8a6c7187033 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 11:56:34 -0700 Subject: [PATCH 14/30] Fix Signed-off-by: Kevin Zhang --- .../integration/e2e/test_universal_e2e.py | 26 ++++++++++++++++ .../registration/test_universal_cli.py | 29 +++++++++++++++++ sdk/python/tests/unit/cli/test_cli.py | 31 +------------------ .../test_feature_service_apply.py | 26 ---------------- .../test_stream_feature_view_apply.py | 4 +-- sdk/python/tests/unit/test_feature_service.py | 2 +- 6 files changed, 59 insertions(+), 59 deletions(-) delete mode 100644 sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py 
b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 4ba93dfef0..757078fd36 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -2,6 +2,7 @@ import pytest +from feast.feature_service import FeatureService from feast import BigQuerySource, Entity, FeatureView, Field from feast.types import Float32, String from tests.integration.feature_repos.universal.entities import driver @@ -71,3 +72,28 @@ def test_partial() -> None: basic_rw_test(store, view_name="driver_locations") basic_rw_test(store, view_name="driver_locations_100") + + +@pytest.mark.integration +def test_read_pre_applied() -> None: + """ + Read feature values from the FeatureStore using a FeatureService. + """ + runner = CliRunner() + with runner.local_repo( + get_example_repo("example_feature_repo_1.py"), "bigquery" + ) as store: + + assert len(store.list_feature_services()) == 1 + fs = store.get_feature_service("driver_locations_service") + assert len(fs.tags) == 1 + assert fs.tags["release"] == "production" + + fv = store.get_feature_view("driver_locations") + + fs = FeatureService(name="new_feature_service", features=[fv[["lon"]]]) + + store.apply([fs]) + + assert len(store.list_feature_services()) == 2 + store.get_feature_service("new_feature_service") \ No newline at end of file diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index 20ec962e6f..2b95a328fd 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -9,6 +9,10 @@ from feast import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment from tests.utils.cli_utils import CliRunner, get_example_repo +from tests.utils.e2e_test_utils import ( + NULLABLE_ONLINE_STORE_CONFIGS, + make_feature_store_yaml, +) from tests.utils.e2e_test_utils import make_feature_store_yaml from tests.utils.online_read_write_test_utils import basic_rw_test @@ -136,3 +140,28 @@ def test_odfv_apply(environment) -> None: assertpy.assert_that(result.returncode).is_equal_to(0) finally: runner.run(["teardown"], cwd=repo_path) + + +@pytest.mark.integration +@pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) +def test_nullable_online_store(test_nullable_online_store) -> None: + project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}" + runner = CliRunner() + + with tempfile.TemporaryDirectory() as repo_dir_name: + try: + repo_path = Path(repo_dir_name) + feature_store_yaml = make_feature_store_yaml( + project, test_nullable_online_store, repo_path + ) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text(dedent(feature_store_yaml)) + + repo_example = repo_path / "example.py" + repo_example.write_text(get_example_repo("example_feature_repo_1.py")) + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + finally: + runner.run(["teardown"], cwd=repo_path) \ No newline at end of file diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index f20eea77f7..6e3a228deb 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -4,38 +4,9 @@ from pathlib import Path from textwrap import dedent -import pytest from assertpy import assertpy -from tests.utils.cli_utils import CliRunner, get_example_repo 
-from tests.utils.e2e_test_utils import ( - NULLABLE_ONLINE_STORE_CONFIGS, - make_feature_store_yaml, -) - - -@pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) -def test_nullable_online_store(test_nullable_online_store) -> None: - project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}" - runner = CliRunner() - - with tempfile.TemporaryDirectory() as repo_dir_name: - try: - repo_path = Path(repo_dir_name) - feature_store_yaml = make_feature_store_yaml( - project, test_nullable_online_store, repo_path - ) - - repo_config = repo_path / "feature_store.yaml" - - repo_config.write_text(dedent(feature_store_yaml)) - - repo_example = repo_path / "example.py" - repo_example.write_text(get_example_repo("example_feature_repo_1.py")) - result = runner.run(["apply"], cwd=repo_path) - assertpy.assert_that(result.returncode).is_equal_to(0) - finally: - runner.run(["teardown"], cwd=repo_path) +from tests.utils.cli_utils import CliRunner def test_3rd_party_providers() -> None: diff --git a/sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py deleted file mode 100644 index 363f1f5ae9..0000000000 --- a/sdk/python/tests/unit/local_feast_tests/test_feature_service_apply.py +++ /dev/null @@ -1,26 +0,0 @@ -from feast import FeatureService -from tests.utils.cli_utils import CliRunner, get_example_repo - - -def test_read_pre_applied() -> None: - """ - Read feature values from the FeatureStore using a FeatureService. - """ - runner = CliRunner() - with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "bigquery" - ) as store: - - assert len(store.list_feature_services()) == 1 - fs = store.get_feature_service("driver_locations_service") - assert len(fs.tags) == 1 - assert fs.tags["release"] == "production" - - fv = store.get_feature_view("driver_locations") - - fs = FeatureService(name="new_feature_service", features=[fv[["lon"]]]) - - store.apply([fs]) - - assert len(store.list_feature_services()) == 2 - store.get_feature_service("new_feature_service") diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 685c23018f..07ebd56a73 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -17,7 +17,7 @@ def test_apply_stream_feature_view(simple_dataset_1) -> None: """ runner = CliRunner() with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "bigquery" + get_example_repo("example_feature_repo_1.py"), "file" ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: @@ -83,7 +83,7 @@ def test_stream_feature_view_udf(simple_dataset_1) -> None: """ runner = CliRunner() with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "bigquery" + get_example_repo("example_feature_repo_1.py"), "file" ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: diff --git a/sdk/python/tests/unit/test_feature_service.py b/sdk/python/tests/unit/test_feature_service.py index 2bd6a5b127..da69809b3e 100644 --- a/sdk/python/tests/unit/test_feature_service.py +++ b/sdk/python/tests/unit/test_feature_service.py @@ -5,7 +5,7 @@ from feast.field import Field from feast.infra.offline_stores.file_source import FileSource from feast.types import Float32 -from 
tests.utils.test_wrapper_utils import no_warnings +from tests.utils.test_wrappers import no_warnings def test_feature_service_with_description(): From ef2465fd3de9d2f8b7f6bbaffe228ec92fa0eea3 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 11:57:20 -0700 Subject: [PATCH 15/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/integration/e2e/test_universal_e2e.py | 4 ++-- .../tests/integration/registration/test_universal_cli.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 757078fd36..9da7b91969 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -2,8 +2,8 @@ import pytest -from feast.feature_service import FeatureService from feast import BigQuerySource, Entity, FeatureView, Field +from feast.feature_service import FeatureService from feast.types import Float32, String from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view @@ -96,4 +96,4 @@ def test_read_pre_applied() -> None: store.apply([fs]) assert len(store.list_feature_services()) == 2 - store.get_feature_service("new_feature_service") \ No newline at end of file + store.get_feature_service("new_feature_service") diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index 2b95a328fd..aec00c82e6 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -13,7 +13,6 @@ NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, ) -from tests.utils.e2e_test_utils import make_feature_store_yaml from tests.utils.online_read_write_test_utils import basic_rw_test @@ -164,4 +163,4 @@ def test_nullable_online_store(test_nullable_online_store) -> None: result = runner.run(["apply"], cwd=repo_path) assertpy.assert_that(result.returncode).is_equal_to(0) finally: - runner.run(["teardown"], cwd=repo_path) \ No newline at end of file + runner.run(["teardown"], cwd=repo_path) From 5b2edb749f662016b81b16bc74d7ac9392ce118d Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 11:58:05 -0700 Subject: [PATCH 16/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/unit/cli/test_cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index 6e3a228deb..51fb93c770 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -1,5 +1,4 @@ import tempfile -import uuid from contextlib import contextmanager from pathlib import Path from textwrap import dedent From 5397529dfaffb504ab0bbe01fbd7b17108f944d4 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 12:00:57 -0700 Subject: [PATCH 17/30] Fix Signed-off-by: Kevin Zhang --- sdk/python/tests/integration/materialization/test_lambda.py | 5 ++++- .../tests/integration/registration/test_universal_cli.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 05288dadc8..395e5847f9 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ 
b/sdk/python/tests/integration/materialization/test_lambda.py @@ -2,7 +2,10 @@ import pytest -from feast import Entity, Feature, FeatureView, ValueType +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_view import FeatureView +from feast.types import ValueType from tests.data.data_creator import create_basic_driver_dataset from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index aec00c82e6..41a693f657 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -6,7 +6,7 @@ import pytest from assertpy import assertpy -from feast import FeatureStore +from feast.feature_store import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.e2e_test_utils import ( From 1e633c0b47b4afc6b5ce1c5a7fd6a48b86ff2d57 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 12:16:27 -0700 Subject: [PATCH 18/30] Fix Signed-off-by: Kevin Zhang --- .../test_stream_feature_view_apply.py | 47 +++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 07ebd56a73..2b645fbb0c 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -1,4 +1,11 @@ -from datetime import timedelta +import os +import tempfile +from datetime import datetime, timedelta + +from feast.driver_test_data import ( + create_driver_hourly_stats_df, + create_global_daily_stats_df, +) from feast.aggregation import Aggregation from feast.data_format import AvroFormat @@ -16,8 +23,25 @@ def test_apply_stream_feature_view(simple_dataset_1) -> None: Test apply of StreamFeatureView. """ runner = CliRunner() + with tempfile.TemporaryDirectory() as data_dir: + # Generate test data. + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + driver_stats_path = os.path.join(data_dir, "driver_stats.parquet") + driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True) + + global_df = create_global_daily_stats_df(start_date, end_date) + global_stats_path = os.path.join(data_dir, "global_stats.parquet") + global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) + with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "file" + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: @@ -82,8 +106,25 @@ def test_stream_feature_view_udf(simple_dataset_1) -> None: Test apply of StreamFeatureView udfs are serialized correctly and usable. """ runner = CliRunner() + with tempfile.TemporaryDirectory() as data_dir: + # Generate test data. 
+ end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + driver_stats_path = os.path.join(data_dir, "driver_stats.parquet") + driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True) + + global_df = create_global_daily_stats_df(start_date, end_date) + global_stats_path = os.path.join(data_dir, "global_stats.parquet") + global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) + with runner.local_repo( - get_example_repo("example_feature_repo_1.py"), "file" + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: From 513f95c25e7aa21f2f6a881f144303c34d5db33c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 12:18:16 -0700 Subject: [PATCH 19/30] Fix lint Signed-off-by: Kevin Zhang --- .../test_stream_feature_view_apply.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 2b645fbb0c..bce4dc50a1 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -2,14 +2,13 @@ import tempfile from datetime import datetime, timedelta +from feast.aggregation import Aggregation +from feast.data_format import AvroFormat +from feast.data_source import KafkaSource from feast.driver_test_data import ( create_driver_hourly_stats_df, create_global_daily_stats_df, ) - -from feast.aggregation import Aggregation -from feast.data_format import AvroFormat -from feast.data_source import KafkaSource from feast.entity import Entity from feast.field import Field from feast.stream_feature_view import stream_feature_view @@ -38,10 +37,10 @@ def test_apply_stream_feature_view(simple_dataset_1) -> None: global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: @@ -121,10 +120,10 @@ def test_stream_feature_view_udf(simple_dataset_1) -> None: global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", ) as fs, prep_file_source( df=simple_dataset_1, timestamp_field="ts_1" ) as file_source: From eaae4f2dabb9b3e11c645eec6c3380887028157b Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 12:30:04 -0700 Subject: [PATCH 20/30] Fix Signed-off-by: Kevin Zhang --- 
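Note on the data setup introduced in PATCH 18 and reflowed in PATCH 19 above: each stream-feature-view test now writes synthetic parquet files into a temporary directory and templates their paths into the example repo definition before applying it. A condensed sketch of that flow follows; the helper names are taken from the diff, but their exact signatures are assumptions inferred from usage.

    import os
    import tempfile
    from datetime import datetime, timedelta

    from feast.driver_test_data import (
        create_driver_hourly_stats_df,
        create_global_daily_stats_df,
    )
    from tests.utils.cli_utils import CliRunner, get_example_repo

    runner = CliRunner()
    with tempfile.TemporaryDirectory() as data_dir:
        # Fifteen days of synthetic hourly driver stats and daily global stats.
        end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
        start_date = end_date - timedelta(days=15)

        driver_df = create_driver_hourly_stats_df(
            [1001, 1002, 1003, 1004, 1005], start_date, end_date
        )
        driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
        driver_df.to_parquet(path=driver_stats_path, allow_truncated_timestamps=True)

        global_df = create_global_daily_stats_df(start_date, end_date)
        global_stats_path = os.path.join(data_dir, "global_stats.parquet")
        global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True)

        # Template the generated paths into the repo definition, then stand up
        # a throwaway local repo against the file offline store.
        with runner.local_repo(
            get_example_repo("example_feature_repo_2.py")
            .replace("%PARQUET_PATH%", driver_stats_path)
            .replace("%PARQUET_PATH_GLOBAL%", global_stats_path),
            "file",
        ) as fs:
            ...  # feature view apply/read assertions run against `fs` here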
.../registration/test_feature_store.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index f8c266e361..b0c753f391 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -14,6 +14,7 @@ import os import time from datetime import timedelta +from tempfile import mkstemp import pytest from pytest_lazyfixture import lazy_fixture @@ -26,6 +27,7 @@ from feast.field import Field from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig +from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig from feast.types import Array, Bytes, Float64, Int64, String from tests.utils.data_source_utils import ( @@ -218,6 +220,20 @@ def test_apply_feature_view_integration(test_feature_store): test_feature_store.teardown() +@pytest.fixture +def feature_store_with_local_registry(): + fd, registry_path = mkstemp() + fd, online_store_path = mkstemp() + return FeatureStore( + config=RepoConfig( + registry=registry_path, + project="default", + provider="local", + online_store=SqliteOnlineStoreConfig(path=online_store_path), + ) + ) + + @pytest.fixture def feature_store_with_gcs_registry(): from google.cloud import storage From 5e8734ee94ba600083c64605e26fedf713b756fa Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 14:39:41 -0700 Subject: [PATCH 21/30] Refactor Signed-off-by: Kevin Zhang --- sdk/python/tests/conftest.py | 2 +- sdk/python/tests/integration/e2e/test_go_feature_server.py | 4 ++-- sdk/python/tests/integration/e2e/test_universal_e2e.py | 6 +++--- sdk/python/tests/integration/e2e/test_validation.py | 4 ++-- sdk/python/tests/integration/materialization/test_lambda.py | 2 +- .../offline_store/test_universal_historical_retrieval.py | 2 +- .../tests/integration/online_store/test_online_retrieval.py | 2 +- .../tests/integration/online_store/test_universal_online.py | 2 +- .../tests/integration/registration/test_feature_store.py | 2 +- sdk/python/tests/integration/registration/test_inference.py | 2 +- sdk/python/tests/integration/registration/test_registry.py | 2 +- .../tests/integration/registration/test_universal_cli.py | 6 +++--- sdk/python/tests/unit/cli/test_cli.py | 2 +- sdk/python/tests/unit/cli/test_cli_apply_duplicates.py | 2 +- sdk/python/tests/unit/cli/test_cli_chdir.py | 2 +- .../unit/infra/online_store/test_dynamodb_online_store.py | 2 +- sdk/python/tests/unit/infra/test_inference_unit_tests.py | 2 +- sdk/python/tests/unit/infra/test_local_registry.py | 2 +- sdk/python/tests/unit/local_feast_tests/test_e2e_local.py | 6 +++--- .../unit/local_feast_tests/test_feature_service_read.py | 4 ++-- sdk/python/tests/unit/local_feast_tests/test_init.py | 2 +- .../local_feast_tests/test_stream_feature_view_apply.py | 4 ++-- ...ne_read_write_test_utils.py => basic_read_write_test.py} | 0 .../tests/utils/{cli_utils.py => cli_helper_functions.py} | 0 .../utils/{data_source_utils.py => data_source_creator.py} | 0 .../{online_store_utils.py => dynamo_table_creator.py} | 0 .../{e2e_test_utils.py => e2e_test_validation_functions.py} | 0 ..._store_test_utils.py => feature_store_test_functions.py} | 0 .../{http_server_test_utils.py => http_server_functions.py} | 0 .../{feature_test_utils.py => log_test_helper_functions.py} | 0 
...e_write_benchmark_utils.py => online_write_benchmark.py} | 0 31 files changed, 32 insertions(+), 32 deletions(-) rename sdk/python/tests/utils/{online_read_write_test_utils.py => basic_read_write_test.py} (100%) rename sdk/python/tests/utils/{cli_utils.py => cli_helper_functions.py} (100%) rename sdk/python/tests/utils/{data_source_utils.py => data_source_creator.py} (100%) rename sdk/python/tests/utils/{online_store_utils.py => dynamo_table_creator.py} (100%) rename sdk/python/tests/utils/{e2e_test_utils.py => e2e_test_validation_functions.py} (100%) rename sdk/python/tests/utils/{feature_store_test_utils.py => feature_store_test_functions.py} (100%) rename sdk/python/tests/utils/{http_server_test_utils.py => http_server_functions.py} (100%) rename sdk/python/tests/utils/{feature_test_utils.py => log_test_helper_functions.py} (100%) rename sdk/python/tests/utils/{online_write_benchmark_utils.py => online_write_benchmark.py} (100%) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index c6b38ed57b..b11e4b18b6 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -43,7 +43,7 @@ from tests.integration.feature_repos.universal.data_sources.file import ( # noqa: E402 FileDataSourceCreator, ) -from tests.utils.http_server_test_utils import check_port_open, free_port # noqa: E402 +from tests.utils.http_server_functions import check_port_open, free_port # noqa: E402 logger = logging.getLogger(__name__) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 1a0814c990..d278025892 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -32,8 +32,8 @@ driver, location, ) -from tests.utils.feature_test_utils import generate_expected_logs, get_latest_rows -from tests.utils.http_server_test_utils import check_port_open, free_port +from tests.utils.log_test_helper_functions import generate_expected_logs, get_latest_rows +from tests.utils.http_server_functions import check_port_open, free_port @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 9da7b91969..9a080e814b 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -7,9 +7,9 @@ from feast.types import Float32, String from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.e2e_test_utils import validate_offline_online_store_consistency -from tests.utils.online_read_write_test_utils import basic_rw_test +from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.e2e_test_validation_functions import validate_offline_online_store_consistency +from tests.utils.basic_read_write_test import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index 982b28547a..048c0a88f9 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -26,8 +26,8 @@ driver, location, ) -from tests.utils.cli_utils import CliRunner -from tests.utils.feature_test_utils import prepare_logs +from 
tests.utils.cli_helper_functions import CliRunner +from tests.utils.log_test_helper_functions import prepare_logs _features = [ "customer_profile:current_balance", diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 395e5847f9..1ed3faaaf0 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -17,7 +17,7 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) -from tests.utils.e2e_test_utils import validate_offline_online_store_consistency +from tests.utils.e2e_test_validation_functions import validate_offline_online_store_consistency @pytest.mark.integration diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 524b9b31eb..22e3f3ddc6 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -27,7 +27,7 @@ driver, location, ) -from tests.utils.feature_store_test_utils import ( +from tests.utils.feature_store_test_functions import ( assert_feature_service_correctness, assert_feature_service_entity_mapping_correctness, get_expected_training_df, diff --git a/sdk/python/tests/integration/online_store/test_online_retrieval.py b/sdk/python/tests/integration/online_store/test_online_retrieval.py index 9cf4d9a182..9166e2809e 100644 --- a/sdk/python/tests/integration/online_store/test_online_retrieval.py +++ b/sdk/python/tests/integration/online_store/test_online_retrieval.py @@ -11,7 +11,7 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RegistryConfig -from tests.utils.cli_utils import CliRunner, get_example_repo +from tests.utils.cli_helper_functions import CliRunner, get_example_repo @pytest.mark.integration diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 6521c9ed2f..c88e462fa3 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -34,7 +34,7 @@ create_driver_hourly_stats_feature_view, driver_feature_view, ) -from tests.utils.data_source_utils import prep_file_source +from tests.utils.data_source_creator import prep_file_source @pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index b0c753f391..8fbfab737e 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -30,7 +30,7 @@ from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig from feast.types import Array, Bytes, Float64, Int64, String -from tests.utils.data_source_utils import ( +from tests.utils.data_source_creator import ( prep_file_source, simple_bq_source_using_query_arg, simple_bq_source_using_table_arg, diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 
d719b9f971..30d697e5bb 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -5,7 +5,7 @@ from feast import RepoConfig from feast.errors import RegistryInferenceFailure from feast.inference import update_data_sources_with_inferred_event_timestamp_col -from tests.utils.data_source_utils import ( +from tests.utils.data_source_creator import ( prep_file_source, simple_bq_source_using_query_arg, simple_bq_source_using_table_arg, diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index ceb26714a3..10d2e89811 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -26,7 +26,7 @@ from feast.registry import Registry from feast.repo_config import RegistryConfig from feast.types import Array, Bytes, Int64, String -from tests.utils.e2e_test_utils import validate_registry_data_source_apply +from tests.utils.e2e_test_validation_functions import validate_registry_data_source_apply @pytest.fixture diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index 41a693f657..81155fac46 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -8,12 +8,12 @@ from feast.feature_store import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.e2e_test_utils import ( +from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.e2e_test_validation_functions import ( NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, ) -from tests.utils.online_read_write_test_utils import basic_rw_test +from tests.utils.basic_read_write_test import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index 51fb93c770..0c40590831 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -5,7 +5,7 @@ from assertpy import assertpy -from tests.utils.cli_utils import CliRunner +from tests.utils.cli_helper_functions import CliRunner def test_3rd_party_providers() -> None: diff --git a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py index bad3b50a80..b6a4fedb4e 100644 --- a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py +++ b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py @@ -2,7 +2,7 @@ from pathlib import Path from textwrap import dedent -from tests.utils.cli_utils import CliRunner, get_example_repo +from tests.utils.cli_helper_functions import CliRunner, get_example_repo def test_cli_apply_duplicated_featureview_names() -> None: diff --git a/sdk/python/tests/unit/cli/test_cli_chdir.py b/sdk/python/tests/unit/cli/test_cli_chdir.py index ff26c2f5e2..d9c049d033 100644 --- a/sdk/python/tests/unit/cli/test_cli_chdir.py +++ b/sdk/python/tests/unit/cli/test_cli_chdir.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from pathlib import Path -from tests.utils.cli_utils import CliRunner +from tests.utils.cli_helper_functions import CliRunner def test_cli_chdir() -> None: diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py 
b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 1aff82abcc..c8eca6201f 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -14,7 +14,7 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig -from tests.utils.online_store_utils import ( +from tests.utils.dynamo_table_creator import ( create_n_customer_test_samples, create_test_table, insert_data_test_table, diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py index aa4fc10371..f1bf34587f 100644 --- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -16,7 +16,7 @@ from feast.on_demand_feature_view import on_demand_feature_view from feast.repo_config import RepoConfig from feast.types import Float32, Float64, Int64, String, UnixTimestamp, ValueType -from tests.utils.data_source_utils import prep_file_source +from tests.utils.data_source_creator import prep_file_source def test_infer_datasource_names_file(): diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index 0d9f8df282..e06c76ef42 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -32,7 +32,7 @@ from feast.stream_feature_view import StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType -from tests.utils.e2e_test_utils import validate_registry_data_source_apply +from tests.utils.e2e_test_validation_functions import validate_registry_data_source_apply @pytest.fixture diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index 262e7687c1..ea5bf748e0 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -11,8 +11,8 @@ create_global_daily_stats_df, ) from feast.feature_store import FeatureStore -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.feature_store_test_utils import validate_online_features +from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.feature_store_test_functions import validate_online_features @pytest.mark.integration @@ -40,7 +40,7 @@ def test_e2e_local() -> None: global_stats_path = os.path.join(data_dir, "global_stats.parquet") global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) - with runner.local_repo( + with runner.local_repo( get_example_repo("example_feature_repo_2.py") .replace("%PARQUET_PATH%", driver_stats_path) .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), diff --git a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py index d7b9c07664..df00a1749c 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py +++ b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py @@ -1,7 +1,7 @@ import pytest -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.online_read_write_test_utils import basic_rw_test +from 
tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.basic_read_write_test import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/unit/local_feast_tests/test_init.py b/sdk/python/tests/unit/local_feast_tests/test_init.py index 1cada91ea0..f361a27cdc 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_init.py +++ b/sdk/python/tests/unit/local_feast_tests/test_init.py @@ -3,7 +3,7 @@ from pathlib import Path from textwrap import dedent -from tests.utils.cli_utils import CliRunner +from tests.utils.cli_helper_functions import CliRunner def test_repo_init() -> None: diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index bce4dc50a1..02240d7dd9 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -13,8 +13,8 @@ from feast.field import Field from feast.stream_feature_view import stream_feature_view from feast.types import Float32 -from tests.utils.cli_utils import CliRunner, get_example_repo -from tests.utils.data_source_utils import prep_file_source +from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.data_source_creator import prep_file_source def test_apply_stream_feature_view(simple_dataset_1) -> None: diff --git a/sdk/python/tests/utils/online_read_write_test_utils.py b/sdk/python/tests/utils/basic_read_write_test.py similarity index 100% rename from sdk/python/tests/utils/online_read_write_test_utils.py rename to sdk/python/tests/utils/basic_read_write_test.py diff --git a/sdk/python/tests/utils/cli_utils.py b/sdk/python/tests/utils/cli_helper_functions.py similarity index 100% rename from sdk/python/tests/utils/cli_utils.py rename to sdk/python/tests/utils/cli_helper_functions.py diff --git a/sdk/python/tests/utils/data_source_utils.py b/sdk/python/tests/utils/data_source_creator.py similarity index 100% rename from sdk/python/tests/utils/data_source_utils.py rename to sdk/python/tests/utils/data_source_creator.py diff --git a/sdk/python/tests/utils/online_store_utils.py b/sdk/python/tests/utils/dynamo_table_creator.py similarity index 100% rename from sdk/python/tests/utils/online_store_utils.py rename to sdk/python/tests/utils/dynamo_table_creator.py diff --git a/sdk/python/tests/utils/e2e_test_utils.py b/sdk/python/tests/utils/e2e_test_validation_functions.py similarity index 100% rename from sdk/python/tests/utils/e2e_test_utils.py rename to sdk/python/tests/utils/e2e_test_validation_functions.py diff --git a/sdk/python/tests/utils/feature_store_test_utils.py b/sdk/python/tests/utils/feature_store_test_functions.py similarity index 100% rename from sdk/python/tests/utils/feature_store_test_utils.py rename to sdk/python/tests/utils/feature_store_test_functions.py diff --git a/sdk/python/tests/utils/http_server_test_utils.py b/sdk/python/tests/utils/http_server_functions.py similarity index 100% rename from sdk/python/tests/utils/http_server_test_utils.py rename to sdk/python/tests/utils/http_server_functions.py diff --git a/sdk/python/tests/utils/feature_test_utils.py b/sdk/python/tests/utils/log_test_helper_functions.py similarity index 100% rename from sdk/python/tests/utils/feature_test_utils.py rename to sdk/python/tests/utils/log_test_helper_functions.py diff --git a/sdk/python/tests/utils/online_write_benchmark_utils.py 
b/sdk/python/tests/utils/online_write_benchmark.py similarity index 100% rename from sdk/python/tests/utils/online_write_benchmark_utils.py rename to sdk/python/tests/utils/online_write_benchmark.py From da8e4faab231a66d2999816df4178352b0a74f0c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 14:40:20 -0700 Subject: [PATCH 22/30] Fix Signed-off-by: Kevin Zhang --- .../tests/integration/e2e/test_go_feature_server.py | 5 ++++- sdk/python/tests/integration/e2e/test_universal_e2e.py | 6 ++++-- .../tests/integration/materialization/test_lambda.py | 4 +++- .../tests/integration/registration/test_registry.py | 4 +++- .../tests/integration/registration/test_universal_cli.py | 2 +- sdk/python/tests/unit/infra/test_local_registry.py | 4 +++- sdk/python/tests/unit/local_feast_tests/test_e2e_local.py | 8 ++++---- .../unit/local_feast_tests/test_feature_service_read.py | 2 +- 8 files changed, 23 insertions(+), 12 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index d278025892..74f61d3ef5 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -32,8 +32,11 @@ driver, location, ) -from tests.utils.log_test_helper_functions import generate_expected_logs, get_latest_rows from tests.utils.http_server_functions import check_port_open, free_port +from tests.utils.log_test_helper_functions import ( + generate_expected_logs, + get_latest_rows, +) @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 9a080e814b..bba9e34437 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -7,9 +7,11 @@ from feast.types import Float32, String from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view -from tests.utils.cli_helper_functions import CliRunner, get_example_repo -from tests.utils.e2e_test_validation_functions import validate_offline_online_store_consistency from tests.utils.basic_read_write_test import basic_rw_test +from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.e2e_test_validation_functions import ( + validate_offline_online_store_consistency, +) @pytest.mark.integration diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 1ed3faaaf0..255dc33098 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -17,7 +17,9 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) -from tests.utils.e2e_test_validation_functions import validate_offline_online_store_consistency +from tests.utils.e2e_test_validation_functions import ( + validate_offline_online_store_consistency, +) @pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 10d2e89811..d50fa08d88 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -26,7 +26,9 @@ from feast.registry import Registry from feast.repo_config import 
RegistryConfig from feast.types import Array, Bytes, Int64, String -from tests.utils.e2e_test_validation_functions import validate_registry_data_source_apply +from tests.utils.e2e_test_validation_functions import ( + validate_registry_data_source_apply, +) @pytest.fixture diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index 81155fac46..7081a16c3f 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -8,12 +8,12 @@ from feast.feature_store import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment +from tests.utils.basic_read_write_test import basic_rw_test from tests.utils.cli_helper_functions import CliRunner, get_example_repo from tests.utils.e2e_test_validation_functions import ( NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, ) -from tests.utils.basic_read_write_test import basic_rw_test @pytest.mark.integration diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index e06c76ef42..b2cbd78e05 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -32,7 +32,9 @@ from feast.stream_feature_view import StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType -from tests.utils.e2e_test_validation_functions import validate_registry_data_source_apply +from tests.utils.e2e_test_validation_functions import ( + validate_registry_data_source_apply, +) @pytest.fixture diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index ea5bf748e0..a21014c3bc 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -45,10 +45,10 @@ def test_e2e_local() -> None: .replace("%PARQUET_PATH%", driver_stats_path) .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), "file", - ) as store: - _test_materialize_and_online_retrieval( - runner, store, start_date, end_date, driver_df - ) + ) as store: + _test_materialize_and_online_retrieval( + runner, store, start_date, end_date, driver_df + ) with runner.local_repo( get_example_repo("example_feature_repo_version_0_19.py") diff --git a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py index df00a1749c..e95b64ffa9 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py +++ b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py @@ -1,7 +1,7 @@ import pytest -from tests.utils.cli_helper_functions import CliRunner, get_example_repo from tests.utils.basic_read_write_test import basic_rw_test +from tests.utils.cli_helper_functions import CliRunner, get_example_repo @pytest.mark.integration From 43b3900340f98ac891f663dcdda90967d88c9983 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 14:41:26 -0700 Subject: [PATCH 23/30] Fix lint Signed-off-by: Kevin Zhang --- .../tests/integration/online_store/test_universal_online.py | 2 +- sdk/python/tests/integration/registration/test_feature_store.py | 2 +- sdk/python/tests/integration/registration/test_inference.py | 2 +- sdk/python/tests/unit/diff/test_registry_diff.py | 2 +-
sdk/python/tests/unit/infra/test_inference_unit_tests.py | 2 +- .../tests/unit/local_feast_tests/test_local_feature_store.py | 2 +- .../unit/local_feast_tests/test_stream_feature_view_apply.py | 2 +- .../{data_source_creator.py => data_source_test_creator.py} | 0 8 files changed, 7 insertions(+), 7 deletions(-) rename sdk/python/tests/utils/{data_source_creator.py => data_source_test_creator.py} (100%) diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index c88e462fa3..738b00f7d7 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -34,7 +34,7 @@ create_driver_hourly_stats_feature_view, driver_feature_view, ) -from tests.utils.data_source_creator import prep_file_source +from tests.utils.data_source_test_creator import prep_file_source @pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index 8fbfab737e..7b95afadba 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -30,7 +30,7 @@ from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig from feast.types import Array, Bytes, Float64, Int64, String -from tests.utils.data_source_creator import ( +from tests.utils.data_source_test_creator import ( prep_file_source, simple_bq_source_using_query_arg, simple_bq_source_using_table_arg, diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 30d697e5bb..de02fe53fe 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -5,7 +5,7 @@ from feast import RepoConfig from feast.errors import RegistryInferenceFailure from feast.inference import update_data_sources_with_inferred_event_timestamp_col -from tests.utils.data_source_creator import ( +from tests.utils.data_source_test_creator import ( prep_file_source, simple_bq_source_using_query_arg, simple_bq_source_using_table_arg, diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py index d12fc717f0..0effdfba97 100644 --- a/sdk/python/tests/unit/diff/test_registry_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -4,7 +4,7 @@ ) from feast.entity import Entity from feast.feature_view import FeatureView -from tests.utils.data_source_utils import prep_file_source +from tests.utils.data_source_test_creator import prep_file_source def test_tag_objects_for_keep_delete_update_add(simple_dataset_1): diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py index f1bf34587f..7a564679d6 100644 --- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -16,7 +16,7 @@ from feast.on_demand_feature_view import on_demand_feature_view from feast.repo_config import RepoConfig from feast.types import Float32, Float64, Int64, String, UnixTimestamp, ValueType -from tests.utils.data_source_creator import prep_file_source +from tests.utils.data_source_test_creator import prep_file_source def test_infer_datasource_names_file(): diff --git 
a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py index 4ae42eacf1..44a35e0660 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py +++ b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py @@ -13,7 +13,7 @@ from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig from feast.types import Array, Bytes, Int64, String -from tests.utils.data_source_utils import prep_file_source +from tests.utils.data_source_test_creator import prep_file_source @pytest.mark.parametrize( diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 02240d7dd9..f5818d8e23 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -14,7 +14,7 @@ from feast.stream_feature_view import stream_feature_view from feast.types import Float32 from tests.utils.cli_helper_functions import CliRunner, get_example_repo -from tests.utils.data_source_creator import prep_file_source +from tests.utils.data_source_test_creator import prep_file_source def test_apply_stream_feature_view(simple_dataset_1) -> None: diff --git a/sdk/python/tests/utils/data_source_creator.py b/sdk/python/tests/utils/data_source_test_creator.py similarity index 100% rename from sdk/python/tests/utils/data_source_creator.py rename to sdk/python/tests/utils/data_source_test_creator.py From d5aad96e1ba2d9c472c10d9e780ebd047d2f4dda Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 14:42:50 -0700 Subject: [PATCH 24/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/unit/local_feast_tests/test_e2e_local.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index a21014c3bc..1d8d7009cc 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -41,10 +41,10 @@ def test_e2e_local() -> None: global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", ) as store: _test_materialize_and_online_retrieval( runner, store, start_date, end_date, driver_df From 1cb34ed6d8e7bcb6880d6994e5f974979633c5db Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 15:00:57 -0700 Subject: [PATCH 25/30] update fix Signed-off-by: Kevin Zhang --- .../tests/integration/offline_store/test_feature_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/tests/integration/offline_store/test_feature_logging.py b/sdk/python/tests/integration/offline_store/test_feature_logging.py index d28f92ce6d..c048e46734 100644 --- a/sdk/python/tests/integration/offline_store/test_feature_logging.py +++ b/sdk/python/tests/integration/offline_store/test_feature_logging.py @@ -22,7 +22,7 @@ location, ) from tests.integration.feature_repos.universal.feature_views import 
conv_rate_plus_100 -from tests.utils.feature_test_utils import prepare_logs, to_logs_dataset +from tests.utils.log_test_helper_functions import prepare_logs, to_logs_dataset @pytest.mark.integration From aa6fa79b8f64c396f59ed711f7e9f1271ea3cbdb Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 15:23:27 -0700 Subject: [PATCH 26/30] Fix Signed-off-by: Kevin Zhang --- docs/how-to-guides/adding-or-reusing-tests.md | 3 + .../unit/local_feast_tests/test_e2e_local.py | 19 +- .../test_stream_feature_view_apply.py | 250 +++++++++--------- 3 files changed, 137 insertions(+), 135 deletions(-) diff --git a/docs/how-to-guides/adding-or-reusing-tests.md b/docs/how-to-guides/adding-or-reusing-tests.md index 86c116442f..2fc4a66849 100644 --- a/docs/how-to-guides/adding-or-reusing-tests.md +++ b/docs/how-to-guides/adding-or-reusing-tests.md @@ -142,6 +142,9 @@ def test_historical_features(environment, universal_data_sources, full_feature_n The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories. This by default pulls in a standard dataset with driver and customer entities, certain feature views, and feature values. By including the environment as a parameter, the test automatically parametrizes across other offline / online store combinations. +## Debugging Test Failures + + ## Writing a new test or reusing existing tests ### To add a new test to an existing test file diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index 1d8d7009cc..68106e23a7 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -15,7 +15,6 @@ from tests.utils.feature_store_test_functions import validate_online_features -@pytest.mark.integration def test_e2e_local() -> None: """ Tests the end-to-end workflow of apply, materialize, and online retrieval. 
@@ -40,15 +39,15 @@ def test_e2e_local() -> None: global_stats_path = os.path.join(data_dir, "global_stats.parquet") global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) - with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", - ) as store: - _test_materialize_and_online_retrieval( - runner, store, start_date, end_date, driver_df - ) + with runner.local_repo( + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", + ) as store: + _test_materialize_and_online_retrieval( + runner, store, start_date, end_date, driver_df + ) with runner.local_repo( get_example_repo("example_feature_repo_version_0_19.py") diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index f5818d8e23..978a7480dc 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -36,68 +36,68 @@ def test_apply_stream_feature_view(simple_dataset_1) -> None: global_stats_path = os.path.join(data_dir, "global_stats.parquet") global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) - with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", - ) as fs, prep_file_source( - df=simple_dataset_1, timestamp_field="ts_1" - ) as file_source: - entity = Entity(name="driver_entity", join_keys=["test_key"]) - - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=file_source, - watermark_delay_threshold=timedelta(days=1), - ) - - @stream_feature_view( - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ), - Aggregation( - column="dummy_field2", - function="count", - time_window=timedelta(days=24), - ), - ], - timestamp_field="event_timestamp", - mode="spark", - source=stream_source, - tags={}, - ) - def simple_sfv(df): - return df - - fs.apply([entity, simple_sfv]) - - stream_feature_views = fs.list_stream_feature_views() - assert len(stream_feature_views) == 1 - assert stream_feature_views[0] == simple_sfv - - features = fs.get_online_features( - features=["simple_sfv:dummy_field"], - entity_rows=[{"test_key": 1001}], - ).to_dict(include_event_timestamps=True) - - assert "test_key" in features - assert features["test_key"] == [1001] - assert "dummy_field" in features - assert features["dummy_field"] == [None] + with runner.local_repo( + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", + ) as fs, prep_file_source( + df=simple_dataset_1, timestamp_field="ts_1" + ) as file_source: + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + 
message_format=AvroFormat(""), + topic="topic", + batch_source=file_source, + watermark_delay_threshold=timedelta(days=1), + ) + + @stream_feature_view( + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + column="dummy_field", + function="max", + time_window=timedelta(days=1), + ), + Aggregation( + column="dummy_field2", + function="count", + time_window=timedelta(days=24), + ), + ], + timestamp_field="event_timestamp", + mode="spark", + source=stream_source, + tags={}, + ) + def simple_sfv(df): + return df + + fs.apply([entity, simple_sfv]) + + stream_feature_views = fs.list_stream_feature_views() + assert len(stream_feature_views) == 1 + assert stream_feature_views[0] == simple_sfv + + features = fs.get_online_features( + features=["simple_sfv:dummy_field"], + entity_rows=[{"test_key": 1001}], + ).to_dict(include_event_timestamps=True) + + assert "test_key" in features + assert features["test_key"] == [1001] + assert "dummy_field" in features + assert features["dummy_field"] == [None] def test_stream_feature_view_udf(simple_dataset_1) -> None: @@ -119,71 +119,71 @@ def test_stream_feature_view_udf(simple_dataset_1) -> None: global_stats_path = os.path.join(data_dir, "global_stats.parquet") global_df.to_parquet(path=global_stats_path, allow_truncated_timestamps=True) - with runner.local_repo( - get_example_repo("example_feature_repo_2.py") - .replace("%PARQUET_PATH%", driver_stats_path) - .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), - "file", - ) as fs, prep_file_source( - df=simple_dataset_1, timestamp_field="ts_1" - ) as file_source: - entity = Entity(name="driver_entity", join_keys=["test_key"]) - - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=file_source, - watermark_delay_threshold=timedelta(days=1), - ) - - @stream_feature_view( - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ), - Aggregation( - column="dummy_field2", - function="count", - time_window=timedelta(days=24), - ), - ], - timestamp_field="event_timestamp", - mode="spark", - source=stream_source, - tags={}, - ) - def pandas_view(pandas_df): - import pandas as pd - - assert type(pandas_df) == pd.DataFrame - df = pandas_df.transform(lambda x: x + 10, axis=1) - df.insert(2, "C", [20.2, 230.0, 34.0], True) - return df + with runner.local_repo( + get_example_repo("example_feature_repo_2.py") + .replace("%PARQUET_PATH%", driver_stats_path) + .replace("%PARQUET_PATH_GLOBAL%", global_stats_path), + "file", + ) as fs, prep_file_source( + df=simple_dataset_1, timestamp_field="ts_1" + ) as file_source: + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=file_source, + watermark_delay_threshold=timedelta(days=1), + ) + + @stream_feature_view( + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + 
column="dummy_field", + function="max", + time_window=timedelta(days=1), + ), + Aggregation( + column="dummy_field2", + function="count", + time_window=timedelta(days=24), + ), + ], + timestamp_field="event_timestamp", + mode="spark", + source=stream_source, + tags={}, + ) + def pandas_view(pandas_df): + import pandas as pd + + assert type(pandas_df) == pd.DataFrame + df = pandas_df.transform(lambda x: x + 10, axis=1) + df.insert(2, "C", [20.2, 230.0, 34.0], True) + return df - import pandas as pd + import pandas as pd - fs.apply([entity, pandas_view]) + fs.apply([entity, pandas_view]) - stream_feature_views = fs.list_stream_feature_views() - assert len(stream_feature_views) == 1 - assert stream_feature_views[0] == pandas_view + stream_feature_views = fs.list_stream_feature_views() + assert len(stream_feature_views) == 1 + assert stream_feature_views[0] == pandas_view - sfv = stream_feature_views[0] + sfv = stream_feature_views[0] - df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]}) - new_df = sfv.udf(df) - expected_df = pd.DataFrame( - {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]} - ) - assert new_df.equals(expected_df) + df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]}) + new_df = sfv.udf(df) + expected_df = pd.DataFrame( + {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]} + ) + assert new_df.equals(expected_df) From 27ef453f1a8eadafe2c8a01f34a9d4c9c4726720 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 15:24:36 -0700 Subject: [PATCH 27/30] Revert Signed-off-by: Kevin Zhang --- docs/how-to-guides/adding-or-reusing-tests.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/how-to-guides/adding-or-reusing-tests.md b/docs/how-to-guides/adding-or-reusing-tests.md index 2fc4a66849..86c116442f 100644 --- a/docs/how-to-guides/adding-or-reusing-tests.md +++ b/docs/how-to-guides/adding-or-reusing-tests.md @@ -142,9 +142,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories. This by default pulls in a standard dataset with driver and customer entities, certain feature views, and feature values. By including the environment as a parameter, the test automatically parametrizes across other offline / online store combinations. 
-## Debugging Test Failures - - ## Writing a new test or reusing existing tests ### To add a new test to an existing test file From 1b303ac28312e523a8600e478a2bc3914368f19e Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Thu, 28 Jul 2022 15:49:48 -0700 Subject: [PATCH 28/30] Fix Signed-off-by: Kevin Zhang --- sdk/python/tests/unit/local_feast_tests/test_e2e_local.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index 68106e23a7..734b96765d 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -4,7 +4,6 @@ from pathlib import Path import pandas as pd -import pytest from feast.driver_test_data import ( create_driver_hourly_stats_df, From 9067a45239a3dbc8a236d4c44d4261ef88ff971c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 29 Jul 2022 09:54:09 -0700 Subject: [PATCH 29/30] fix Signed-off-by: Kevin Zhang --- sdk/python/tests/conftest.py | 2 +- sdk/python/tests/integration/e2e/test_go_feature_server.py | 4 ++-- sdk/python/tests/integration/e2e/test_universal_e2e.py | 4 ++-- sdk/python/tests/integration/e2e/test_validation.py | 4 ++-- sdk/python/tests/integration/materialization/test_lambda.py | 2 +- .../tests/integration/offline_store/test_feature_logging.py | 2 +- .../offline_store/test_universal_historical_retrieval.py | 2 +- .../tests/integration/online_store/test_online_retrieval.py | 2 +- sdk/python/tests/integration/registration/test_registry.py | 2 +- .../tests/integration/registration/test_universal_cli.py | 4 ++-- sdk/python/tests/unit/cli/test_cli.py | 2 +- sdk/python/tests/unit/cli/test_cli_apply_duplicates.py | 2 +- sdk/python/tests/unit/cli/test_cli_chdir.py | 2 +- sdk/python/tests/unit/infra/test_local_registry.py | 2 +- sdk/python/tests/unit/local_feast_tests/test_e2e_local.py | 4 ++-- .../tests/unit/local_feast_tests/test_feature_service_read.py | 2 +- sdk/python/tests/unit/local_feast_tests/test_init.py | 2 +- .../unit/local_feast_tests/test_stream_feature_view_apply.py | 2 +- .../utils/{cli_helper_functions.py => cli_repo_creator.py} | 0 ...2e_test_validation_functions.py => e2e_test_validation.py} | 0 .../{feature_store_test_functions.py => feature_records.py} | 0 .../tests/utils/{http_server_functions.py => http_server.py} | 0 .../{log_test_helper_functions.py => test_log_creator.py} | 0 23 files changed, 23 insertions(+), 23 deletions(-) rename sdk/python/tests/utils/{cli_helper_functions.py => cli_repo_creator.py} (100%) rename sdk/python/tests/utils/{e2e_test_validation_functions.py => e2e_test_validation.py} (100%) rename sdk/python/tests/utils/{feature_store_test_functions.py => feature_records.py} (100%) rename sdk/python/tests/utils/{http_server_functions.py => http_server.py} (100%) rename sdk/python/tests/utils/{log_test_helper_functions.py => test_log_creator.py} (100%) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index b11e4b18b6..b4bcccd9c6 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -43,7 +43,7 @@ from tests.integration.feature_repos.universal.data_sources.file import ( # noqa: E402 FileDataSourceCreator, ) -from tests.utils.http_server_functions import check_port_open, free_port # noqa: E402 +from tests.utils.http_server import check_port_open, free_port # noqa: E402 logger = logging.getLogger(__name__) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py 
b/sdk/python/tests/integration/e2e/test_go_feature_server.py index 74f61d3ef5..e5d2508597 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -32,8 +32,8 @@ driver, location, ) -from tests.utils.http_server_functions import check_port_open, free_port -from tests.utils.log_test_helper_functions import ( +from tests.utils.http_server import check_port_open, free_port +from tests.utils.test_log_creator import ( generate_expected_logs, get_latest_rows, ) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index bba9e34437..4633bd22cd 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -8,8 +8,8 @@ from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view from tests.utils.basic_read_write_test import basic_rw_test -from tests.utils.cli_helper_functions import CliRunner, get_example_repo -from tests.utils.e2e_test_validation_functions import ( +from tests.utils.cli_repo_creator import CliRunner, get_example_repo +from tests.utils.e2e_test_validation import ( validate_offline_online_store_consistency, ) diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index 048c0a88f9..7062948f53 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -26,8 +26,8 @@ driver, location, ) -from tests.utils.cli_helper_functions import CliRunner -from tests.utils.log_test_helper_functions import prepare_logs +from tests.utils.cli_repo_creator import CliRunner +from tests.utils.test_log_creator import prepare_logs _features = [ "customer_profile:current_balance", diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 255dc33098..458625120b 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -17,7 +17,7 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) -from tests.utils.e2e_test_validation_functions import ( +from tests.utils.e2e_test_validation import ( validate_offline_online_store_consistency, ) diff --git a/sdk/python/tests/integration/offline_store/test_feature_logging.py b/sdk/python/tests/integration/offline_store/test_feature_logging.py index c048e46734..eba994544d 100644 --- a/sdk/python/tests/integration/offline_store/test_feature_logging.py +++ b/sdk/python/tests/integration/offline_store/test_feature_logging.py @@ -22,7 +22,7 @@ location, ) from tests.integration.feature_repos.universal.feature_views import conv_rate_plus_100 -from tests.utils.log_test_helper_functions import prepare_logs, to_logs_dataset +from tests.utils.test_log_creator import prepare_logs, to_logs_dataset @pytest.mark.integration diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 22e3f3ddc6..718b7577d9 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -27,7 +27,7 @@ 
driver, location, ) -from tests.utils.feature_store_test_functions import ( +from tests.utils.feature_records import ( assert_feature_service_correctness, assert_feature_service_entity_mapping_correctness, get_expected_training_df, diff --git a/sdk/python/tests/integration/online_store/test_online_retrieval.py b/sdk/python/tests/integration/online_store/test_online_retrieval.py index 9166e2809e..988af6e7e9 100644 --- a/sdk/python/tests/integration/online_store/test_online_retrieval.py +++ b/sdk/python/tests/integration/online_store/test_online_retrieval.py @@ -11,7 +11,7 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RegistryConfig -from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.cli_repo_creator import CliRunner, get_example_repo @pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index d50fa08d88..2e1ae448ba 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -26,7 +26,7 @@ from feast.registry import Registry from feast.repo_config import RegistryConfig from feast.types import Array, Bytes, Int64, String -from tests.utils.e2e_test_validation_functions import ( +from tests.utils.e2e_test_validation import ( validate_registry_data_source_apply, ) diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index 7081a16c3f..3e77f74edd 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -9,8 +9,8 @@ from feast.feature_store import FeatureStore from tests.integration.feature_repos.repo_configuration import Environment from tests.utils.basic_read_write_test import basic_rw_test -from tests.utils.cli_helper_functions import CliRunner, get_example_repo -from tests.utils.e2e_test_validation_functions import ( +from tests.utils.cli_repo_creator import CliRunner, get_example_repo +from tests.utils.e2e_test_validation import ( NULLABLE_ONLINE_STORE_CONFIGS, make_feature_store_yaml, ) diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index 0c40590831..9b535ce8fb 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -5,7 +5,7 @@ from assertpy import assertpy -from tests.utils.cli_helper_functions import CliRunner +from tests.utils.cli_repo_creator import CliRunner def test_3rd_party_providers() -> None: diff --git a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py index b6a4fedb4e..f61a46516e 100644 --- a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py +++ b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py @@ -2,7 +2,7 @@ from pathlib import Path from textwrap import dedent -from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.cli_repo_creator import CliRunner, get_example_repo def test_cli_apply_duplicated_featureview_names() -> None: diff --git a/sdk/python/tests/unit/cli/test_cli_chdir.py b/sdk/python/tests/unit/cli/test_cli_chdir.py index d9c049d033..8260a95efd 100644 --- a/sdk/python/tests/unit/cli/test_cli_chdir.py +++ 
b/sdk/python/tests/unit/cli/test_cli_chdir.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from pathlib import Path -from tests.utils.cli_helper_functions import CliRunner +from tests.utils.cli_repo_creator import CliRunner def test_cli_chdir() -> None: diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index b2cbd78e05..c9805f9ee9 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -32,7 +32,7 @@ from feast.stream_feature_view import StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType -from tests.utils.e2e_test_validation_functions import ( +from tests.utils.e2e_test_validation import ( validate_registry_data_source_apply, ) diff --git a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py index 734b96765d..5fbedf944d 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py +++ b/sdk/python/tests/unit/local_feast_tests/test_e2e_local.py @@ -10,8 +10,8 @@ create_global_daily_stats_df, ) from feast.feature_store import FeatureStore -from tests.utils.cli_helper_functions import CliRunner, get_example_repo -from tests.utils.feature_store_test_functions import validate_online_features +from tests.utils.cli_repo_creator import CliRunner, get_example_repo +from tests.utils.feature_records import validate_online_features def test_e2e_local() -> None: diff --git a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py index e95b64ffa9..72392b0396 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py +++ b/sdk/python/tests/unit/local_feast_tests/test_feature_service_read.py @@ -1,7 +1,7 @@ import pytest from tests.utils.basic_read_write_test import basic_rw_test -from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.cli_repo_creator import CliRunner, get_example_repo @pytest.mark.integration diff --git a/sdk/python/tests/unit/local_feast_tests/test_init.py b/sdk/python/tests/unit/local_feast_tests/test_init.py index f361a27cdc..f9bf536e56 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_init.py +++ b/sdk/python/tests/unit/local_feast_tests/test_init.py @@ -3,7 +3,7 @@ from pathlib import Path from textwrap import dedent -from tests.utils.cli_helper_functions import CliRunner +from tests.utils.cli_repo_creator import CliRunner def test_repo_init() -> None: diff --git a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py index 978a7480dc..ca54d882b5 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py +++ b/sdk/python/tests/unit/local_feast_tests/test_stream_feature_view_apply.py @@ -13,7 +13,7 @@ from feast.field import Field from feast.stream_feature_view import stream_feature_view from feast.types import Float32 -from tests.utils.cli_helper_functions import CliRunner, get_example_repo +from tests.utils.cli_repo_creator import CliRunner, get_example_repo from tests.utils.data_source_test_creator import prep_file_source diff --git a/sdk/python/tests/utils/cli_helper_functions.py b/sdk/python/tests/utils/cli_repo_creator.py similarity index 100% rename from sdk/python/tests/utils/cli_helper_functions.py rename to 
sdk/python/tests/utils/cli_repo_creator.py diff --git a/sdk/python/tests/utils/e2e_test_validation_functions.py b/sdk/python/tests/utils/e2e_test_validation.py similarity index 100% rename from sdk/python/tests/utils/e2e_test_validation_functions.py rename to sdk/python/tests/utils/e2e_test_validation.py diff --git a/sdk/python/tests/utils/feature_store_test_functions.py b/sdk/python/tests/utils/feature_records.py similarity index 100% rename from sdk/python/tests/utils/feature_store_test_functions.py rename to sdk/python/tests/utils/feature_records.py diff --git a/sdk/python/tests/utils/http_server_functions.py b/sdk/python/tests/utils/http_server.py similarity index 100% rename from sdk/python/tests/utils/http_server_functions.py rename to sdk/python/tests/utils/http_server.py diff --git a/sdk/python/tests/utils/log_test_helper_functions.py b/sdk/python/tests/utils/test_log_creator.py similarity index 100% rename from sdk/python/tests/utils/log_test_helper_functions.py rename to sdk/python/tests/utils/test_log_creator.py From 96b7efb187196565f9f3cf5184eec74b7a81747c Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 29 Jul 2022 09:55:45 -0700 Subject: [PATCH 30/30] Fix lint Signed-off-by: Kevin Zhang --- sdk/python/tests/integration/e2e/test_go_feature_server.py | 5 +---- sdk/python/tests/integration/e2e/test_universal_e2e.py | 4 +--- sdk/python/tests/integration/materialization/test_lambda.py | 4 +--- sdk/python/tests/integration/registration/test_registry.py | 4 +--- sdk/python/tests/unit/infra/test_local_registry.py | 4 +--- 5 files changed, 5 insertions(+), 16 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py index e5d2508597..0f972e45df 100644 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ b/sdk/python/tests/integration/e2e/test_go_feature_server.py @@ -33,10 +33,7 @@ location, ) from tests.utils.http_server import check_port_open, free_port -from tests.utils.test_log_creator import ( - generate_expected_logs, - get_latest_rows, -) +from tests.utils.test_log_creator import generate_expected_logs, get_latest_rows @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 4633bd22cd..5dc0c042d9 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -9,9 +9,7 @@ from tests.integration.feature_repos.universal.feature_views import driver_feature_view from tests.utils.basic_read_write_test import basic_rw_test from tests.utils.cli_repo_creator import CliRunner, get_example_repo -from tests.utils.e2e_test_validation import ( - validate_offline_online_store_consistency, -) +from tests.utils.e2e_test_validation import validate_offline_online_store_consistency @pytest.mark.integration diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 458625120b..8ffd31e0cd 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -17,9 +17,7 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) -from tests.utils.e2e_test_validation import ( - validate_offline_online_store_consistency, -) +from tests.utils.e2e_test_validation import validate_offline_online_store_consistency 
@pytest.mark.integration diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 2e1ae448ba..0cc161d997 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -26,9 +26,7 @@ from feast.registry import Registry from feast.repo_config import RegistryConfig from feast.types import Array, Bytes, Int64, String -from tests.utils.e2e_test_validation import ( - validate_registry_data_source_apply, -) +from tests.utils.e2e_test_validation import validate_registry_data_source_apply @pytest.fixture diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index c9805f9ee9..d69ae6aafd 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -32,9 +32,7 @@ from feast.stream_feature_view import StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType -from tests.utils.e2e_test_validation import ( - validate_registry_data_source_apply, -) +from tests.utils.e2e_test_validation import validate_registry_data_source_apply @pytest.fixture
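
A note for readers tracing the import churn in the formatting-only patches above: none of it is functional. The formatter wraps an import in parentheses only while the statement exceeds the line-length limit, and collapses it back onto one line once a rename shortens the module path. A minimal sketch of the rule, assuming isort's Black-compatible profile with the default 88-character limit (the repository's actual lint configuration is not shown in this series):

    # Assumption: isort --profile black with line-length 88; the real lint
    # config is not part of this patch series.
    # The old module name pushes the statement past 88 characters, so the
    # formatter wraps it in parentheses with a trailing comma:
    from tests.utils.e2e_test_validation_functions import (
        validate_registry_data_source_apply,
    )

    # After the rename in [PATCH 29/30], the same statement fits within the
    # limit, so [PATCH 30/30] collapses it back onto a single line:
    from tests.utils.e2e_test_validation import validate_registry_data_source_apply

That is why the final patch touches only import statements: the module renames in the previous patch shortened several import paths below the wrap threshold.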