-
Notifications
You must be signed in to change notification settings - Fork 950
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* support parquet ingestion
  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
* better comments
  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
* Update sdk/python/feast/feature_store.py
  Co-authored-by: Willem Pienaar <6728866+woop@users.noreply.github.com>
  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
* better help for materialize command
  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
- Loading branch information
Showing
11 changed files
with
361 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from google.protobuf.duration_pb2 import Duration | ||
|
||
from feast import Entity, Feature, FeatureView, ValueType | ||
from feast.data_source import FileSource | ||
|
||
# Parquet-backed source for the driver statistics feature view.
# It has its own name so that it is not shadowed by the feature view defined
# below, which is bound to `driver_hourly_stats`.
driver_hourly_stats_source = FileSource(
    path="%PARQUET_PATH%",  # placeholder to be replaced by the test
    event_timestamp_column="datetime",
    created_timestamp_column="created",
)

# Entity that the feature view below is keyed on.
driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",)


# Feature view exposing three driver features read from the parquet source.
driver_hourly_stats = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400 * 1),  # 86400 seconds == one day
    features=[
        Feature(name="conv_rate", dtype=ValueType.FLOAT),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    online=True,
    input=driver_hourly_stats_source,
    tags={},
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import os | ||
import tempfile | ||
from datetime import datetime, timedelta | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
|
||
import tests.driver_test_data as driver_data | ||
from tests.cli.utils import CliRunner, get_example_repo | ||
|
||
|
||
def _get_last_feature_row(df: pd.DataFrame, driver_id):
    """Manually extract the last feature row from a dataframe for a given driver_id.

    The "last" row is the one with the greatest ``datetime`` value among the
    rows whose ``driver_id`` column equals ``driver_id``; if several rows share
    that timestamp, the one with the greatest ``created`` value is returned.

    Args:
        df: Dataframe with ``driver_id``, ``datetime`` and ``created`` columns.
        driver_id: Value to match against the ``driver_id`` column.

    Returns:
        The selected row as a ``pd.Series``.

    Raises:
        ValueError: If ``df`` has no rows for ``driver_id``.
    """
    filtered = df[df["driver_id"] == driver_id]
    if filtered.empty:
        raise ValueError(f"no rows found for driver_id={driver_id!r}")

    max_ts = filtered["datetime"].max()
    filtered_by_ts = filtered[filtered["datetime"] == max_ts]

    # Select by position rather than by index label so that a dataframe with a
    # non-unique index still yields exactly one row.
    return filtered_by_ts.iloc[filtered_by_ts["created"].argmax()]
|
||
|
||
class TestLocalEndToEnd:
    """End-to-end test: parquet data is materialized via the CLI and read back."""

    def test_basic(self) -> None:
        """
        1. Create a repo.
        2. Apply
        3. Ingest some data to online store from parquet
        4. Read from the online store to make sure it made it there.
        """

        runner = CliRunner()
        with tempfile.TemporaryDirectory() as data_dir:

            # Generate some test data in parquet format.
            end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
            start_date = end_date - timedelta(days=15)

            driver_entities = [1001, 1002, 1003, 1004, 1005]
            # Driver whose features are read back and verified below.
            checked_driver_id = driver_entities[0]
            driver_df = driver_data.create_driver_hourly_stats_df(
                driver_entities, start_date, end_date
            )

            driver_stats_path = os.path.join(data_dir, "driver_stats.parquet")
            driver_df.to_parquet(
                path=driver_stats_path, allow_truncated_timestamps=True
            )

            # Note that runner takes care of running apply/teardown for us here.
            # We patch python code in example_feature_repo_2.py to set the path to Parquet files.
            repo_source = get_example_repo("example_feature_repo_2.py").replace(
                "%PARQUET_PATH%", driver_stats_path
            )
            with runner.local_repo(repo_source) as store:

                assert store.repo_path is not None

                # feast materialize
                r = runner.run(
                    [
                        "materialize",
                        str(store.repo_path),
                        start_date.isoformat(),
                        end_date.isoformat(),
                    ],
                    cwd=Path(store.repo_path),
                )

                assert (
                    r.returncode == 0
                ), f"feast materialize exited with code {r.returncode}"

                # Read features back
                result = store.get_online_features(
                    feature_refs=[
                        "driver_hourly_stats:conv_rate",
                        "driver_hourly_stats:avg_daily_trips",
                    ],
                    entity_rows=[{"driver_id": checked_driver_id}],
                )

                # Convert the response once and reuse it for every assertion.
                features = result.to_dict()

                assert "driver_hourly_stats:avg_daily_trips" in features

                assert "driver_hourly_stats:conv_rate" in features

                # The online value must match the most recent row of the
                # generated dataframe for the same driver, within 0.01.
                expected_row = _get_last_feature_row(driver_df, checked_driver_id)
                actual_conv_rate = features["driver_hourly_stats:conv_rate"][0]
                assert abs(actual_conv_rate - expected_row["conv_rate"]) < 0.01
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.