In [1]:
# Scaffold a new Feast repository named online_demo via the feast CLI.
!feast init online_demo


Creating a new Feast repository in /Users/franciscojavierarceo/GitHub/feast/examples/data-quality-monitoring/online_demo.



In [2]:
# Use the explicit %cd magic: a bare `cd` relies on IPython automagic being
# enabled and on no Python variable named `cd` shadowing it.
%cd online_demo/feature_repo/

/Users/franciscojavierarceo/GitHub/feast/examples/data-quality-monitoring/online_demo/feature_repo


In [11]:
# Source text of the Feast feature definitions (data sources, entity, feature
# views, on-demand feature view and feature service). Nothing in this string is
# executed by this cell; it is only stored in `x` so that it can be written out
# to features.py afterwards.
x = '''from datetime import timedelta

import pandas as pd
from feast import (
    Entity,
    FeatureView,
    FeatureService,
    Field,
    FileSource,
    PushSource,
    RequestSource,
    ValueType,
)
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Float32, Float64, Int64, String

driver_hourly_stats = FileSource(
    path="../../driver_stats_with_string.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
    description="A table describing the stats of a driver, such as the average daily number of trips.",
    owner="test@gmail.com",
)

global_features = FileSource(
    path="../../global_features.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
    description="A table with global features around drivers.",
    owner="test@gmail.com",
)

driver_stats_push_source = PushSource(
    name="driver_stats_push_source",
    batch_source=driver_hourly_stats,
)

driver = Entity(
    name="driver",
    join_keys=["driver_id"],
    description="driver id",
)

driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    ttl=timedelta(seconds=8640000000),
    schema=[
        Field(name="driver_id", dtype=Int64),
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
        Field(name="string_feature", dtype=String),
    ],
    online=True,
    source=driver_stats_push_source,
    tags={"production": "True"},
    owner="test2@gmail.com",
)

global_features_view = FeatureView(
    name="global_driver_features",
    entities=[],
    ttl=timedelta(seconds=8640000000),
    schema=[
        Field(name="total_trips_today_by_all_drivers", dtype=Float32),
    ],
    online=True,
    source=global_features,
    tags={"production": "True"},
    owner="test2@gmail.com",
)

# Define a request data source which encodes features / information only
# available at request time (e.g. part of the user initiated HTTP request)
input_request = RequestSource(
    name="vals_to_add",
    schema=[
        Field(name="val_to_add", dtype=Int64),
        Field(name="val_to_add_2", dtype=Int64),
    ],
)


# Define an on demand feature view which can generate new features based on
# existing feature views and RequestSource features
@on_demand_feature_view(
    sources=[driver_hourly_stats_view, input_request],
    schema=[
        Field(name="conv_rate_plus_val1", dtype=Float64),
        Field(name="conv_rate_plus_val2", dtype=Float64),
    ],
)
def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
    df = pd.DataFrame()
    df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"]
    df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
    return df


feature_service = FeatureService(
    name="convrate_plus100",
    features=[
        driver_hourly_stats_view[["conv_rate", "avg_daily_trips"]],
        transformed_conv_rate,
    ],
    owner="test3@gmail.com",
)'''

In [17]:
# Remove the example definitions file. -f keeps the cell quiet and successful
# when the file is already gone, so the notebook can be re-run from the top.
! rm -f example_repo.py

In [18]:
# Write the feature definitions held in `x` to features.py. The context manager
# closes the file handle even if the write raises.
with open('features.py', 'w') as f:
    f.write(x)

In [19]:
# Text of the Feast project configuration: local provider, SQLite online store
# at data/online_store.db, file-based offline store.
fyaml = '''project: feast_demo
provider: local
online_store:
  type: sqlite
  path: data/online_store.db
offline_store:
  type: file
entity_key_serialization_version: 2
flags:
  alpha_features: true
  on_demand_transforms: true
'''

# Write the configuration to feature_store.yaml. The context manager closes
# the file handle even if the write raises.
with open('feature_store.yaml', 'w') as f:
    f.write(fyaml)

In [21]:
# Run `feast apply` in the current directory; the captured output lists the
# entity, feature views, feature service and SQLite tables it created.
! feast apply

Created entity driver
Created feature view global_driver_features
Created feature view driver_hourly_stats
Created on demand feature view transformed_conv_rate
Created feature service convrate_plus100

Created sqlite table feast_demo_driver_hourly_stats
Created sqlite table feast_demo_global_driver_features



In [22]:
from datetime import datetime

import grpc
import pandas as pd
from feast import FeatureStore
from feast.protos.feast.serving.ServingService_pb2 import (
    FeatureList,
    GetOnlineFeaturesRequest,
)
from feast.protos.feast.serving.ServingService_pb2_grpc import ServingServiceStub
from feast.protos.feast.types.Value_pb2 import RepeatedValue, Value


# Sample logic to fetch from a local gRPC java server deployed at 6566
def fetch_java():
    """Print the online ``conv_rate`` feature fetched over gRPC.

    Sends a single ``GetOnlineFeaturesRequest`` for
    ``driver_hourly_stats:conv_rate`` and driver ids 1001 and 1002 to the
    serving endpoint at ``localhost:6566`` and prints the response.

    The call is made over an insecure (plaintext) channel, so it is only
    suitable for a local demo server.
    """
    feature_refs = FeatureList(val=["driver_hourly_stats:conv_rate"])
    entity_rows = {
        "driver_id": RepeatedValue(
            val=[Value(int64_val=driver_id) for driver_id in range(1001, 1003)]
        )
    }
    request = GetOnlineFeaturesRequest(
        features=feature_refs,
        entities=entity_rows,
    )

    # Use the channel as a context manager so the connection is closed once
    # the response has been printed (or if the call raises).
    with grpc.insecure_channel("localhost:6566") as channel:
        stub = ServingServiceStub(channel)
        print(stub.GetOnlineFeatures(request))


def _print_feature_dict(feature_dict):
    """Print each ``key : value`` pair of ``feature_dict``, ordered by key."""
    for key, value in sorted(feature_dict.items()):
        print(key, " : ", value)


def _print_online_features(store, features):
    """Fetch online features for driver 1001 from ``store`` and print them.

    ``features`` is passed straight through to ``store.get_online_features``;
    it may be a list of feature references or a feature service object.
    The entity row also carries the request-time values ``val_to_add`` and
    ``val_to_add_2``.
    """
    # Built fresh on every call so no lookup shares mutable state with another.
    entity_rows = [
        {
            "driver_id": 1001,
            "val_to_add": 1000,
            "val_to_add_2": 2000,
        }
    ]
    online_response = store.get_online_features(
        features=features,
        entity_rows=entity_rows,
    ).to_dict()
    _print_feature_dict(online_response)


def run_demo():
    """Walk through historical retrieval, online retrieval and a push.

    Steps, each announced by a printed banner:

    1. Retrieve historical features for four drivers and print the head of
       the resulting DataFrame.
    2. Retrieve and print online features for driver 1001.
    3. Push a single new row for driver 1001 to ``driver_stats_push_source``.
    4. Retrieve and print the same online features again.
    5. Retrieve and print online features via the ``convrate_plus100``
       feature service.

    The feature store is opened from the current working directory.
    """
    store = FeatureStore(repo_path=".")

    # Feature references used by both direct online lookups (steps 2 and 4).
    online_feature_refs = [
        "driver_hourly_stats:string_feature",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
        "transformed_conv_rate:conv_rate_plus_val1",
        "transformed_conv_rate:conv_rate_plus_val2",
    ]

    print("--- Historical features ---")
    entity_df = pd.DataFrame.from_dict(
        {
            "driver_id": [1001, 1002, 1003, 1004],
            "event_timestamp": [
                datetime(2021, 4, 12, 10, 59, 42),
                datetime(2021, 4, 12, 8, 12, 10),
                datetime(2021, 4, 12, 16, 40, 26),
                datetime(2021, 4, 12, 15, 1, 12),
            ],
            "val_to_add": [1, 2, 3, 4],
            "val_to_add_2": [10, 20, 30, 40],
        }
    )
    training_df = store.get_historical_features(
        entity_df=entity_df,
        features=[
            "driver_hourly_stats:string_feature",
            "driver_hourly_stats:conv_rate",
            "driver_hourly_stats:acc_rate",
            "driver_hourly_stats:avg_daily_trips",
            "transformed_conv_rate:conv_rate_plus_val1",
            "transformed_conv_rate:conv_rate_plus_val2",
        ],
    ).to_df()
    print(training_df.head())

    print("\n--- Online features ---")
    _print_online_features(store, list(online_feature_refs))

    print("\n--- Simulate a stream event ingestion of the hourly stats df ---")
    event_df = pd.DataFrame.from_dict(
        {
            "driver_id": [1001],
            "event_timestamp": [
                datetime(2021, 5, 13, 10, 59, 42),
            ],
            "created": [
                datetime(2021, 5, 13, 10, 59, 42),
            ],
            "conv_rate": [1.0],
            "acc_rate": [1.0],
            "avg_daily_trips": [1000],
            # One-element list, consistent with the other single-row columns.
            "string_feature": ["test2"],
        }
    )
    print(event_df)
    store.push("driver_stats_push_source", event_df)

    print("\n--- Online features again with updated values from a stream push---")
    _print_online_features(store, list(online_feature_refs))

    print("\n--- Online features retrieved through a feature service---")
    _print_online_features(store, store.get_feature_service("convrate_plus100"))


# Run the demo only when this code is executed as the main module.
if __name__ == "__main__":
    run_demo()



--- Historical features ---
   driver_id           event_timestamp  val_to_add  val_to_add_2  \
0       1001 2021-04-12 10:59:42+00:00           1            10   
1       1002 2021-04-12 08:12:10+00:00           2            20   
2       1003 2021-04-12 16:40:26+00:00           3            30   
3       1004 2021-04-12 15:01:12+00:00           4            40   

  string_feature  conv_rate  acc_rate  avg_daily_trips  conv_rate_plus_val1  \
0           test   0.701558  0.195824              566             1.701558   
1           test   0.775499  0.947109              890             2.775499   
2           test   0.186658  0.245490              971             3.186658   
3           test   0.891017  0.118256              154             4.891017   

   conv_rate_plus_val2  
0            10.701558  
1            20.775499  
2            30.186658  
3            40.891017  

--- Online features ---
acc_rate  :  [None]
avg_daily_trips  :  [None]
conv_rate_plus_val1  :  [None]
conv_ra