# Experiment

## Install Python dependencies

In [1]:
!pip install -q onnx onnxruntime tf2onnx

Import the dependencies for the model training code:

In [2]:
import datetime
import numpy as np
import onnx
import pandas as pd
import pickle
import tf2onnx

from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from pathlib import Path

2024-09-26 14:36:13.683862: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-26 14:36:13.683933: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-26 14:36:13.685206: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-26 14:36:13.692720: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


The output might show TensorFlow messages, such as a "Could not find TensorRT" warning. You can ignore these messages.


## Load the CSV data

The CSV data that you use to train the model contains the following fields:

* **`distance_from_home`** - The distance from home where the transaction happened.
* **`distance_from_last_transaction`** - The distance from the place where the previous transaction happened.
* **`ratio_to_median_purchase_price`** - The ratio of the purchase price to the median purchase price.
* **`repeat_retailer`** - Whether the purchase was made at a retailer the customer has bought from before.
* **`used_chip`** - Whether the credit card chip was used.
* **`used_pin_number`** - Whether the PIN was used.
* **`online_order`** - Whether it was an online order.
* **`fraud`** - Whether the transaction is fraudulent.

## Install Feast project

In [3]:
!pip install -q --upgrade pip
!pip install -q feast
!pip install -q psycopg==3.2.2
!pip install -q psycopg-pool==3.2.3
!pip list | grep psyco
!feast version

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
kfp 2.5.0 requires protobuf<4,>=3.13.0, but you have protobuf 4.25.5 which is incompatible.
kfp-kubernetes 1.0.0 requires protobuf<4,>=3.13.0, but you have protobuf 4.25.5 which is incompatible.
kfp-pipeline-spec 0.2.2 requires protobuf<4,>=3.13.0, but you have protobuf 4.25.5 which is incompatible.
tf2onnx 1.16.1 requires protobuf~=3.20, but you have protobuf 4.25.5 which is incompatible.[0m[31m
[0mpsycopg                      3.2.2
psycopg-pool                 3.2.3
Feast SDK Version: "0.40.1"


In [4]:
# Forward Feast logs to the notebook output
import logging
import sys
from io import StringIO

# Configure the root logger: INFO and above, rendered as "<timestamp> - <message>"
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger()

In [5]:
# Export the location of the Feast feature repository (relative to the working
# directory) and the project root, so that both Python cells and shell commands
# can read them.
%env FEAST_REPO=feast_fraud/feature_repo
%env ROOT_DIR=/opt/app-root/src/fraud-detection
# List the repository contents as a sanity check
!ls $FEAST_REPO

env: FEAST_REPO=feast_fraud/feature_repo
env: ROOT_DIR=/opt/app-root/src/fraud-detection
data		    offline_store.yaml	registry_store.yaml
feature_store.yaml  online_store.yaml


In [6]:
# Apply the feature repository to the registry, then list what is registered.
# NOTE(review): in the recorded run the repo contains only YAML files and a data
# directory, and `apply` removed the previously registered feature views, leaving
# every listing empty — confirm that this reset is intended.
!feast -c $FEAST_REPO apply
!feast -c $FEAST_REPO entities list
!feast -c $FEAST_REPO data-sources list
!feast -c $FEAST_REPO feature-views list

Removing infrastructure for [1m[31mvalidation_fv[0m
Removing infrastructure for [1m[31mtraining_fv[0m
Removing infrastructure for [1m[31mtest_fv[0m
NAME    DESCRIPTION    TYPE
NAME    CLASS
NAME    ENTITIES    TYPE


In [63]:
# common imports
from datetime import timedelta

import pandas as pd
import os

from feast import (
    Entity,
    FeatureView,
    Field,
)
from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (
    PostgreSQLSource,
)
from feast.feature_store import FeatureStore
from feast.feature_logging import LoggingConfig
from feast.infra.offline_stores.file_source import FileLoggingDestination
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Float32, Float64, Int64

## Data preparation
1. Add the entity key (`customer_id`) to the tables
1. Add the `ts` (event timestamp) and `created` fields
1. Convert to parquet files

Since the original dataset has no notion of a `customer`, we replicate that setup: we add a new `customer_id` field and assign the same value to every record.

In [49]:
from random import randrange
def add_customer_id(df, customer_id=1):
    """Add a constant ``customer_id`` column to ``df`` (modified in place).

    Args:
        df: The pandas DataFrame to extend.
        customer_id: Value written to every row. Defaults to 1, i.e. all
            records belong to the same customer.

    Returns:
        The same ``df`` object, so the call can be chained.
    """
    df['customer_id'] = customer_id
    return df

def add_timestamps(df):
    """Return a copy of ``df`` extended with ``ts`` and ``created`` columns.

    One timestamp is generated per row, spaced one hour apart and ending at the
    current time (truncated to whole seconds); ``created`` is a copy of ``ts``.
    The input frame is not modified.

    Args:
        df: The pandas DataFrame to extend.

    Returns:
        A new DataFrame whose first columns are ``customer_id`` (when present),
        ``ts`` and ``created``, followed by the remaining columns in their
        original order.
    """
    # Create time series: one entry every hour, up to now.
    # A Timedelta is passed instead of the 'H' alias, which recent pandas
    # versions deprecate.
    timestamps = pd.date_range(
        end=pd.Timestamp.now().replace(microsecond=0),
        periods=len(df),
        freq=pd.Timedelta(hours=1),
    )

    # Assigning an Index sets the values by position, so this also works when
    # `df` does not carry the default 0..n-1 index.
    stamped = df.assign(ts=timestamps, created=timestamps)

    # Order by name rather than by hard-coded positions, so that the result does
    # not depend on how many columns the input happens to have.
    leading = [name for name in ('customer_id', 'ts', 'created') if name in stamped.columns]
    trailing = [name for name in stamped.columns if name not in leading]
    return stamped[leading + trailing]

In [50]:
xtrain = pd.read_csv('data/train.csv')
xval = pd.read_csv('data/validate.csv')
xtest = pd.read_csv('data/test.csv')

add_customer_id(xtrain)
add_customer_id(xval)
add_customer_id(xtest)

xtrain = add_timestamps(xtrain)
xval = add_timestamps(xval)
xtest = add_timestamps(xtest)

!rm data/*.parquet
xtrain.to_parquet('data/train.parquet')
xval.to_parquet('data/validate.parquet')
xtest.to_parquet('data/test.parquet')

print("-----xtrain-----")
xtrain.info()
print("-----len(xtrain)-----")
print(len(xtrain))
print("-----len(xval)-----")
print(len(xval))
print("-----len(xtest)-----")
print(len(xtest))

  timestamps = pd.date_range(
  timestamps = pd.date_range(
  timestamps = pd.date_range(


-----xtrain-----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600000 entries, 0 to 599999
Data columns (total 11 columns):
 #   Column                          Non-Null Count   Dtype         
---  ------                          --------------   -----         
 0   customer_id                     600000 non-null  int64         
 1   ts                              600000 non-null  datetime64[ns]
 2   created                         600000 non-null  datetime64[ns]
 3   distance_from_home              600000 non-null  float64       
 4   distance_from_last_transaction  600000 non-null  float64       
 5   ratio_to_median_purchase_price  600000 non-null  float64       
 6   repeat_retailer                 600000 non-null  float64       
 7   used_chip                       600000 non-null  float64       
 8   used_pin_number                 600000 non-null  float64       
 9   online_order                    600000 non-null  float64       
 10  fraud                           600000 

In [51]:
# Check that the parquet files were written, and how large they are
!ls -lh data/*parquet

-rw-r--r--. 1 1001210000 1001210000 8.8M Sep 26 16:25 data/test.parquet
-rw-r--r--. 1 1001210000 1001210000  24M Sep 26 16:25 data/train.parquet
-rw-r--r--. 1 1001210000 1001210000 8.8M Sep 26 16:25 data/validate.parquet


## Define a SQL store
A PostgreSQL service is deployed in the current namespace, and database tables are created and populated with the data from the `xtrain`, `xval` and `xtest` data frames.

In [52]:
namespace_path='/var/run/secrets/kubernetes.io/serviceaccount/namespace'
with open(namespace_path, "r") as file:
    current_namespace = file.read().strip()
print(f"Current namespace is {current_namespace}")
os.environ['CURRENT_NS'] = current_namespace
!echo $CURRENT_NS

Current namespace is dmartino-fraud-detection
dmartino-fraud-detection


### Deploy PostgreSQL from template
From the OpenShift console, create an instance of PostgreSQL database with the following options in the current namespace:
* DATABASE_SERVICE_NAME=postgresql 
* POSTGRESQL_USER=feast 
* POSTGRESQL_PASSWORD=feast
* POSTGRESQL_DATABASE=feast 
* VOLUME_CAPACITY=2Gi 
* MEMORY_LIMIT=1Gi

In [53]:
# Setup DB connection attributes
# The defaults match the values used to deploy PostgreSQL from the template. Each
# can be overridden through a PSQL_* environment variable, so that real
# credentials never have to be written into the notebook.
# (POSTGRESQL_* names are avoided on purpose: Kubernetes injects variables such as
# POSTGRESQL_PORT=tcp://... for a service named `postgresql`.)
psqlHost = os.environ.get('PSQL_HOST', f'postgresql.{current_namespace}.svc.cluster.local')
psqlPort = int(os.environ.get('PSQL_PORT', '5432'))
psqlUsername = os.environ.get('PSQL_USER', 'feast')
psqlPassword = os.environ.get('PSQL_PASSWORD', 'feast')
psqlDb = os.environ.get('PSQL_DB', 'feast')
psqlSchema = 'public'

In [56]:
%%time

# Load DataFrame to DB using `to_sql` method of pandas DataFrame
import psycopg
from sqlalchemy import create_engine, text, select, func, Column, BigInteger, Float, TIMESTAMP, MetaData, Table
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
from sqlalchemy.exc import ProgrammingError

engine = create_engine(f'postgresql+psycopg://{psqlUsername}:{psqlPassword}@{psqlHost}:{str(psqlPort)}/{psqlDb}')
metadata = MetaData()
metadata.reflect(bind=engine)

for t in ['fraud_train', 'fraud_validate', 'fraud_test']:
    if t in metadata.tables:
        table = metadata.tables[t]
        with engine.connect() as connection:
            with connection.begin():
                row_count = connection.execute(select(func.count()).select_from(text(t))).scalar()
                print(f"Deleting {row_count} rows from {t}")
                connection.execute(table.delete())
                print(f"Dropping {t}")
                connection.execute(table.drop(bind=engine))
    print(f"Creating {t}")
    table = Table(
        t, metadata,
        Column('customer_id', BigInteger, primary_key=True),
        Column('ts', TIMESTAMP, primary_key=True),
        Column('created', TIMESTAMP),
        Column('distance_from_home', DOUBLE_PRECISION),
        Column('distance_from_last_transaction', DOUBLE_PRECISION),
        Column('ratio_to_median_purchase_price', DOUBLE_PRECISION),
        Column('repeat_retailer', DOUBLE_PRECISION),
        Column('used_chip', DOUBLE_PRECISION),
        Column('used_pin_number', DOUBLE_PRECISION),
        Column('online_order', DOUBLE_PRECISION),
        Column('fraud', DOUBLE_PRECISION)
    )

metadata.create_all(engine)    

print("Persisting xtrain...")
xtrain.to_sql('fraud_train', engine, if_exists='append', index=False, schema=psqlSchema)
print("Persisting xval...")
xval.to_sql('fraud_validate', engine, if_exists='append', index=False, schema=psqlSchema)
print("Persisting xtest...")
xtest.to_sql('fraud_test', engine, if_exists='append', index=False, schema=psqlSchema)

Creating fraud_train
Creating fraud_validate
Creating fraud_test
Persisting xtrain...
Persisting xval...
Persisting xtest...
CPU times: user 1min 7s, sys: 22.7 s, total: 1min 30s
Wall time: 4min 42s


-1

In [57]:
# Validate row count
for table_name in ('fraud_train', 'fraud_validate', 'fraud_test'):
    # Tables unknown to the metadata are skipped silently
    if table_name not in metadata.tables:
        continue
    count_query = select(func.count()).select_from(text(table_name))
    with engine.connect() as connection:
        row_count = connection.execute(count_query).scalar()
    print(f"Counted rows in {table_name}: {row_count}")

Counted rows in fraud_train: 600000
Counted rows in fraud_validate: 200000
Counted rows in fraud_test: 200000


## Define the Feature Store
* Map the PostgreSQL tables to `PostgreSQLSource`s
* Define FeatureViews for training purposes
....

**Note**: we cannot apply feature store definitions from the remote servers because of GH issue [4592: Remote apply](https://github.com/feast-dev/feast/issues/4529), so we use a direct connection to the DB

In [58]:
# Show the feature store configuration of the repository.
# NOTE(review): the printed file contains the database credentials — clear this
# output before sharing the notebook if they are not throwaway values.
!cat $FEAST_REPO/feature_store.yaml

project: feast_fraud
registry:
    registry_type: sql
    path: postgresql+psycopg://feast:feast@postgresql:5432/feast
    cache_ttl_seconds: 60
    sqlalchemy_config_kwargs:
        echo: false
        pool_pre_ping: true
online_store:
    type: postgres
    host: postgresql
    port: 5432
    database: feast
    db_schema: public
    user: feast
    password: feast
offline_store:
    type: postgres
    host: postgresql
    port: 5432
    database: feast
    db_schema: public
    user: feast
    password: feast
entity_key_serialization_version: 2


In [59]:
# Initialize the store
# The repository path is taken from the FEAST_REPO environment variable
feast_repo_path = os.environ['FEAST_REPO']
store = FeatureStore(repo_path=feast_repo_path)
print(store.list_entities())

[]


In [64]:
%%time
# Create the PostgreSQLSource
train_source = PostgreSQLSource(
    name="train_source",
    query="SELECT * FROM fraud_train",
    timestamp_field="ts",
    created_timestamp_column="created",
)
validate_source = PostgreSQLSource(
    name="validate_source",
    query="SELECT * FROM fraud_validate",
    timestamp_field="ts",
    created_timestamp_column="created",
)
test_source = PostgreSQLSource(
    name="test_source",
    query="SELECT * FROM fraud_test",
    timestamp_field="ts",
    created_timestamp_column="created",
)
store.registry.apply_data_source(train_source, store.project)
store.registry.apply_data_source(validate_source, store.project)
store.registry.apply_data_source(test_source, store.project)
!feast -c $FEAST_REPO data-sources list

NAME             CLASS
train_source     <class 'feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource'>
validate_source  <class 'feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource'>
test_source      <class 'feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource'>
CPU times: user 32.5 ms, sys: 24.6 ms, total: 57.1 ms
Wall time: 3.16 s


In [65]:
# Customer entity
customer = Entity(name="customer", join_keys=["customer_id"])
store.registry.apply_entity(customer, store.project)
!feast -c $FEAST_REPO entities list

NAME      DESCRIPTION    TYPE
customer                 ValueType.UNKNOWN


In [80]:
%%time
training_fv = FeatureView(
    name="training_fv",
    entities=[customer],
    # entities=[],
    ttl=timedelta(days=1),
    schema=[
        Field(name="customer_id", dtype=Int64),
        Field(name="distance_from_last_transaction", dtype=Float64),
        Field(name="ratio_to_median_purchase_price", dtype=Float64),
        Field(name="used_chip", dtype=Float64),
        Field(name="used_pin_number", dtype=Float64),
        Field(name="online_order", dtype=Float64),
        Field(name="fraud", dtype=Float64),
    ],
    online=True,
    source=train_source,
    tags={"team": "training"},
)
validation_fv = FeatureView(
    name="validation_fv",
    entities=[customer],
    # entities=[],
    ttl=timedelta(days=1),
    schema=[
        Field(name="customer_id", dtype=Int64),
        Field(name="distance_from_last_transaction", dtype=Float64),
        Field(name="ratio_to_median_purchase_price", dtype=Float64),
        Field(name="used_chip", dtype=Float64),
        Field(name="used_pin_number", dtype=Float64),
        Field(name="online_order", dtype=Float64),
        Field(name="fraud", dtype=Float64),
    ],
    online=True,
    source=validate_source,
    tags={"team": "training"},
)
test_fv = FeatureView(
    name="test_fv",
    entities=[customer],
    # entities=[],
    ttl=timedelta(days=1),
    schema=[
        Field(name="customer_id", dtype=Int64),
        Field(name="distance_from_last_transaction", dtype=Float64),
        Field(name="ratio_to_median_purchase_price", dtype=Float64),
        Field(name="used_chip", dtype=Float64),
        Field(name="used_pin_number", dtype=Float64),
        Field(name="online_order", dtype=Float64),
        Field(name="fraud", dtype=Float64),
    ],
    online=True,
    source=test_source,
    tags={"team": "training"},
)
store.registry.apply_feature_view(training_fv, store.project)
store.registry.apply_feature_view(validation_fv, store.project)
store.registry.apply_feature_view(test_fv, store.project)
!feast -c $FEAST_REPO feature-views list

NAME           ENTITIES      TYPE
training_fv    {'customer'}  FeatureView
validation_fv  {'customer'}  FeatureView
test_fv        {'customer'}  FeatureView
CPU times: user 21.5 ms, sys: 33.7 ms, total: 55.2 ms
Wall time: 3.17 s


In [81]:
# Read the training feature view back from the registry and display it
fv = store.registry.get_feature_view(
    'training_fv',
    store.project,
)
fv

<FeatureView(name = training_fv, entities = ['customer'], ttl = 1 day, 0:00:00, stream_source = None, batch_source = {
  "type": "CUSTOM_SOURCE",
  "timestampField": "ts",
  "createdTimestampColumn": "created",
  "customOptions": {
    "configuration": "eyJuYW1lIjogInRyYWluX3NvdXJjZSIsICJxdWVyeSI6ICJTRUxFQ1QgKiBGUk9NIGZyYXVkX3RyYWluIiwgInRhYmxlIjogIiJ9"
  },
  "dataSourceClassType": "feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource",
  "name": "train_source"
}, entity_columns = [customer_id-Int64], features = [distance_from_last_transaction-Float64, ratio_to_median_purchase_price-Float64, used_chip-Float64, used_pin_number-Float64, online_order-Float64, fraud-Float64], description = , tags = {'team': 'training'}, owner = , projection = FeatureViewProjection(name='training_fv', name_alias=None, desired_features=[], features=[distance_from_last_transaction-Float64, ratio_to_median_purchase_price-Float64, used_chip-Float64, used_pin_number-Float64

## Start Feast services
A fully distributed Feast environment is deployed using Helm:
* Registry
* Online Store
* Offline Store

Run the following commands from a local clone of this git repo.

Generate base64 encoded feature configurations
```console
REGISTRY_CONFIG_BASE64=$(cat feast_fraud/feature_repo/registry_store.yaml | base64 -w0)
ONLINE_CONFIG_BASE64=$(cat feast_fraud/feature_repo/online_store.yaml | base64 -w0)
OFFLINE_CONFIG_BASE64=$(cat feast_fraud/feature_repo/offline_store.yaml | base64 -w0)
```

Initialize the image settings:
```console
FEAST_IMAGE_REPO=feastdev/feature-server
FEAST_IMAGE_VERSION=latest
```

Set up the Helm repository:
```console
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```

Log in to the cluster and set the current project as the default.

Then run the following command to install the Registry server:
```console
helm upgrade --install feast-registry feast-charts/feast-feature-server \
--set fullnameOverride=registry-server --set feast_mode=registry \
--set image.repository=${FEAST_IMAGE_REPO} --set image.tag=${FEAST_IMAGE_VERSION} \
--set feature_store_yaml_base64=$REGISTRY_CONFIG_BASE64

oc wait --for=condition=available deployment/registry-server --timeout=2m
```

Run the following command to install the Offline server:
```console
helm upgrade --install feast-offline feast-charts/feast-feature-server \
--set fullnameOverride=offline-server --set feast_mode=offline \
--set image.repository=${FEAST_IMAGE_REPO} --set image.tag=${FEAST_IMAGE_VERSION} \
--set feature_store_yaml_base64=$OFFLINE_CONFIG_BASE64

oc wait --for=condition=available deployment/offline-server --timeout=2m
```

Run the following command to install the Online server:
```console
helm upgrade --install feast-online feast-charts/feast-feature-server \
--set fullnameOverride=online-server --set feast_mode=online \
--set image.repository=${FEAST_IMAGE_REPO} --set image.tag=${FEAST_IMAGE_VERSION} \
--set feature_store_yaml_base64=$ONLINE_CONFIG_BASE64

oc wait --for=condition=available deployment/online-server --timeout=2m
```


## Model training

In [69]:
# Re-create the store from the client configuration
# (presumably the one that targets the deployed Feast servers — confirm)
store = FeatureStore("feast_fraud/client")
for ds in store.list_data_sources():
    print(f"DataSource: {type(ds).__name__}/{ds.name}")
# list_feature_views is deprecated; its warning names list_batch_feature_views as
# the replacement.
for fv in store.list_batch_feature_views():
    print(f"FeatureView: {type(fv).__name__}/{fv.name}")

2024-09-26 16:34:59,341 - list_feature_views will make breaking changes. Please use list_batch_feature_views instead. list_feature_views will behave like list_all_feature_views in the future.
2024-09-26 16:34:59,342 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.


DataSource: PostgreSQLSource/train_source
DataSource: PostgreSQLSource/validate_source
DataSource: PostgreSQLSource/test_source
FeatureView: FeatureView/training_fv
FeatureView: FeatureView/validation_fv
FeatureView: FeatureView/test_fv


In [86]:
%%time
def fetch_historical_data(fv_name, df, batch_size=10000):
    """Fetch the historical feature values of ``fv_name`` for every row of ``df``.

    An entity frame with one (``customer_id``, ``event_timestamp``) pair per row
    of ``df`` is built and sent to the module-level ``store`` in batches.

    Args:
        fv_name: Name of the feature view to read from.
        df: DataFrame with a datetime ``ts`` column. When it also has a
            ``customer_id`` column those keys are used; otherwise every row is
            attributed to customer 1.
        batch_size: Maximum number of entity rows per request.

    Returns:
        A DataFrame with the rows of all batches, re-indexed from 0. It is empty
        when ``df`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Fetch historical data
    # TODO: how to fetch real timestamps?
    entity_df = pd.DataFrame(
        {"event_timestamp": pd.to_datetime(df["ts"]).reset_index(drop=True)}
    )
    # Use the keys stored with the data when available instead of assuming 1
    customer_ids = df["customer_id"].to_numpy() if "customer_id" in df.columns else 1
    entity_df.insert(0, "customer_id", customer_ids)
    print(f"Fetching {len(entity_df)} historical rows from {fv_name}")

    features = [
        f"{fv_name}:distance_from_last_transaction",
        f"{fv_name}:ratio_to_median_purchase_price",
        f"{fv_name}:used_chip",
        f"{fv_name}:used_pin_number",
        f"{fv_name}:online_order",
        f"{fv_name}:fraud",
    ]

    # Collect the batches and concatenate once at the end: growing a DataFrame
    # with pd.concat inside the loop copies all previous rows on every iteration.
    batches = []
    for offset in range(0, len(entity_df), batch_size):
        end_index = min(len(entity_df), offset + batch_size)
        print(f"Fetching rows from {offset} to {end_index}")
        batch_entity_df = entity_df.iloc[offset:end_index].reset_index(drop=True)
        batch_df = store.get_historical_features(
            entity_df=batch_entity_df,
            features=features,
        ).to_df()
        batches.append(batch_df)

    if not batches:
        # pd.concat rejects an empty list; mirror the empty result of an empty input
        return pd.DataFrame()
    return pd.concat(batches, ignore_index=True)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10.5 µs


In [None]:
%%time
# Fetch the test split in batches.
# Slow: the recorded log shows roughly 20 s per 10,000-row batch.
test_df = fetch_historical_data('test_fv', xtest)

2024-09-26 16:44:52,692 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:44:52,716 - Connecting FlightClient at grpc://offline-server:80


Fetching 200000 historical rows from test_fv
Fetching rows from 0 to 10000


2024-09-26 16:45:13,043 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:45:13,064 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 10000 to 20000


2024-09-26 16:45:33,273 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:45:33,293 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 20000 to 30000


2024-09-26 16:45:53,437 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:45:53,458 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 30000 to 40000


2024-09-26 16:46:13,684 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:46:13,704 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 40000 to 50000


2024-09-26 16:46:33,970 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:46:33,990 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 50000 to 60000


2024-09-26 16:46:54,093 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:46:54,113 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 60000 to 70000


2024-09-26 16:47:14,299 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:47:14,320 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 70000 to 80000


2024-09-26 16:47:34,584 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:47:34,605 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 80000 to 90000


2024-09-26 16:47:54,870 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.


Fetching rows from 90000 to 100000


2024-09-26 16:47:55,200 - Connecting FlightClient at grpc://offline-server:80
2024-09-26 16:48:15,348 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:48:15,369 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 100000 to 110000


2024-09-26 16:48:35,616 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:48:35,637 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 110000 to 120000


2024-09-26 16:48:55,845 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:48:55,866 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 120000 to 130000


2024-09-26 16:49:16,039 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:49:16,061 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 130000 to 140000


2024-09-26 16:49:36,376 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:49:36,396 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 140000 to 150000


2024-09-26 16:49:56,535 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:49:56,556 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 150000 to 160000


2024-09-26 16:50:17,041 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:50:17,063 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 160000 to 170000


2024-09-26 16:50:37,261 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:50:37,281 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 170000 to 180000


In [88]:
%%time
# Fetch the validation split in batches (the recorded run took 6min 50s)
validate_df = fetch_historical_data('validation_fv', xval)

2024-09-26 16:52:39,573 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:52:39,593 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 30000 to 40000


2024-09-26 16:53:00,197 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:53:00,216 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 40000 to 50000


2024-09-26 16:53:20,696 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:53:20,716 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 50000 to 60000


2024-09-26 16:53:41,060 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:53:41,080 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 60000 to 70000


2024-09-26 16:54:01,678 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:54:01,698 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 70000 to 80000


2024-09-26 16:54:22,202 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:54:22,221 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 80000 to 90000


2024-09-26 16:54:42,685 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:54:42,705 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 90000 to 100000


2024-09-26 16:55:03,213 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:55:03,233 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 100000 to 110000


2024-09-26 16:55:23,843 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:55:23,863 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 110000 to 120000


2024-09-26 16:55:44,410 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:55:44,430 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 120000 to 130000


2024-09-26 16:56:04,882 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:56:04,903 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 130000 to 140000


2024-09-26 16:56:25,490 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:56:25,510 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 140000 to 150000


2024-09-26 16:56:46,033 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:56:46,054 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 150000 to 160000


2024-09-26 16:57:06,481 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:57:06,501 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 160000 to 170000


2024-09-26 16:57:27,006 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:57:27,029 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 170000 to 180000


2024-09-26 16:57:47,406 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:57:47,427 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 180000 to 190000


2024-09-26 16:58:07,955 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:58:07,976 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 190000 to 200000
CPU times: user 1.05 s, sys: 358 ms, total: 1.41 s
Wall time: 6min 50s


In [89]:
%%time
# Fetch historical rows for `xtrain` from 'training_fv' through fetch_historical_data
# (defined outside this cell). The recorded run below retrieved 600000 rows in
# 10000-row batches and took roughly 19 minutes of wall time.
training_df = fetch_historical_data('training_fv', xtrain)

2024-09-26 16:58:29,753 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:58:29,786 - Connecting FlightClient at grpc://offline-server:80


Fetching 600000 historical rows from training_fv
Fetching rows from 0 to 10000


2024-09-26 16:58:48,769 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:58:48,789 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 10000 to 20000


2024-09-26 16:59:07,910 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:59:07,931 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 20000 to 30000


2024-09-26 16:59:27,041 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:59:27,062 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 30000 to 40000


2024-09-26 16:59:46,129 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 16:59:46,150 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 40000 to 50000


2024-09-26 17:00:05,452 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:00:05,472 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 50000 to 60000


2024-09-26 17:00:24,628 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:00:24,649 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 60000 to 70000


2024-09-26 17:00:43,819 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:00:43,838 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 70000 to 80000


2024-09-26 17:01:02,868 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.


Fetching rows from 80000 to 90000


2024-09-26 17:01:03,175 - Connecting FlightClient at grpc://offline-server:80
2024-09-26 17:01:22,317 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:01:22,338 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 90000 to 100000


2024-09-26 17:01:41,506 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:01:41,526 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 100000 to 110000


2024-09-26 17:02:00,532 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:02:00,553 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 110000 to 120000


2024-09-26 17:02:19,676 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:02:19,695 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 120000 to 130000


2024-09-26 17:02:38,863 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:02:38,883 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 130000 to 140000


2024-09-26 17:02:57,960 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:02:57,982 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 140000 to 150000


2024-09-26 17:03:17,014 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:03:17,036 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 150000 to 160000


2024-09-26 17:03:36,121 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:03:36,142 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 160000 to 170000


2024-09-26 17:03:55,315 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:03:55,336 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 170000 to 180000


2024-09-26 17:04:14,333 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:04:14,353 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 180000 to 190000


2024-09-26 17:04:33,473 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:04:33,493 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 190000 to 200000


2024-09-26 17:04:52,717 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:04:52,737 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 200000 to 210000


2024-09-26 17:05:11,976 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:05:11,998 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 210000 to 220000


2024-09-26 17:05:31,016 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:05:31,036 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 220000 to 230000


2024-09-26 17:05:50,172 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:05:50,193 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 230000 to 240000


2024-09-26 17:06:09,352 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:06:09,373 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 240000 to 250000


2024-09-26 17:06:28,365 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:06:28,386 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 250000 to 260000


2024-09-26 17:06:47,583 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:06:47,604 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 260000 to 270000


2024-09-26 17:07:06,713 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:07:06,734 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 270000 to 280000


2024-09-26 17:07:25,842 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:07:25,863 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 280000 to 290000


2024-09-26 17:07:44,873 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:07:44,894 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 290000 to 300000


2024-09-26 17:08:04,066 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:08:04,087 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 300000 to 310000


2024-09-26 17:08:23,189 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:08:23,210 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 310000 to 320000


2024-09-26 17:08:42,350 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:08:42,370 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 320000 to 330000


2024-09-26 17:09:01,559 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:09:01,580 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 330000 to 340000


2024-09-26 17:09:20,672 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:09:20,692 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 340000 to 350000


2024-09-26 17:09:39,822 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:09:39,843 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 350000 to 360000


2024-09-26 17:09:58,942 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:09:58,961 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 360000 to 370000


2024-09-26 17:10:18,318 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:10:18,339 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 370000 to 380000


2024-09-26 17:10:37,485 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:10:37,506 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 380000 to 390000


2024-09-26 17:10:56,630 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:10:56,650 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 390000 to 400000


2024-09-26 17:11:15,592 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:11:15,613 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 400000 to 410000


2024-09-26 17:11:34,757 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:11:34,777 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 410000 to 420000


2024-09-26 17:11:53,857 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:11:53,876 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 420000 to 430000


2024-09-26 17:12:12,933 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.


Fetching rows from 430000 to 440000


2024-09-26 17:12:13,238 - Connecting FlightClient at grpc://offline-server:80
2024-09-26 17:12:32,281 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:12:32,300 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 440000 to 450000


2024-09-26 17:12:51,376 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:12:51,397 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 450000 to 460000


2024-09-26 17:13:10,401 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:13:10,421 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 460000 to 470000


2024-09-26 17:13:29,558 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:13:29,579 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 470000 to 480000


2024-09-26 17:13:48,669 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:13:48,689 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 480000 to 490000


2024-09-26 17:14:07,952 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:14:07,972 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 490000 to 500000


2024-09-26 17:14:27,029 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:14:27,050 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 500000 to 510000


2024-09-26 17:14:46,130 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:14:46,150 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 510000 to 520000


2024-09-26 17:15:05,412 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:15:05,433 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 520000 to 530000


2024-09-26 17:15:24,429 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:15:24,449 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 530000 to 540000


2024-09-26 17:15:43,536 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:15:43,556 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 540000 to 550000


2024-09-26 17:16:02,614 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:16:02,634 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 550000 to 560000


2024-09-26 17:16:21,722 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:16:21,742 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 560000 to 570000


2024-09-26 17:16:40,679 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:16:40,699 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 570000 to 580000


2024-09-26 17:16:59,776 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:16:59,796 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 580000 to 590000


2024-09-26 17:17:18,955 - _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
2024-09-26 17:17:18,976 - Connecting FlightClient at grpc://offline-server:80


Fetching rows from 590000 to 600000
CPU times: user 3.72 s, sys: 968 ms, total: 4.69 s
Wall time: 19min 9s


In [74]:
# Report the row count of each fetched split (training, validation, test) on one line.
split_sizes = ", ".join(str(len(split)) for split in (training_df, validate_df, test_df))
print(f"Fetched historical data: {split_sizes}")

NameError: name 'training_df' is not defined

In [None]:
# Print a summary of the fetched training frame so the positional column
# indexes used in the next cell can be checked against the actual column layout.
training_df.info()

In [None]:
%%time

# Set the input (X) and output (Y) data. 
# The only output data is whether it's fraudulent. All other fields are inputs to the model.

feature_indexes = [
    1,  # distance_from_last_transaction
    2,  # ratio_to_median_purchase_price
    3,  # used_chip
    4,  # used_pin_number
    5,  # online_order
]

label_indexes = [
    6  # fraud
]

X_train = training_df.copy()
y_train = X_train.iloc[:, label_indexes]
X_train = X_train.iloc[:, feature_indexes]

X_val = validate_df.copy()
y_val = X_val.iloc[:, label_indexes]
X_val = X_val.iloc[:, feature_indexes]

X_test = test_df.copy()
y_test = X_test.iloc[:, label_indexes]
X_test = X_test.iloc[:, feature_indexes]


# Scale the data to remove mean and have unit variance. The data will be between -1 and 1, which makes it a lot easier for the model to learn than random (and potentially large) values.
# It is important to only fit the scaler to the training data, otherwise you are leaking information about the global distribution of variables (which is influenced by the test set) into the training set.

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train.values)

Path("artifact").mkdir(parents=True, exist_ok=True)
with open("artifact/test_data.pkl", "wb") as handle:
    pickle.dump((X_test, y_test), handle)
with open("artifact/scaler.pkl", "wb") as handle:
    pickle.dump(scaler, handle)

# Since the dataset is unbalanced (it has many more non-fraud transactions than fraudulent ones), set a class weight to weight the few fraudulent transactions higher than the many non-fraud transactions.
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train.values.ravel())
class_weights = {i : class_weights[i] for i in range(len(class_weights))}

## Build the model

The model is a simple, fully-connected, deep neural network, containing three hidden layers and one output layer.

In [None]:
# Fully-connected binary classifier: three 32-unit hidden layers (the last two
# batch-normalized before their ReLU), dropout after each, and a single sigmoid output.
model = Sequential([
    Dense(32, activation='relu', input_dim=len(feature_indexes)),
    Dropout(0.2),
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

## Train the model

Training a model is often the most time-consuming part of the machine learning process. Training large models can occupy multiple GPUs for days. Expect training this very simple model on a CPU to take a minute or more.

In [None]:
# Fit the model on the scaled training data and report the elapsed wall-clock time.
import os
import time

epochs = 2

start = time.time()
# The validation features get the same scaling that was fitted on the training set.
X_val_scaled = scaler.transform(X_val.values)
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    validation_data=(X_val_scaled, y_val),
    verbose=True,
    class_weight=class_weights,
)
end = time.time()
print(f"Training of model is complete. Took {end-start} seconds")

## Save the model file

In [None]:
# Save the model as ONNX for easy use of ModelMesh.
# Path comes from the notebook's top import cell, so this cell no longer depends on
# the `import os` that lives in the training cell.
model_dir = Path("models/fraud_feast/1")
model_dir.mkdir(parents=True, exist_ok=True)

# from_keras returns the ONNX model proto plus a second value that is not needed here.
model_proto, _ = tf2onnx.convert.from_keras(model)
onnx.save(model_proto, str(model_dir / "model.onnx"))

The output might include TensorFlow messages related to GPUs. You can ignore these messages.

## Confirm the model file was created successfully

The output should include the model name, size, and date. 

In [None]:
# Recursively list ./models/ (long format, human-readable sizes) to confirm model.onnx was written.
! ls -alRh ./models/

## Test the model

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import pickle
import onnxruntime as rt

Load the test data and scaler:

In [None]:
# Restore the fitted scaler and the unscaled test split written by the data-preparation cell.
# Only unpickle files you trust: pickle.load can execute arbitrary code.
with open('artifact/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

with open('artifact/test_data.pkl', 'rb') as test_data_file:
    X_test, y_test = pickle.load(test_data_file)

Create an ONNX inference runtime session and predict values for all test inputs:

In [None]:
# Open the exported ONNX model with every execution provider available on this machine.
sess = rt.InferenceSession(
    "models/fraud_feast/1/model.onnx",
    providers=rt.get_available_providers(),
)
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# Scale the raw test features with the fitted scaler and cast to float32 before inference.
X_test_scaled = scaler.transform(X_test.values).astype(np.float32)
(raw_scores,) = sess.run([output_name], {input_name: X_test_scaled})
y_pred_temp = np.asarray(np.squeeze(raw_scores))

# Scores strictly above the threshold are labelled 1 (fraud), everything else 0.
threshold = 0.95
y_pred = np.where(y_pred_temp > threshold, 1, 0)

Show the results:

In [None]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
import numpy as np

# Squeeze the label frame so it lines up element-wise with y_pred.
y_test_arr = np.squeeze(y_test.to_numpy())

# Accuracy as a percentage of matching predictions.
matches = np.equal(y_pred, y_test_arr)
correct = matches.sum().item()
acc = correct / len(y_pred) * 100

y_pred_rounded = np.round(y_pred)
precision = precision_score(y_test_arr, y_pred_rounded)
recall = recall_score(y_test_arr, y_pred_rounded)

summary = (
    f"Eval Metrics: \n Accuracy: {acc:>0.1f}%, "
    f"Precision: {precision:.4f}, Recall: {recall:.4f} \n"
)
print(summary)

c_matrix = confusion_matrix(y_test_arr, y_pred)
ConfusionMatrixDisplay(c_matrix).plot()

## Example: Is Sally's transaction likely to be fraudulent?

Here is the order of the fields from Sally's transaction details:
* distance_from_last_transaction
* ratio_to_median_purchase_price
* used_chip 
* used_pin_number
* online_order 

In [None]:
# One transaction row, in the same column order the scaler and the model were fitted on.
sally_transaction_details = [
    [
        0.3111400080477545,  # distance_from_last_transaction
        1.9459399775518593,  # ratio_to_median_purchase_price
        1.0,  # used_chip
        0.0,  # used_pin_number
        0.0,  # online_order
    ]
]
prediction = sess.run([output_name], {input_name: scaler.transform(sally_transaction_details).astype(np.float32)})

print("Is Sally's transaction predicted to be fraudulent? (true = YES, false = NO) ")
print(np.squeeze(prediction) > threshold)

# The model's sigmoid output is a probability in [0, 1]; convert it to a percentage
# before appending "%" (previously the raw probability was shown with a "%" sign).
fraud_probability = float(np.squeeze(prediction))
print("How likely was Sally's transaction to be fraudulent? ")
print("{:.5f}".format(100 * fraud_probability) + "%")

In [None]:
# Preview the first rows of xtrain; a bare last expression uses the notebook's rich table display.
xtrain.head()