In [1]:
import numpy as np
import pandas as pd


iris = pd.read_csv("Iris.csv") 

In [2]:
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [3]:
predictors_df = iris.loc[:,iris.columns!='Species']
target_df = iris['Species']

In [4]:
timestamps = pd.date_range(end = pd.Timestamp.now(),
                           periods = len(iris),freq = 'D').to_frame(name = 'event_timestamp', index = False)

In [5]:
predictors_df = pd.concat(objs = [predictors_df, timestamps], axis = 1)
target_df = pd.concat(objs = [target_df, timestamps], axis =1)
target_df['Id'] = predictors_df['Id']

In [6]:
predictors_df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,event_timestamp
0,1,5.1,3.5,1.4,0.2,2022-07-01 11:57:39.787629
1,2,4.9,3.0,1.4,0.2,2022-07-02 11:57:39.787629
2,3,4.7,3.2,1.3,0.2,2022-07-03 11:57:39.787629
3,4,4.6,3.1,1.5,0.2,2022-07-04 11:57:39.787629
4,5,5.0,3.6,1.4,0.2,2022-07-05 11:57:39.787629


In [7]:
target_df.head()

Unnamed: 0,Species,event_timestamp,Id
0,Iris-setosa,2022-07-01 11:57:39.787629,1
1,Iris-setosa,2022-07-02 11:57:39.787629,2
2,Iris-setosa,2022-07-03 11:57:39.787629,3
3,Iris-setosa,2022-07-04 11:57:39.787629,4
4,Iris-setosa,2022-07-05 11:57:39.787629,5


In [8]:
predictors_df.to_parquet(path='predictors_df.parquet')
target_df.to_parquet(path='target_df.parquet')

In [9]:
!feast version

Feast SDK Version: "feast 0.21.3"


In [25]:
!feast init feature_repo

  for dt in pd.date_range(

Creating a new Feast repository in [1m[32m/home/vboxuser/feast_iris/feature_repo[0m.



In [10]:
cd feature_repo

/home/vboxuser/feast_iris/feature_repo


In [57]:
!feast apply

Updated feature view [1m[33mpredictors_df_feature_view[0m
	ttl: [1m[33mseconds: 172800
[0m -> [1m[92m[0m
Updated feature view [1m[33mtarget_df_feature_view[0m
	ttl: [1m[33mseconds: 172800
[0m -> [1m[92m[0m

[1m[94mNo changes to infrastructure


In [58]:
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

store = FeatureStore(repo_path='.')

entity_df = pd.read_parquet(path = 'data/target_df.parquet')

training_data = store.get_historical_features(
entity_df = entity_df,
    features = [
            "predictors_df_feature_view:SepalLengthCm",
            "predictors_df_feature_view:SepalWidthCm",
            "predictors_df_feature_view:PetalLengthCm",
            "predictors_df_feature_view:PetalWidthCm" 
               ]
)

dataset = store.create_saved_dataset(
from_=training_data,
    name = "iris_dataset",
    storage = SavedDatasetFileStorage('data/iris_dataset.parquet')
)



In [59]:
training_data.to_df()

Unnamed: 0,Species,event_timestamp,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,Iris-setosa,2022-06-30 21:52:05.724617+00:00,1,5.1,3.5,1.4,0.2
1,Iris-setosa,2022-07-01 21:52:05.724617+00:00,2,4.9,3.0,1.4,0.2
2,Iris-setosa,2022-07-02 21:52:05.724617+00:00,3,4.7,3.2,1.3,0.2
3,Iris-setosa,2022-07-03 21:52:05.724617+00:00,4,4.6,3.1,1.5,0.2
4,Iris-setosa,2022-07-04 21:52:05.724617+00:00,5,5.0,3.6,1.4,0.2
...,...,...,...,...,...,...,...
145,Iris-virginica,2022-11-22 21:52:05.724617+00:00,146,6.7,3.0,5.2,2.3
146,Iris-virginica,2022-11-23 21:52:05.724617+00:00,147,6.3,2.5,5.0,1.9
147,Iris-virginica,2022-11-24 21:52:05.724617+00:00,148,6.5,3.0,5.2,2.0
148,Iris-virginica,2022-11-25 21:52:05.724617+00:00,149,6.2,3.4,5.4,2.3


In [60]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from joblib import dump

# Getting our FeatureStore
store = FeatureStore(repo_path=".")

training_df = store.get_saved_dataset(name="iris_dataset").to_df()

# Separating the features and labels
y = training_df['Species']
X = training_df.drop(
    labels=['Species', 'event_timestamp', 'Id'], 
    axis=1)

# Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    stratify=y)

# Creating and training LogisticRegression
reg = LogisticRegression()
reg.fit(X=X_train[sorted(X_train)], y=y_train)

# Saving the model
dump(value=reg, filename="model.joblib")



['model.joblib']

In [61]:
from datetime import datetime, timedelta

# Getting our FeatureStore
store = FeatureStore(repo_path=".")

store.materialize_incremental(end_date = datetime.now())

Materializing [1m[32m2[0m feature views to [1m[32m2022-11-27 12:13:29+01:00[0m into the [1m[32msqlite[0m online store.

Since the ttl is 0 for feature view [1m[32mpredictors_df_feature_view[0m, the start date will be set to 1 year before the current time.
[1m[32mpredictors_df_feature_view[0m from [1m[32m2021-11-28 11:13:29+01:00[0m to [1m[32m2022-11-27 12:13:29+01:00[0m:


100%|███████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 4455.39it/s]


Since the ttl is 0 for feature view [1m[32mtarget_df_feature_view[0m, the start date will be set to 1 year before the current time.
[1m[32mtarget_df_feature_view[0m from [1m[32m2021-11-28 11:13:29+01:00[0m to [1m[32m2022-11-27 13:13:29+01:00[0m:


100%|███████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 7018.66it/s]


In [69]:
from feast import FeatureStore
import pandas as pd
from joblib import load

# Getting our FeatureStore
store = FeatureStore(repo_path=".")

# Defining our features names
feast_features = [
            "predictors_df_feature_view:SepalLengthCm",
            "predictors_df_feature_view:SepalWidthCm",
            "predictors_df_feature_view:PetalLengthCm",
            "predictors_df_feature_view:PetalWidthCm",
    ]

# Getting the latest features
features = store.get_online_features(
    features=feast_features,    
    entity_rows=[{"Id": 150}]
).to_dict()

# Converting the features to a DataFrame
features_df = pd.DataFrame.from_dict(data=features)

In [70]:
features_df.head()

Unnamed: 0,Id,SepalWidthCm,SepalLengthCm,PetalWidthCm,PetalLengthCm
0,150,3.0,5.9,1.8,5.1


In [68]:
reg = load("model.joblib")
predictions = reg.predict(features_df[sorted(features_df.drop("Id", axis=1))])
print(predictions)

['Iris-setosa']
