# Adapting Iris Dataset for Feast

## Importing Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from datetime import datetime, timedelta

## Load Dataset

In [2]:
df = pd.read_csv('data/iris.csv')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Add `event_timestamp` and `flower_id`

In [3]:
# Adding event_timestamp - helps Feast serve the correct historical feature values for model training
base_time = datetime.now()
df['event_timestamp'] = [base_time - timedelta(minutes=i) for i in range(len(df))]

# Adding flower_id - acts as an entity key which is essential for joining with feature views
ordinal_encoder = OrdinalEncoder()
df['flower_id'] = ordinal_encoder.fit_transform(df[['species']]).astype(int)

In [4]:
df[:5]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,event_timestamp,flower_id
0,5.1,3.5,1.4,0.2,setosa,2025-06-22 18:21:21.912413,0
1,4.9,3.0,1.4,0.2,setosa,2025-06-22 18:20:21.912413,0
2,4.7,3.2,1.3,0.2,setosa,2025-06-22 18:19:21.912413,0
3,4.6,3.1,1.5,0.2,setosa,2025-06-22 18:18:21.912413,0
4,5.0,3.6,1.4,0.2,setosa,2025-06-22 18:17:21.912413,0


In [5]:
df[50:55]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,event_timestamp,flower_id
50,7.0,3.2,4.7,1.4,versicolor,2025-06-22 17:31:21.912413,1
51,6.4,3.2,4.5,1.5,versicolor,2025-06-22 17:30:21.912413,1
52,6.9,3.1,4.9,1.5,versicolor,2025-06-22 17:29:21.912413,1
53,5.5,2.3,4.0,1.3,versicolor,2025-06-22 17:28:21.912413,1
54,6.5,2.8,4.6,1.5,versicolor,2025-06-22 17:27:21.912413,1


In [6]:
df[100:105]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,event_timestamp,flower_id
100,6.3,3.3,6.0,2.5,virginica,2025-06-22 16:41:21.912413,2
101,5.8,2.7,5.1,1.9,virginica,2025-06-22 16:40:21.912413,2
102,7.1,3.0,5.9,2.1,virginica,2025-06-22 16:39:21.912413,2
103,6.3,2.9,5.6,1.8,virginica,2025-06-22 16:38:21.912413,2
104,6.5,3.0,5.8,2.2,virginica,2025-06-22 16:37:21.912413,2


## Saving the dataset

In [7]:
df.to_csv('data/iris_feast.csv', index=False)
df.to_parquet('data/iris_feast.parquet', index=False)

# Utilizing Feast as a part of Training and Inference

In [8]:
!pip install feast --quiet

In [9]:
%%bash
cd feature_repo
rm -rf .ipynb_checkpoints
feast apply
feast materialize-incremental $(date +%Y-%m-%d)

  flower = Entity(name='flower_id', join_keys=['flower_id'])


No project found in the repository. Using project name iris_project defined in feature_store.yaml
Applying changes for project iris_project
Created project iris_project
Created entity flower_id
Created feature view iris_features





Created sqlite table iris_project_iris_features





Materializing [1m[32m0[0m feature views to [1m[32m2025-06-22 00:00:00+00:00[0m into the [1m[32msqlite[0m online store.



## Load features for Training and Inference

In [10]:
from feast import FeatureStore
import pandas as pd

store = FeatureStore(repo_path='feature_repo')

entity_df = pd.read_parquet('data/iris_feast.parquet')[['flower_id', 'event_timestamp']]
entity_df['event_timestamp'] = pd.to_datetime(entity_df['event_timestamp'])

training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        'iris_features:sepal_length',
        'iris_features:sepal_width',
        'iris_features:petal_length',
        'iris_features:petal_width',
    ],
).to_df()

training_df.head()



Unnamed: 0,flower_id,event_timestamp,sepal_length,sepal_width,petal_length,petal_width
0,2,2025-06-22 15:52:21.912413+00:00,5.9,3.0,5.1,1.8
1,2,2025-06-22 15:53:21.912413+00:00,6.2,3.4,5.4,2.3
2,2,2025-06-22 15:54:21.912413+00:00,6.5,3.0,5.2,2.0
3,2,2025-06-22 15:55:21.912413+00:00,6.3,2.5,5.0,1.9
4,2,2025-06-22 15:56:21.912413+00:00,6.7,3.0,5.2,2.3


In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

y = training_df[['flower_id']]
X = training_df.drop(columns=['flower_id', 'event_timestamp'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, stratify=y, random_state=42)
model = DecisionTreeClassifier().fit(X_train, y_train)

In [12]:
from sklearn import metrics

prediction = model.predict(X_test)
print('The accuracy of the Decision Tree is',"{:.3f}".format(metrics.accuracy_score(prediction,y_test)))

The accuracy of the Decision Tree is 0.933
