In [20]:
import os

import joblib
import numpy as np
import pandas as pd
from feast import FeatureStore
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Your project details (update these before running the notebook).
# NOTE(review): PROJECT_ID and LOCATION are not referenced by any of the cells
# visible in this notebook — presumably a Google Cloud project ID and region;
# confirm they are still needed.
PROJECT_ID = "mlops-iris-22f3002972"
LOCATION = "us-central1"
# Upload destination for the trained model: the upload cell copies the saved
# artifact to {BUCKET_URI}/{MODEL_ARTIFACT_DIR}/.
BUCKET_URI = "gs://mlops-iris-output-22f3002972"
MODEL_ARTIFACT_DIR = "iris_classifier/model_feast"

In [21]:
# Open the Feast feature repository and report which project it resolves to.
print("Connecting to Feast Feature Store...")
store = FeatureStore(
    repo_path="/home/jupyter/feast_repo/iris_features",
)

# Both status lines are emitted by a single call; the output is unchanged.
print("✓ Connected to Feature Store", f"Project: {store.project}", sep="\n")

Connecting to Feast Feature Store...
✓ Connected to Feature Store
Project: iris_features




In [22]:
# NOTE(review): this cell repeats the Feast connection made in the preceding
# cell and rebinds `store` to a fresh FeatureStore for the same repo path;
# consider deleting one of the two cells.
print("Connecting to Feast Feature Store...")
store = FeatureStore(
    repo_path="/home/jupyter/feast_repo/iris_features",
)

for status_line in ("✓ Connected to Feature Store", f"Project: {store.project}"):
    print(status_line)

Connecting to Feast Feature Store...
✓ Connected to Feature Store
Project: iris_features


In [23]:
# Read the iris feature table from parquet, then keep only the entity key and
# the event timestamp: together they form the entity dataframe that is later
# passed to the Feast historical-feature lookup.
iris_data = pd.read_parquet('/home/jupyter/data/iris_features.parquet')
entity_df = iris_data.loc[:, ['iris_id', 'event_timestamp']].copy()

print(f"Total samples: {len(entity_df)}")
# Preview the first rows as the cell's rich output.
entity_df.head()

Total samples: 150


Unnamed: 0,iris_id,event_timestamp
0,1,2024-01-01 00:00:00
1,2,2024-01-01 01:00:00
2,3,2024-01-01 02:00:00
3,4,2024-01-01 03:00:00
4,5,2024-01-01 04:00:00


In [24]:
print("Fetching features from Feast Offline Store...")

# Feature references in Feast's "<feature_view>:<feature_name>" form.
# The `species` label is retrieved through the same feature view as the
# four measurement columns.
feature_refs = [
    "iris_features:sepal_length",
    "iris_features:sepal_width",
    "iris_features:petal_length",
    "iris_features:petal_width",
    "iris_features:species",
]

# Get historical features for training: join the requested features onto
# each (iris_id, event_timestamp) row of entity_df and materialize the
# result as a pandas DataFrame.
training_data = store.get_historical_features(
    entity_df=entity_df,
    features=feature_refs,
).to_df()

# Fail fast if the lookup left any feature or label value empty (e.g. no
# feature row matched an entity/timestamp), rather than letting nulls flow
# silently into the train/test split and model fit further down.
null_counts = training_data.isna().sum()
if null_counts.any():
    raise ValueError(
        "Feast returned missing values for columns: "
        f"{null_counts[null_counts > 0].to_dict()}"
    )

print(f"✓ Retrieved {training_data.shape[0]} samples")
print(f"Columns: {training_data.columns.tolist()}")
training_data.head()

Fetching features from Feast Offline Store...
✓ Retrieved 150 samples
Columns: ['iris_id', 'event_timestamp', 'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']


Unnamed: 0,iris_id,event_timestamp,sepal_length,sepal_width,petal_length,petal_width,species
0,1,2024-01-01 00:00:00+00:00,5.1,3.5,1.4,0.2,setosa
1,2,2024-01-01 01:00:00+00:00,4.9,3.0,1.4,0.2,setosa
2,3,2024-01-01 02:00:00+00:00,4.7,3.2,1.3,0.2,setosa
3,4,2024-01-01 03:00:00+00:00,4.6,3.1,1.5,0.2,setosa
4,5,2024-01-01 04:00:00+00:00,5.0,3.6,1.4,0.2,setosa


In [25]:
# Separate the four numeric measurements (model inputs) from the species
# label (prediction target).
measurement_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = training_data[measurement_columns]
y = training_data['species']

# Hold out 40% of the rows for evaluation. Stratifying on the label keeps the
# class proportions the same in both partitions; the fixed seed makes the
# shuffle repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    stratify=y,
    random_state=42,
)

print(f"Training samples: {len(X_train)}", f"Test samples: {len(X_test)}", sep="\n")

Training samples: 90
Test samples: 60


In [26]:
print("Training Decision Tree model...")

# Fit a depth-limited decision tree on the training partition; `fit` returns
# the estimator itself, so construction and fitting can be chained.
mod_dt = DecisionTreeClassifier(max_depth=3, random_state=1).fit(X_train, y_train)

# Score the held-out partition: predict labels, then compare them with the
# true labels to get the fraction classified correctly.
prediction = mod_dt.predict(X_test)
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=prediction)

print(f'The accuracy of the Decision Tree is {accuracy:.3f}')

Training Decision Tree model...
The accuracy of the Decision Tree is 0.983


In [27]:
# Create the local artifacts directory; exist_ok=True makes re-running this
# cell safe when the directory is already present.
# (`os` is imported with the other imports in the notebook's first cell.)
os.makedirs('artifacts', exist_ok=True)

# Serialize the fitted decision tree with joblib. The path is defined once so
# the file that is written and the path that is reported cannot drift apart.
model_path = 'artifacts/model_feast.joblib'
joblib.dump(mod_dt, model_path)
print(f"✓ Model saved: {model_path}")

✓ Model saved: artifacts/model_feast.joblib


In [28]:
# Upload to Cloud Storage
!gsutil cp artifacts/model_feast.joblib {BUCKET_URI}/{MODEL_ARTIFACT_DIR}/

print(f"✓ Model uploaded to: {BUCKET_URI}/{MODEL_ARTIFACT_DIR}/")

Copying file://artifacts/model_feast.joblib [Content-Type=application/octet-stream]...
/ [1 files][  2.5 KiB/  2.5 KiB]                                                
Operation completed over 1 objects/2.5 KiB.                                      
✓ Model uploaded to: gs://mlops-iris-output-22f3002972/iris_classifier/model_feast/
