In [1]:
import hopsworks
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
from hsml.model_schema import ModelSchema
from hsml.schema import Schema
import joblib

In [2]:
# Authenticate against Hopsworks; the returned project handle is what the later
# cells use to reach the feature store and the model registry.
project = hopsworks.login()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/196773


In [3]:
# Resolve the project's feature store, then look up version 1 of the "heart"
# feature group inside it.
fs = project.get_feature_store()
heart_fg = fs.get_feature_group(
    name="heart",
    version=1,
)

Connected. Call `.close()` to terminate connection gracefully.


# Get Data from Feature Store


In [4]:
# Select every feature of the group and expose the selection through a feature
# view (fetched if it already exists, created otherwise) whose label column is
# "heartdisease".
query = heart_fg.select_all()
feature_view = fs.get_or_create_feature_view(
    name="heart",
    version=1,
    description="Read from Heart dataset",
    labels=["heartdisease"],
    query=query,
)

# Train Test Split and Scale

In [5]:
# Materialise train/test features and labels from the feature view
# (0.2 is the test-size argument), then remove the 'timestamp' column
# from both feature frames.
X_train, X_test, y_train, y_test = feature_view.train_test_split(0.2)
X_train, X_test = (
    frame.drop(columns=['timestamp']) for frame in (X_train, X_test)
)

Finished: Reading data from Hopsworks, using ArrowFlight (11.20s) 




In [6]:
# Convert each label frame to NumPy and keep only its first column,
# giving flat 1-D label arrays.
y_train, y_test = (labels.to_numpy()[:, 0] for labels in (y_train, y_test))

In [7]:
# Load the heart dataset from the local CSV and make a seeded 80/20 split.
# NOTE(review): this reassigns X_train / X_test / y_train / y_test, replacing the
# splits read from the feature view in the earlier cells — confirm that the CSV
# (and not the feature store) is the intended training source.
heart_df = pd.read_csv("./dataset/heart_res.csv")

# 'HeartDisease' is the label; every other column is used as a feature.
# (train_test_split is already imported in the notebook's import cell.)
target = heart_df['HeartDisease']
features = heart_df.drop(columns='HeartDisease')

# Fixed random_state keeps the split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [9]:
# Tune a random forest with a randomized hyper-parameter search: 10 sampled
# candidates, 3-fold cross-validation, accuracy as the selection metric.
# Seeding the forest as well as the search makes the tuning run repeatable.
model = RandomForestClassifier(random_state=42)

# Candidate values the search samples from.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['gini', 'entropy'],
}

rand_search = RandomizedSearchCV(
    model,
    param_grid,
    cv=3,
    scoring='accuracy',
    n_iter=10,
    random_state=5,
    verbose=1,
)

# np.asarray(...).ravel() yields a flat label vector whether y_train is a
# pandas Series or already a NumPy array (a bare `.values` fails on the latter).
rand_search.fit(X_train, np.asarray(y_train).ravel())

# Fitted attributes of the search carry a trailing underscore.
print(f"Best params: {rand_search.best_params_}")
print(f"Best score: {rand_search.best_score_}")

# Keep the best candidate, refit on the full training split by the search.
model = rand_search.best_estimator_

In [10]:
# Fit a random forest with default hyper-parameters on the training split.
# NOTE(review): this rebinds `model`, so the estimator assigned by the
# hyper-parameter search cell above is not the one evaluated and saved below —
# confirm that is intended.
model = RandomForestClassifier()
model.fit(X_train, y_train)

In [11]:
# Predict on the held-out split and print the per-class
# precision / recall / F1 summary.
y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.92      0.99      0.95     58485
         1.0       0.99      0.92      0.95     58484

    accuracy                           0.95    116969
   macro avg       0.96      0.95      0.95    116969
weighted avg       0.96      0.95      0.95    116969



# Store in Model Registry

In [12]:
# Upload the trained model and its scaler to the Hopsworks Model Registry.
# First get a handle to the registry of the current project.
mr = project.get_model_registry()

# Everything placed in this directory is uploaded with the model.
# makedirs(exist_ok=True) creates it when missing and is a no-op on re-runs.
model_dir = "heart_model"
os.makedirs(model_dir, exist_ok=True)

# Persist the classifier together with the fitted scaler: the model was trained
# on scaled features, so inference needs the same scaler.
joblib.dump(model, os.path.join(model_dir, "heart_model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "heart_scaler.pkl"))

# Describe the model's input/output using the features (X_train) and labels (y_train).
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema, output_schema)

# Create the registry entry with the model's name and description.
# NOTE(review): model_schema is built above but is not passed to create_model
# (the argument was commented out in the original) — confirm whether the schema
# should be attached to the registered model.
heart_model = mr.python.create_model(
    name="heart_model",
    description="heart Predictor",
)

# Upload the model to the model registry, including all files in 'model_dir'.
heart_model.save(model_dir)

Connected. Call `.close()` to terminate connection gracefully.


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/431119177 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/1623 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/196773/models/heart_model/2


Model(name: 'heart_model', version: 2)