In [None]:
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from dotenv import load_dotenv
from src.paths import PARENT_DIR
from matplotlib import pyplot
import pandas as pd
import seaborn as sns
import hopsworks
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
import shutil
import joblib

In [None]:
# Hopsworks connection setup.
# This cell has to run: later cells read `fs`, HOPSWORKS_PROJECT_NAME and
# HOPSWORKS_API_KEY, so leaving it commented out breaks Restart & Run All.
HOPSWORKS_PROJECT_NAME = 'Vivekmaj1'

# Load key-value pairs from the .env file located in the parent directory,
# so the API key is never hard-coded in the notebook.
load_dotenv(PARENT_DIR / '.env')

# Fails fast with KeyError when the key is missing from the environment.
HOPSWORKS_API_KEY = os.environ['HOPSWORKS_API_KEY']

project = hopsworks.login(
    project=HOPSWORKS_PROJECT_NAME,
    api_key_value=HOPSWORKS_API_KEY
)

# Feature store handle used by the feature-view cell below.
fs = project.get_feature_store()

In [None]:
# Reuse version 1 of the "iris" feature view when the lookup succeeds;
# otherwise build it from version 1 of the "iris" feature group.
try:
    feature_view = fs.get_feature_view(name="iris", version=1)
except Exception:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate instead of triggering a feature-view creation.
    # NOTE(review): this could be narrowed to the client's "not found" error —
    # confirm the exact exception type raised by the installed Hopsworks version.
    iris_fg = fs.get_feature_group(name="iris", version=1)
    query = iris_fg.select_all()
    feature_view = fs.create_feature_view(
        name="iris",
        version=1,
        description="Read from Iris flower dataset",
        labels=["variety"],  # target column, split off from the features
        query=query,
    )

In [None]:
iris, _ = feature_view.training_data(
    description='Iris Data',
)

In [None]:
iris.head()

In [None]:
X_train,X_test,y_train,y_test = train_test_split(iris, _, test_size=0.2)

In [None]:
class ExtractExtraFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends six derived sepal/petal features.

    The input must provide the columns ``sepal_length``, ``sepal_width``,
    ``petal_length`` and ``petal_width``. The added columns are
    ``sepal_area``, ``petal_area``, ``sepal_petal_length_diff``,
    ``sepal_petal_width_diff``, ``sepal_length_to_width`` and
    ``petal_length_to_width``.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self`` unchanged."""
        return self

    def transform(self, X, y=None):
        """Return a new DataFrame holding ``X``'s columns plus the derived ones.

        ``X`` itself is left untouched. Writing the columns into ``X`` in place
        would leak the engineered features into the caller's frame (for
        example the training split later used to build the model schema).

        Parameters
        ----------
        X : pandas.DataFrame
            Frame with the four raw measurement columns.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        pandas.DataFrame
            A new frame; the six derived columns are appended in the order
            listed in the class docstring.
        """
        # Every derived column depends only on the raw columns, so all six can
        # be computed in a single assign() call, which returns a new frame.
        return X.assign(
            sepal_area=X['sepal_length'] * X['sepal_width'],
            petal_area=X['petal_length'] * X['petal_width'],
            sepal_petal_length_diff=X['sepal_length'] - X['petal_length'],
            sepal_petal_width_diff=X['sepal_width'] - X['petal_width'],
            sepal_length_to_width=X['sepal_length'] / X['sepal_width'],
            petal_length_to_width=X['petal_length'] / X['petal_width'],
        )

In [None]:
# Two-stage pipeline: feature engineering first, then a k-nearest-neighbours
# classifier with its default settings.
pipeline = Pipeline(
    steps=[
        ('extract_features', ExtractExtraFeatures()),
        ('classifier', KNeighborsClassifier()),
    ]
)

# Train on the training split; fit() is the last expression, so its return
# value is what the cell displays.
pipeline.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the held-out split and show the result.
y_pred = pipeline.predict(X=X_test)
y_pred

In [None]:
# output_dict=True yields a dict instead of a formatted string; the registry
# cell reads metrics['accuracy'] and metrics['weighted avg'] from it.
metrics = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)
print(metrics)

In [None]:
# Confusion matrix of true vs. predicted labels on the test split.
results = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(results)

In [None]:
# Wrap the raw matrix in a labelled frame: rows are true classes, columns are
# predicted classes.
# NOTE(review): the hard-coded labels assume exactly three classes in this
# order — confirm against the label values in the data.
df_cm = pd.DataFrame(
    results,
    index=['True Setosa', 'True Versicolor', 'True Virginica'],
    columns=['Pred Setosa', 'Pred Versicolor', 'Pred Virginica'],
)

# Explicit figure/axes instead of relying on pyplot's implicit current axes.
fig, ax = pyplot.subplots()
# fmt="d" prints the counts as plain integers (the default format switches to
# scientific notation for larger counts).
cm = sns.heatmap(df_cm, annot=True, fmt="d", ax=ax)
ax.set_title("Confusion matrix (test split)")

# savefig() does not create missing directories, so make sure the target
# folder exists before writing the image that the registry cell copies later.
os.makedirs("../assets", exist_ok=True)
fig.savefig("../assets/confusion_matrix.png")

# pyplot.show() also works with the notebook's inline backend, whereas
# Figure.show() needs a GUI backend and only emits a warning here.
pyplot.show()

In [None]:
# Log in to Hopsworks to reach the project's model registry.
project = hopsworks.login(
    project=HOPSWORKS_PROJECT_NAME,
    api_key_value=HOPSWORKS_API_KEY
)

# The 'iris_model' directory will be saved to the model registry
model_dir = "iris_model"

# exist_ok=True replaces the separate isdir() check followed by mkdir():
# one call, and no failure when the directory is already there.
os.makedirs(model_dir, exist_ok=True)

# Bundle the fitted pipeline and the confusion-matrix image in the directory.
joblib.dump(pipeline, os.path.join(model_dir, "iris_model.pkl"))
shutil.copyfile(
    "../assets/confusion_matrix.png",
    os.path.join(model_dir, "confusion_matrix.png"),
)

# Describe the model's inputs and outputs from the training split.
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

model_registry = project.get_model_registry()

# Aggregate scores come from the 'weighted avg' entry of the classification
# report; accuracy is stored at the top level of the report dict.
weighted_avg = metrics['weighted avg']

model = model_registry.python.create_model(
    name="iris_model",
    metrics={
        "accuracy": metrics['accuracy'],
        "recall": weighted_avg['recall'],
        "precision": weighted_avg['precision'],
        "f1": weighted_avg['f1-score'],
    },
    # NOTE(review): the description is the literal string "None" — looks like
    # a placeholder; confirm the intended text.
    description="None",
    input_example=X_train.sample(),
    model_schema=model_schema
)

# Upload the contents of the local model directory to the registry.
model.save(model_dir)