In [2]:
import os
from datetime import datetime
from pprint import pprint

import gcsfs
import joblib
import mlflow
import pandas as pd
from mlflow import MlflowClient
from mlflow.models import infer_signature
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the iris CSV and hold out 40% of the rows as a test split, stratified on
# the species label so both splits keep the same class proportions.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

data = pd.read_csv('data/iris.csv')
train, test = train_test_split(
    data,
    test_size=0.4,
    stratify=data['species'],
    random_state=42,
)

# Separate the four feature columns from the species label for each split.
X_train, y_train = train[feature_columns], train['species']
X_test, y_test = test[feature_columns], test['species']

In [None]:
# Point MLflow at the tracking server. The MLFLOW_TRACKING_URI environment
# variable, when set, overrides the default server below so the notebook can be
# run against another deployment without editing this cell.
DEFAULT_TRACKING_URI = "http://34.57.144.215:8100"
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", DEFAULT_TRACKING_URI))

# List the experiments known to that server.
client = MlflowClient(mlflow.get_tracking_uri())
all_experiments = client.search_experiments()
print(all_experiments)

In [None]:
# Make this the active experiment for the runs started below.
mlflow.set_experiment(experiment_name="IRIS Classifier: MLFlow")

In [None]:
# Hyperparameters passed to the decision tree constructor as keyword arguments.
params = dict(max_depth=2, random_state=1)

In [None]:
# Fit a decision tree on the training split using the hyperparameters in `params`.
mod_dt = DecisionTreeClassifier(**params)
mod_dt.fit(X_train, y_train)

# Score on the held-out split. scikit-learn metrics take (y_true, y_pred) in
# that order, so the ground-truth labels go first.
prediction = mod_dt.predict(X_test)
accuracy_score = metrics.accuracy_score(y_test, prediction)
print(accuracy_score)

In [None]:
# Record this training run in MLflow: hyperparameters, test accuracy, a tag,
# and the fitted model (also registered under a model name).
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metric("accuracy", accuracy_score)
    mlflow.set_tag("Training info", "Decision Tree First Run")

    # Infer the model's input/output schema from the training features and
    # the model's predictions on them.
    signature = infer_signature(X_train, mod_dt.predict(X_train))

    model_info = mlflow.sklearn.log_model(
        sk_model=mod_dt,
        artifact_path="iris_model",
        signature=signature,
        # An input example is meant to be a small sample; a few rows are
        # enough, so avoid storing the whole training set with the model.
        input_example=X_train.head(5),
        registered_model_name="IRIS-classifier-dt",
    )