# Models Logging and Tracking

In this notebook, we log and track the hyperparameters and scores of multiple model runs with MLflow.

# 0 Setup 

## 0.1 Imports

In [2]:
import numpy as np
import pandas as pd

# Scikit Learning libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# MLFlow
import mlflow
from mlflow.models import infer_signature

## 0.2 Server (Locally Hosted)

In [3]:
# Address of the locally hosted MLflow tracking server used by this notebook.
TRACKING_SERVER_URI = "http://127.0.0.1:8080"
mlflow.set_tracking_uri(uri=TRACKING_SERVER_URI)

## 0.3 Path Definition

In [4]:
# Project root, written relative to the user's home directory
# (the '~' is stored as-is; it is not expanded in this cell).
HOME_PATH = "~/Documents/Data Science Projects/stars"

# Sub-directory holding the interim datasets; concatenated onto HOME_PATH.
INTERIM_DATA_PATH = "/data/interim/"

## 0.4 Loading Data

The 'stars' notebook must be run before loading the data here.

In [5]:
# Location of the selected-features table inside the interim data folder.
features_csv = HOME_PATH + INTERIM_DATA_PATH + 'selected_features_df_final.csv'

# The first CSV column is used as the DataFrame index.
stars_final_df = pd.read_csv(features_csv, index_col=0)

# 1 Models

In this section, we set up the models that we'll track later with MLflow.

In [6]:
# Separate the label column from the feature columns.
target_column = 'Star type'
y = stars_final_df[target_column]
X = stars_final_df.drop(columns=[target_column])

## 1.1 CART

In [7]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Hyperparameters: depth capped at 4, minimum leaf size left at its default.
# random_state is pinned because scikit-learn randomly permutes the features at
# every split, so an unseeded tree can differ between runs when splits tie.
# Keeping the seed in cart_params also means it is logged with the run.
cart_params = {'max_depth': 4, 'random_state': 0}
cart_model = tree.DecisionTreeClassifier(**cart_params)

# Fit on the training data; fit() returns the estimator itself, so
# cart_classifier and cart_model refer to the same fitted object.
cart_classifier = cart_model.fit(X_train, y_train)

# Predicted labels for the held-out test set.
cart_pred_labels_te = cart_model.predict(X_test)

# Model performance: accuracy on the test set.
cart_accuracy = accuracy_score(y_test, cart_pred_labels_te)

# 2 Model Logging and Tracking

## 2.1 CART

In [11]:
# Select the MLflow experiment that the CART runs are recorded under
mlflow.set_experiment("CART Experiment")

# Everything logged inside this block is attached to one MLflow run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(cart_params)

    # Log the evaluation metric (this is an accuracy score, not a loss)
    mlflow.log_metric("accuracy", cart_accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Stars - CART Model")

    # Infer the model signature from the training inputs and the model's outputs
    signature = infer_signature(X_train, cart_model.predict(X_train))

    # Log and register the model. Only a few rows are passed as the input
    # example: it is meant to illustrate the expected input, and handing over
    # the full training frame would store the whole training set with the model.
    cart_model_info = mlflow.sklearn.log_model(
        sk_model=cart_model,
        artifact_path="stars_cart_model",
        signature=signature,
        input_example=X_train.head(5),
        registered_model_name="tracking-cart",
    )

Registered model 'tracking-cart' already exists. Creating a new version of this model...
2024/02/06 11:52:58 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-cart, version 2
Created version '2' of model 'tracking-cart'.


In [14]:
# Reload the logged model through MLflow's generic pyfunc interface.
cart_loaded_model = mlflow.pyfunc.load_model(cart_model_info.model_uri)

cart_predictions = cart_loaded_model.predict(X_test)

# Build the comparison table as a NEW frame. Wrapping X_test in pd.DataFrame()
# and assigning columns onto the result can alias X_test (depending on the
# pandas version), which would add the extra columns to the feature frame and
# break a re-run of this cell. assign() always returns a fresh DataFrame.
# y_test is aligned on the index; the predictions are attached in row order.
cart_result = X_test.assign(
    actual_class=y_test,
    predicted_class=cart_predictions,
)

# Show the first ten rows only.
cart_result.head(10)

Downloading artifacts: 100%|█████████████████████| 6/6 [00:00<00:00, 246.37it/s]


Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star color encoded,actual_class,predicted_class
109,33421,352000.0,67.0,-5.79,0.0,4,3
71,3607,0.022,0.38,10.12,5.0,1,1
37,6380,1.35,0.98,2.93,9.0,3,3
74,3550,0.004,0.291,10.89,5.0,1,1
108,24345,142000.0,57.0,-6.24,0.0,4,3
227,10930,783930.0,25.0,-6.224,0.0,4,3
156,26140,14520.0,5.49,-3.8,1.0,3,3
220,23678,244290.0,35.0,-6.27,0.0,4,3
152,14060,1092.0,5.745,-2.04,1.0,3,3
194,3523,0.0054,0.319,12.43,5.0,1,1
