# Model registration and versioning with MLflow

In [1]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, roc_auc_score, precision_score, recall_score,roc_auc_score, average_precision_score
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
import logging

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [2]:
def eval_proba(actual, proba):
    """Score predicted probabilities against the true labels.

    Args:
        actual: True labels, passed straight through to the scorers.
        proba: Predicted scores/probabilities for the same samples.

    Returns:
        A ``(roc_auc, average_precision)`` tuple, as computed by
        ``roc_auc_score`` and ``average_precision_score`` respectively.
    """
    return (
        roc_auc_score(actual, proba),
        average_precision_score(actual, proba),
    )

In [3]:
def eval_pred(actual, pred):
    """Score hard class predictions against the true labels.

    Args:
        actual: True labels, passed straight through to the scorers.
        pred: Predicted labels for the same samples.

    Returns:
        A ``(accuracy, precision, recall, f1)`` tuple, in that order.
    """
    # The scorers are applied in the same order as the returned tuple.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(actual, pred) for scorer in scorers)

In [4]:
if __name__ == "__main__":
    # Train a LightGBM classifier on the preprocessed CSV, evaluate it on a
    # held-out split, and record metrics plus the fitted model in MLflow.

    warnings.filterwarnings("ignore")
    np.random.seed(42)

    # Read the preprocessed dataset from a local CSV file.
    csv_url = (
        "C:/Users/willi/Python/Spotify_Project/Data/preprocess_data.csv"
    )
    try:
        data = pd.read_csv(csv_url, sep=",")
    except Exception:
        # Nothing below can run without the data, so log (with traceback) and
        # re-raise instead of falling through to a NameError on `data`.
        logger.exception("Unable to read training & test CSV from %s", csv_url)
        raise

    # Split the data into training and test sets. (0.8, 0.2) split.
    train, test = train_test_split(data, test_size=0.2, random_state=42)

    # The predicted column is "mood" which is a binary [0, 1].
    # The target is selected as a 1-D Series (not a one-column DataFrame) so
    # the estimator and the metric functions receive a flat label vector.
    train_x = train.drop("mood", axis=1)
    test_x = test.drop("mood", axis=1)
    train_y = train["mood"]
    test_y = test["mood"]

    with mlflow.start_run():

        clf = lgb.LGBMClassifier()
        clf.fit(train_x, train_y)

        y_pred = clf.predict(test_x)
        # Column 1 of predict_proba is the score for the second class.
        y_proba = clf.predict_proba(test_x)[:, 1]

        (roc_auc, average_precision) = eval_proba(test_y, y_proba)
        (accuracy, precision, recall, f1) = eval_pred(test_y, y_pred)

        # Single source of truth for the metric names, so what is printed
        # always matches what is logged to MLflow.
        metrics = {
            "roc_auc": roc_auc,
            "average_precision": average_precision,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1 score": f1,
        }
        for metric_name, metric_value in metrics.items():
            print(f"{metric_name}: {metric_value}")
        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # Model registry does not work with file store
        if tracking_url_type_store != "file":

            # Register the model
            # There are other ways to use the Model Registry, which depends on the use case,
            # please refer to the doc for more information:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            mlflow.sklearn.log_model(clf, "model", registered_model_name="LgbmModel")
        else:
            mlflow.sklearn.log_model(clf, "model")

roc_auc: 0.8190164137120954
average_precision: 0.7797528345433136
accuracy: 0.7638888888888888
precision: 0.7602040816326531
recall: 0.7967914438502673
f1 score: 0.7780678851174935
