In [1]:
import joblib

import pandas as pd
import numpy as np
import xgboost as xgb

from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

from mlflow.tracking import MlflowClient

In [5]:
import mlflow
from mlflow.tracking import MlflowClient

mlflow.set_tracking_uri("sqlite:///mlflow-experiments.db")

mlflow.set_experiment("wine_quality_clf")

In [11]:
def read_dataframe():
    redwine = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')
    redwine['type'] = 'red'

    whitewine = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', sep=';')
    whitewine['type'] = 'white'

    df = pd.concat([redwine, whitewine], axis = 0).reset_index(drop=True)
    
    numeric_columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol']

    df['quality'] =  np.where(df.quality.isin([3,4]), '3-4', np.where(df.quality.isin([8,9]), '8-9', df.quality))
    df['quality'] =  df.quality.astype('object')
    
    return df, numeric_columns


def train_model(df, numeric_columns):
    X_train, X_test, y_train, y_test = train_test_split(df.drop('quality', axis=1), df.quality,
                                                    stratify=df.quality, 
                                                    test_size=0.3,
                                                    random_state=123)


    pipe = Pipeline([
        ('column_transformer', ColumnTransformer([
            ('one-hot', OneHotEncoder(handle_unknown='ignore'), ['type']),
            ('scaler', StandardScaler(), numeric_columns)], remainder='drop')),
        ('model', OneVsRestClassifier(xgb.XGBClassifier(random_state= 123, eval_metric='logloss')))])
            
    return X_train, X_test, y_train, y_test, pipe

In [12]:
df, numeric_columns = read_dataframe()

X_train, X_test, y_train, y_test, pipe = train_model(df, numeric_columns)

In [13]:
with mlflow.start_run():
    params = dict(random_state= 123, eval_metric='logloss')
    mlflow.log_params(params)

    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test) 

    accuracy = accuracy_score(y_pred, y_test)
    print('Model Accuracy :', accuracy)
    mlflow.log_metric('accuracy', accuracy)

    mlflow.sklearn.log_model(pipe, artifact_path="model")



Model Accuracy : 0.6594871794871795


In [30]:
client = MlflowClient(tracking_uri="sqlite:///mlflow-experiments.db")

experiment_list = client.list_experiments()


[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='wine_qualit_clf', tags={}>,
 <Experiment: artifact_location='./mlruns/2', experiment_id='2', lifecycle_stage='active', name='wine_quality_clf', tags={}>]

In [28]:
runs = client.search_runs(
    experiment_ids='2'
)

for run in runs:
    print(f"run id: {run.info.run_id}, accuracy: {run.data.metrics['accuracy_score']:.4f}")

run id: 86c3fad4fc9348ef8a0d923f182e7a70, accuracy: 0.0303
run id: d2876ca137ba45ea9cb951b129f718c5, accuracy: 0.5513
run id: 992ecf9e517547c6a4f03fa6a2d07dd5, accuracy: 0.6169
run id: a80061a7fe204ac9b7535443f2646c29, accuracy: 0.5821
run id: 300244f6f3294016ac0381e0b5d8fb69, accuracy: 0.5636
run id: 43f36031293941ed8d47c217efab55d1, accuracy: 0.0303
run id: 73e04249178348039578579278b0232a, accuracy: 0.5985
run id: 7f6e158c36884ad5bcfbd4e9d7244f0b, accuracy: 0.5708
run id: af851998f87d476e9306a6e5729b5bd2, accuracy: 0.5477
run id: 8c25338047fc48c191e24b1852c08308, accuracy: 0.0303


KeyError: 'accuracy_score'

In [16]:
run_id = '0c97a1e2b4a2472f825f4c29c5711b56'
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="wine_quality_clf")

INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Successfully registered model 'wine_quality_clf'.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2022/09/12 14:31:53 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: wine_quality_clf, version 1
Created version '1' of model 'wine_quality_clf'.


<ModelVersion: creation_timestamp=1662982312990, current_stage='None', description=None, last_updated_timestamp=1662982312990, name='wine_quality_clf', run_id='0c97a1e2b4a2472f825f4c29c5711b56', run_link=None, source='./mlruns/1/0c97a1e2b4a2472f825f4c29c5711b56/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [18]:
model_version = 1
model_name = "wine_quality_clf"
new_stage = "Staging"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1662982312990, current_stage='Staging', description=None, last_updated_timestamp=1662982395405, name='wine_quality_clf', run_id='0c97a1e2b4a2472f825f4c29c5711b56', run_link=None, source='./mlruns/1/0c97a1e2b4a2472f825f4c29c5711b56/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [19]:

latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Staging


In [20]:
model_version = 1
model_name = "wine_quality_clf"
new_stage = "Production"
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1662982312990, current_stage='Production', description=None, last_updated_timestamp=1662982450040, name='wine_quality_clf', run_id='0c97a1e2b4a2472f825f4c29c5711b56', run_link=None, source='./mlruns/1/0c97a1e2b4a2472f825f4c29c5711b56/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [22]:
client.transition_model_version_stage(
    name=model_name,
    version=1,
    stage="Production",
    archive_existing_versions=True
)

<ModelVersion: creation_timestamp=1662982312990, current_stage='Production', description=None, last_updated_timestamp=1662982472604, name='wine_quality_clf', run_id='0c97a1e2b4a2472f825f4c29c5711b56', run_link=None, source='./mlruns/1/0c97a1e2b4a2472f825f4c29c5711b56/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [6]:
from mlflow.entities import ViewType

EXPERIMENT_NAME = 'wine_quality_clf'

def run():

    client = MlflowClient()

    # retrieve the top_n model runs and log the models to MLflow
    experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
    

    # select the model with the highest accuracy
    experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
    best_run = client.search_runs( 
    experiment_ids=experiment.experiment_id,
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.accuracy_score DESC"]
    )[0]

    # register the best model
    run_id = best_run.info.run_id
    model_uri = f"runs:/{run_id}/model"
    print("Accuracy score of the best model" , best_run.data.metrics['accuracy_score'])
    model_name = "wine_quality_clf" 
    #mlflow.register_model(model_uri=model_uri, name=model_name)
    
    return model_uri, model_name 


In [7]:
run()

Accuracy score of the best model 0.6169230769230769


('runs:/992ecf9e517547c6a4f03fa6a2d07dd5/model', 'wine_quality_clf')

In [None]:
def register_model(model_uri, model_name):
    client.transition_model_version_stage(
        name=model_name,
        version=1,
        stage="Staging",
        archive_existing_versions=True
    )