## Scenario 2: A cross-functional team with one data scientist working on an ML model


MLflow setup:
- tracking server: yes, local server
- backend store: sqlite database
- artifacts store: local filesystem

The experiments can be explored locally by accessing the local tracking server.

To run this example, first launch the MLflow tracking server locally by running the following command in your terminal:

`mlflow server --backend-store-uri sqlite:///backend.db`

#### Import the libraries

In [1]:
# Import the libraries
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

import mlflow
from mlflow.tracking import MlflowClient
from mlflow.exceptions import MlflowException
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository

#### Set up tracking URI

In [2]:
# Address of the locally running MLflow tracking server
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"

# Route all subsequent mlflow.* calls to that server, then echo the URI in effect
mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://127.0.0.1:5000'


#### List all experiments

In [3]:
# List all experiments known to the tracking server before running a new one
experiments = mlflow.search_experiments()

# Build one summary line per experiment, then print them in the returned order
experiment_lines = [
    f"experiment id : {experiment.experiment_id}, experiment name : {experiment.name}"
    for experiment in experiments
]
for line in experiment_lines:
    print(line)

experiment id : 0, experiment name : Default


#### Creating an experiment and logging new run

In [4]:
# Make "exp_scenario_2" the active experiment
mlflow.set_experiment(experiment_name="exp_scenario_2")

# Everything logged inside this block is attached to a single new run
with mlflow.start_run():

    # Features and targets of the iris dataset
    X, y = load_iris(return_X_y=True)

    # Record the hyperparameters before fitting
    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    # Fit the classifier and predict on the same samples
    lr = LogisticRegression(**params)
    lr.fit(X, y)
    y_pred = lr.predict(X)

    # NOTE: this is training-set accuracy (no held-out split is used here)
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Store the fitted model under the run's "models" artifact path
    mlflow.sklearn.log_model(lr, artifact_path="models")

    # Show where this run's artifacts are stored
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

2024/10/24 08:38:43 INFO mlflow.tracking.fluent: Experiment with name 'exp_scenario_2' does not exist. Creating a new experiment.


default artifacts URI: 'mlflow-artifacts:/1/df13eed1c7764a63ab67ec55ecb2fb36/artifacts'


In [5]:
# List all experiments again, now that a run has been logged
experiments = mlflow.search_experiments()

# Lazily format one summary line per experiment and print each in turn
for line in (
    f"experiment id : {experiment.experiment_id}, experiment name : {experiment.name}"
    for experiment in experiments
):
    print(line)

experiment id : 1, experiment name : exp_scenario_2
experiment id : 0, experiment name : Default


### Interacting with the model registry

Register a new model

In [6]:
# Instantiate a client for managing experiments, runs and the model registry.
# Reuse the tracking URI configured earlier in the notebook instead of
# hard-coding it a second time, so the client can never point elsewhere.
client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())

# Register the model name in the model registry.
# The registry is persistent, so on a re-run of the notebook the name already
# exists and `create_registered_model` raises RESOURCE_ALREADY_EXISTS. In that
# case fetch the existing entry instead of failing; any other error is re-raised.
registered_model_name = 'scenario_2'
try:
    registered_model = client.create_registered_model(name=registered_model_name)
except MlflowException as exc:
    if exc.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
    registered_model = client.get_registered_model(name=registered_model_name)

# Display the registered model as the cell output
registered_model

<RegisteredModel: aliases={}, creation_timestamp=1729733936360, description='', last_updated_timestamp=1729733936360, latest_versions=[], name='scenario_2', tags={}>

Create a new version of the registered model

In [7]:
# Look up the experiment by name; `get_experiment_by_name` returns None when
# the experiment does not exist, so fail early with a clear message.
experiment_name = 'exp_scenario_2'
experiment = mlflow.get_experiment_by_name(name=experiment_name)
if experiment is None:
    raise ValueError(f"Experiment '{experiment_name}' does not exist on the tracking server")
experiment_id = experiment.experiment_id

# Fetch only the most recent run of the experiment. The ordering is stated
# explicitly so the selected run does not depend on implicit defaults when the
# notebook has been executed more than once.
latest_runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
if latest_runs.empty:
    raise ValueError(f"Experiment '{experiment_name}' has no runs to register a model from")
run_id = latest_runs['run_id'].iloc[0]

# Run-relative URI of the model logged under the "models" artifact path
runs_uri = f"runs:/{run_id}/models"

# Resolve the runs:/ URI to the underlying artifact location
model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)

# Description of the model version
desc = 'new version of scenario_2 model'

# Create a new model version (displayed as the cell output)
client.create_model_version(name=registered_model_name, source=model_src, run_id=run_id, description=desc)

2024/10/24 08:39:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: scenario_2, version 1


<ModelVersion: aliases=[], creation_timestamp=1729733944448, current_stage='None', description='new version of scenario_2 model', last_updated_timestamp=1729733944448, name='scenario_2', run_id='df13eed1c7764a63ab67ec55ecb2fb36', run_link='', source='mlflow-artifacts:/1/df13eed1c7764a63ab67ec55ecb2fb36/artifacts/models', status='READY', status_message='', tags={}, user_id='', version='1'>