## Scenario 1: A single data scientist participating in an ML competition

MLflow setup:
* Tracking server: no
* Backend store: local filesystem
* Artifacts store: local filesystem

The experiments can be explored locally by launching the MLflow UI (run `mlflow ui` from the directory that contains the `mlruns` folder).

#### Import the libraries

In [46]:
# Import the libraries
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

import mlflow
from mlflow.tracking import MlflowClient
from mlflow.exceptions import MlflowException
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository

#### Check the default tracking URI

In [8]:
# Show the tracking URI MLflow is currently using (nothing has been configured
# in this notebook, so this is the default location).
tracking_uri = mlflow.get_tracking_uri()
print(f"tracking URI: '{tracking_uri}'")

tracking URI: 'file:///Users/farelyue/Documents/Projects/Data%20Science/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns'


#### List all experiments

In [2]:
# List every experiment that exists before a new one is created
experiments = mlflow.search_experiments()
experiment_summaries = [
    f"experiment id : {exp.experiment_id}, experiment name : {exp.name}"
    for exp in experiments
]
for summary in experiment_summaries:
    print(summary)

experiment id : 0, experiment name : Default


### Creating an experiment and logging a new run

In [12]:
# Select the experiment for this scenario (MLflow creates it if it does not exist yet)
mlflow.set_experiment("exp_scenario_1")

# Everything logged inside this context is attached to one new run
with mlflow.start_run():

    # Iris features and labels
    X, y = load_iris(return_X_y=True)

    # Hyperparameters: log them, then reuse the same dict to build the model
    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    # Fit the classifier, then predict on the same data it was trained on
    lr = LogisticRegression(**params)
    lr.fit(X, y)
    y_pred = lr.predict(X)

    # Record the accuracy of those predictions
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Store the fitted model under the run's "models" artifact path
    mlflow.sklearn.log_model(lr, artifact_path="models")

    # Show where this run's artifacts are stored
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

2024/10/23 23:18:41 INFO mlflow.tracking.fluent: Experiment with name 'exp_scenario_1' does not exist. Creating a new experiment.


default artifacts URI: 'file:///Users/farelyue/Documents/Projects/Data%20Science/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/308051317503925502/5dfce563ab44448c94890830e616bc91/artifacts'




In [15]:
# List every experiment again, now that the run above has been logged
experiments = mlflow.search_experiments()
experiment_summaries = [
    f"experiment id : {exp.experiment_id}, experiment name : {exp.name}"
    for exp in experiments
]
for summary in experiment_summaries:
    print(summary)

experiment id : 308051317503925502, experiment name : exp_scenario_1
experiment id : 0, experiment name : Default


### Interacting with the model registry

In [47]:
# Client used to talk to the model registry
client = MlflowClient()

registered_model_name = 'scenario_1'

# Register the model name in the model registry.
# Re-running this cell must not fail: MLflow raises an MlflowException with
# error code RESOURCE_ALREADY_EXISTS when the name is already registered, so
# that specific case is tolerated and any other error is re-raised.
try:
    client.create_registered_model(name=registered_model_name)
except MlflowException as exc:
    if exc.error_code != "RESOURCE_ALREADY_EXISTS":
        raise

# Description attached to the new model version
desc = 'new version of scenario_1 model'

# Look up the experiment by name; get_experiment_by_name returns None when the
# experiment does not exist, so fail with a clear message instead of an
# AttributeError on the next line.
experiment_name = 'exp_scenario_1'
experiment = mlflow.get_experiment_by_name(name=experiment_name)
if experiment is None:
    raise ValueError(
        f"Experiment '{experiment_name}' does not exist; run the training cell first."
    )
experiment_id = experiment.experiment_id

# Fetch the most recent run of that experiment. The ordering is spelled out
# explicitly and only one row is requested, since only the latest run is used.
latest_runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
)
if latest_runs.empty:
    raise ValueError(
        f"Experiment '{experiment_name}' has no runs; run the training cell first."
    )
run_id = latest_runs['run_id'].iloc[0]

# "runs:/" URI pointing at the "models" artifact path of that run
runs_uri = f"runs:/{run_id}/models"

# Resolve the runs:/ URI to the underlying artifact location
model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)

# Create a new version of the registered model from that artifact location.
# Left as the last expression so the notebook displays the resulting ModelVersion.
client.create_model_version(
    name=registered_model_name,
    source=model_src,
    run_id=run_id,
    description=desc,
)

<ModelVersion: aliases=[], creation_timestamp=1729702082039, current_stage='None', description='new version of scenario_1 model', last_updated_timestamp=1729702082039, name='scenario_1', run_id='5dfce563ab44448c94890830e616bc91', run_link=None, source='file:///Users/farelyue/Documents/Projects/Data%20Science/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/308051317503925502/5dfce563ab44448c94890830e616bc91/artifacts/models', status='READY', status_message=None, tags={}, user_id=None, version=1>