## Scenario 1: A single data scientist participating in an ML competition

MLflow setup:
* Tracking server: no
* Backend store: local filesystem
* Artifacts store: local filesystem

The experiments can be explored locally by launching the MLflow UI.

In [1]:
import mlflow

In [2]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'file:///home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns'


In [3]:
from mlflow.tracking import MlflowClient


# instantiate the client
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [5]:
# we can create experiments
client.create_experiment(name="test2")

'2'

In [7]:
client.search_experiments()

[<Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/2', creation_time=1684593797187, experiment_id='2', last_update_time=1684593797187, lifecycle_stage='active', name='test2', tags={}>,
 <Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/1', creation_time=1684593752837, experiment_id='1', last_update_time=1684593752837, lifecycle_stage='active', name='test', tags={}>,
 <Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/0', creation_time=1684593696168, experiment_id='0', last_update_time=1684593696168, lifecycle_stage='active', name='Default', tags={}>]

In [13]:
client.create_experiment(name="my-cool-experiment")

MlflowException: Experiment(name=my-cool-experiment) already exists. Error: (raised as a result of Query-invoked autoflush; consider using a session.no_autoflush block if this flush is occurring prematurely)
(sqlite3.IntegrityError) UNIQUE constraint failed: experiments.name
[SQL: INSERT INTO experiments (name, artifact_location, lifecycle_stage, creation_time, last_update_time) VALUES (?, ?, ?, ?, ?)]
[parameters: ('my-cool-experiment', None, 'active', 1684594155150, 1684594155150)]
(Background on this error at: https://sqlalche.me/e/20/gkpj)

In [15]:
client.search_experiments()

[<Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/3', creation_time=1684594024737, experiment_id='3', last_update_time=1684594024737, lifecycle_stage='active', name='my-cool-experiment', tags={}>,
 <Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/2', creation_time=1684593797187, experiment_id='2', last_update_time=1684593797187, lifecycle_stage='active', name='test2', tags={}>,
 <Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/1', creation_time=1684593752837, experiment_id='1', last_update_time=1684593752837, lifecycle_stage='active', name='test', tags={}>,
 <Experiment: artifact_location='/home/ubuntu/mlops-zoomcamp/02-experiment-tracking/running-mlflow-examples/mlruns/0', creation_time=1684593696168, experiment_id='0', last_update_time=1684593696168, lifecycle_stage='active', name='Default', 

In [17]:
    from mlflow.entities import ViewType

    runs = client.search_runs(
        experiment_ids= "2",
        filter_string="",
        run_view_type=ViewType.ACTIVE_ONLY,
        max_results=5,
        order_by=["metrics.rmse ASC"]
    )

In [18]:
        # the runs have a ton of information, but we can choose to only view the run_id and rmse
        for run in runs:
            print(f"run id: {run.info.run_id}, rmse: {run.data.metrics['rmse']}")

In [8]:
# list experiments
# client.list_experiments()

In [None]:
# client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [9]:
# client.list_experiments()

In [10]:
# mlflow.list_experiments()

### Creating an experiment and logging a new run

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

In [None]:
mlflow.list_experiments()

### Interacting with the model registry

In [None]:
from mlflow.tracking import MlflowClient


client = MlflowClient()

In [None]:
from mlflow.exceptions import MlflowException

try:
    client.list_registered_models()
except MlflowException:
    print("It's not possible to access the model registry :(")