## Install Required Packages
Before you start, make sure MLflow, Seaborn, pandas, and scikit-learn are installed. If any of them are missing, install them with pip, for example: `pip install mlflow seaborn pandas scikit-learn`.

In [1]:
# Import Libraries
import mlflow
import mlflow.sklearn
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load and prepare data
# ---------------------
# Read the iris dataset through seaborn's dataset loader.
data = sns.load_dataset('iris')

# Every column except 'species' is a feature; 'species' is the label to predict.
X = data.drop(columns='species')
y = data['species']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a RandomForestClassifier on the iris split and record the run in MLflow:
# hyperparameters, accuracy metric, classification report (JSON artifact) and the
# fitted model. Reads X_train, X_test, y_train, y_test from the data-preparation cell.

# End any run that is still active (for example from an earlier, interrupted
# execution of this cell) so that only one run is active at a time.
if mlflow.active_run() is not None:
    mlflow.end_run()

# Hyperparameters live in one dict so that the values used to build the model are
# exactly the values recorded in MLflow (100 trees, fixed seed for reproducibility).
rf_params = {"n_estimators": 100, "random_state": 42}

# The 'with' statement ends the run when the block exits, even if an error occurs.
with mlflow.start_run() as run:

    # Record the hyperparameters so runs can be compared and reproduced later;
    # without this the run's "Parameters" section stays empty.
    mlflow.log_params(rf_params)

    # Build and train (fit) the model on the training data
    model = RandomForestClassifier(**rf_params)
    model.fit(X_train, y_train)

    # Predict on the held-out test data
    y_pred = model.predict(X_test)

    # Accuracy: fraction of test predictions that match the true labels
    accuracy = accuracy_score(y_test, y_pred)

    # Dict form of the report (precision, recall, F1-score) for logging as JSON
    report = classification_report(y_test, y_pred, output_dict=True)

    # Log the accuracy metric to MLflow to track model performance
    mlflow.log_metric("accuracy", accuracy)

    # Log the classification report as a JSON artifact for later review
    mlflow.log_dict(report, "classification_report.json")

    # Log the trained model so that it can be loaded and used later
    mlflow.sklearn.log_model(model, "model")

    # Print the accuracy of the model to the console
    print(f"Accuracy: {accuracy}")

    # Print the human-readable (text) form of the classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Print the run's unique identifier, used to reference this run in MLflow
    print(f"Run ID: {run.info.run_id}")




Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Run ID: e847a9b8297847b7bc4bf0691849bb33


## Check MLflow UI
After running the above code, you can check the MLflow UI to see the logged metrics and model. To start the MLflow UI, run `mlflow ui` in your terminal, or run the cell below (it keeps running until you interrupt it):

In [4]:
# Start the MLflow UI from the notebook via a shell escape. The command keeps
# running until it is interrupted (stop the cell to get the notebook back).
!mlflow ui


[2024-08-14 14:14:05 +0530] [30137] [INFO] Starting gunicorn 22.0.0
[2024-08-14 14:14:05 +0530] [30137] [INFO] Listening at: http://127.0.0.1:5000 (30137)
[2024-08-14 14:14:05 +0530] [30137] [INFO] Using worker: sync
[2024-08-14 14:14:05 +0530] [30138] [INFO] Booting worker with pid: 30138
[2024-08-14 14:14:05 +0530] [30139] [INFO] Booting worker with pid: 30139
[2024-08-14 14:14:06 +0530] [30140] [INFO] Booting worker with pid: 30140
[2024-08-14 14:14:06 +0530] [30142] [INFO] Booting worker with pid: 30142
^C
[2024-08-14 14:14:37 +0530] [30137] [INFO] Handling signal: int
[2024-08-14 14:14:38 +0530] [30139] [INFO] Worker exiting (pid: 30139)
[2024-08-14 14:14:38 +0530] [30138] [INFO] Worker exiting (pid: 30138)
[2024-08-14 14:14:38 +0530] [30140] [INFO] Worker exiting (pid: 30140)
[2024-08-14 14:14:38 +0530] [30142] [INFO] Worker exiting (pid: 30142)


# Accessing the Run ID from the MLflow UI
## Start the MLflow UI
Open your terminal and run `mlflow ui` (or run the cell below):






In [None]:
# Start the MLflow UI (shell escape; equivalent to typing `mlflow ui` in a terminal).
!mlflow ui

This will start the MLflow tracking server, typically accessible at http://127.0.0.1:5000.

#### Navigate to the MLflow UI:


Open your web browser and go to http://127.0.0.1:5000.

#### Find Your Experiment:

Click on the experiment name to see the list of runs.

You will see a table of runs with columns like "Run Name", "Start Time", "Duration", and "Metrics".


#### Get the Run ID:

Hover over or click on a specific run to see details.



The Run ID is usually visible in the URL, and it can also be found on the run details page.

## Listing All Run IDs Programmatically
You can also list all runs and their IDs programmatically using the `mlflow.search_runs` function. Here’s how:



In [29]:
import mlflow

# Fetch every run of the default experiment as a table (one row per run)
runs = mlflow.search_runs()

# Print only the run IDs, one per line, by walking the 'run_id' column directly
for run_id in runs['run_id']:
    print(f"Run ID: {run_id}")


Run ID: 0bb17d52b9f247febd6833aa92c316d0
Run ID: c6190d63cdc24266b2019e0f52a7d0eb
Run ID: f1e6dafb5f064929853964f56d3b5bfe
Run ID: 5dcce405a2e64c43a60a0931d802f76a
Run ID: 2c1d6c6325f9469fb1aa421c987b9b29



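This code snippet prints the ID of every run in the experiment. The DataFrame returned by `mlflow.search_runs` also contains the logged metrics and parameters as additional columns (for example `metrics.accuracy`); to display them, read those columns in the loop, adjusting the column names to the metrics you’ve logged.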

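# Accessing Run Details by Run ID
If you know the experiment name or ID, you can retrieve run information from the command line with the `mlflow` command-line interface. The cell below does the equivalent from Python: given a list of run IDs, it uses `mlflow.get_run` to print each run's parameters, metrics, and tags: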



In [32]:
import mlflow

# Run IDs to inspect (copied from the listing of runs)
run_ids = [
    '0bb17d52b9f247febd6833aa92c316d0',
    'c6190d63cdc24266b2019e0f52a7d0eb',
    'f1e6dafb5f064929853964f56d3b5bfe',
    '5dcce405a2e64c43a60a0931d802f76a',
    '2c1d6c6325f9469fb1aa421c987b9b29',
]

# For each run: fetch it from MLflow, then print its parameters, metrics and tags
# as indented "key: value" lines, followed by a separator line.
for run_id in run_ids:
    run = mlflow.get_run(run_id)
    print(f"Run ID: {run_id}")

    # The three sections share one layout, so drive them from (heading, mapping) pairs.
    sections = (
        ("Parameters", run.data.params),
        ("Metrics", run.data.metrics),
        ("Tags", run.data.tags),
    )
    for heading, entries in sections:
        print(f"{heading}:")
        for key, value in entries.items():
            print(f"  {key}: {value}")

    print("-" * 40)


Run ID: 0bb17d52b9f247febd6833aa92c316d0
Parameters:
Metrics:
Tags:
  mlflow.runName: luminous-toad-978
  mlflow.source.name: /home/tar-tt060-saurav/.local/lib/python3.10/site-packages/ipykernel_launcher.py
  mlflow.source.type: LOCAL
  mlflow.user: tar-tt060-saurav
----------------------------------------
Run ID: c6190d63cdc24266b2019e0f52a7d0eb
Parameters:
Metrics:
  accuracy: 1.0
Tags:
  mlflow.runName: thoughtful-eel-295
  mlflow.source.name: /home/tar-tt060-saurav/.local/lib/python3.10/site-packages/ipykernel_launcher.py
  mlflow.source.type: LOCAL
  mlflow.user: tar-tt060-saurav
  mlflow.log-model.history: [{"run_id": "c6190d63cdc24266b2019e0f52a7d0eb", "artifact_path": "model", "utc_time_created": "2024-08-12 10:57:01.213941", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.12", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", 