<a href="https://colab.research.google.com/github/chakra-ai/mlflow-tracking/blob/main/MLflow_on_Google_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount the Google Drive if needed using the below code. 

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Install MLflow

In [2]:
!pip install mlflow --quiet
!pip install pyngrok --quiet

[K     |████████████████████████████████| 15.6 MB 22.2 MB/s 
[K     |████████████████████████████████| 79 kB 7.6 MB/s 
[K     |████████████████████████████████| 596 kB 60.6 MB/s 
[K     |████████████████████████████████| 146 kB 67.6 MB/s 
[K     |████████████████████████████████| 180 kB 57.6 MB/s 
[K     |████████████████████████████████| 58 kB 5.5 MB/s 
[K     |████████████████████████████████| 210 kB 52.9 MB/s 
[K     |████████████████████████████████| 53 kB 2.0 MB/s 
[K     |████████████████████████████████| 63 kB 1.5 MB/s 
[K     |████████████████████████████████| 75 kB 3.7 MB/s 
[?25h  Building wheel for databricks-cli (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 745 kB 24.3 MB/s 
[?25h  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone


# Check if MLflow is installed properly

In [3]:
!mlflow

Usage: mlflow [OPTIONS] COMMAND [ARGS]...

Options:
  --version  Show the version and exit.
  --help     Show this message and exit.

Commands:
  artifacts    Upload, list, and download...
  azureml      Serve models on Azure ML.
  db           Commands for managing an MLflow...
  deployments  Deploy MLflow models to custom...
  experiments  Manage experiments.
  gc           Permanently delete runs in the
               `deleted` lifecycle stage.

  models       Deploy MLflow models locally.
  run          Run an MLflow project from the...
  runs         Manage runs.
  sagemaker    Serve models on SageMaker.
  server       Run the MLflow tracking server.
  ui           Launch the MLflow tracking UI
               for...


# To visualize the MLflow in UI

In [4]:
from pyngrok import ngrok

In [9]:
#Terminate the open tunnels if exists
ngrok.kill()

In [14]:
# Setup your auth token and
# Get your authtoken from https://dashboard.ngrok.com/auth 
NGROK_AUTH_TOKEN = ""  # Fill your auth token.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://94f8-34-86-112-59.ngrok.io


In [15]:
# Enable the mlflow UI
!mlflow ui

[2022-02-09 05:34:35 +0000] [627] [INFO] Starting gunicorn 20.1.0
[2022-02-09 05:34:35 +0000] [627] [INFO] Listening at: http://127.0.0.1:5000 (627)
[2022-02-09 05:34:35 +0000] [627] [INFO] Using worker: sync
[2022-02-09 05:34:35 +0000] [630] [INFO] Booting worker with pid: 630

Aborted!
[2022-02-09 05:35:03 +0000] [627] [INFO] Handling signal: int
[2022-02-09 05:35:03 +0000] [630] [INFO] Worker exiting (pid: 630)
[2022-02-09 05:35:03 +0000] [627] [INFO] Shutting down: Master


# Lets do this practically with a simple machine learning model.

In [16]:
# Import the required libraries
import mlflow
import mlflow.sklearn

# Create a MLflow experiment
mlflow.set_experiment('LearnML-Demo')

2022/02/09 05:39:35 INFO mlflow.tracking.fluent: Experiment with name 'LearnML-Demo' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///content/mlruns/1', experiment_id='1', lifecycle_stage='active', name='LearnML-Demo', tags={}>

In [17]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
import mlflow
import mlflow.sklearn
import sys
import os

In [18]:
def eval_metrics(actual, pred):
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

In [23]:
filepath =  "drive/MyDrive/sample_data/winequality-red.csv"

In [26]:
data = pd.read_csv(filepath,delimiter=",")

In [27]:
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [28]:
# Read the wine-quality csv file


def train_model(alpha,l1_ratio):
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")

In [29]:
train_model(0.4,0.1)
train_model(0.1,0.1)
train_model(0.5,0.5)
train_model(0.2,0.2)
train_model(0.1,0.4)

In [30]:
# Setup your auth token and
# Get your authtoken from https://dashboard.ngrok.com/auth 
NGROK_AUTH_TOKEN = "" # Fill your auth token.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://a90e-34-86-112-59.ngrok.io


In [31]:
!mlflow ui

[2022-02-09 05:46:42 +0000] [848] [INFO] Starting gunicorn 20.1.0
[2022-02-09 05:46:42 +0000] [848] [INFO] Listening at: http://127.0.0.1:5000 (848)
[2022-02-09 05:46:42 +0000] [848] [INFO] Using worker: sync
[2022-02-09 05:46:42 +0000] [853] [INFO] Booting worker with pid: 853
[2022-02-09 05:47:12 +0000] [848] [INFO] Handling signal: int
[2022-02-09 05:47:12 +0000] [853] [INFO] Worker exiting (pid: 853)

Aborted!
[2022-02-09 05:47:12 +0000] [848] [INFO] Shutting down: Master
