<a href="https://colab.research.google.com/github/besherh/Machine-Learning-Course/blob/master/MLOPS/Hello_MLflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLflow + Colab – Example project
This project shows how to log experiments from Google Colab directly to a remote MLflow tracking server.

It uses the DAGsHub MLflow tracking server, which is a free, hosted MLflow remote.

Instructions:
1. [Sign up to DAGsHub](https://dagshub.com/user/sign_up)
1. [Create an access token](https://dagshub.com/user/settings/tokens)
1. [Create a new project](https://dagshub.com/repo/create)
1. [Open the notebook in Google Colab](https://dagshub.com/Dean/mlflow-colab-example/src/master/mlflow-colab.ipynb)
1. Run the first code cell.
1. When the cell asks for input, enter your DAGsHub username, access token, and project name, in that order.
1. Run the visualization cell if you want to see your experiments inside Colab.

In [4]:
!pip install mlflow --quiet

import mlflow
import os
from getpass import getpass

# Ask for the DAGsHub details interactively so nothing sensitive is hardcoded in
# the notebook. Each prompt is labelled so it is clear which value is expected,
# and surrounding whitespace from copy/paste is stripped before use.
# The access token is read with getpass so it is not echoed into the cell output.
# MLFLOW_TRACKING_PROJECTNAME is a notebook-specific variable: it is only used
# here and by the experiments-table cell to build DAGsHub URLs.
os.environ['MLFLOW_TRACKING_USERNAME'] = input('DAGsHub username: ').strip()
os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass('DAGsHub access token: ').strip()
os.environ['MLFLOW_TRACKING_PROJECTNAME'] = input('DAGsHub project name: ').strip()

# Point MLflow at https://dagshub.com/<username>/<project>.mlflow
mlflow.set_tracking_uri(
    f"https://dagshub.com/{os.environ['MLFLOW_TRACKING_USERNAME']}"
    f"/{os.environ['MLFLOW_TRACKING_PROJECTNAME']}.mlflow"
)

# Smoke test: log one metric and one parameter to confirm the remote accepts runs.
with mlflow.start_run(run_name="MLflow on Colab"):
  mlflow.log_metric("m1", 2.0)
  mlflow.log_param("p1", "mlflow-colab")

besher.alhalabi
··········
mlops_demo


In [8]:
# See your experiments table inside Colab!
import IPython
# Build the DAGsHub experiments page URL from the values stored in the
# environment, then embed it as a full-width, 600px-high iframe.
dagshub_user = os.environ['MLFLOW_TRACKING_USERNAME']
dagshub_project = os.environ['MLFLOW_TRACKING_PROJECTNAME']
experiments_url = f"https://dagshub.com/{dagshub_user}/{dagshub_project}/experiments/#/"
display(IPython.display.IFrame(experiments_url, '100%', 600))

In [None]:
# List installed packages whose name contains "mlflow", with their versions.
!pip list | grep mlflow

mlflow                        1.22.0


In [7]:
from pprint import pprint
import numpy as np
from sklearn.linear_model import LinearRegression
import mlflow
from mlflow import MlflowClient

def fetch_logged_data(run_id):
    """Collect what was logged for a single MLflow run.

    Args:
        run_id: ID of the run to look up.

    Returns:
        A tuple ``(params, metrics, tags, artifacts)`` where ``params`` and
        ``metrics`` come straight from the run's data, ``tags`` holds only the
        tags whose key does not start with ``"mlflow."``, and ``artifacts`` is
        the list of artifact paths found under ``"model"``.
    """
    mlflow_client = MlflowClient()
    run_data = mlflow_client.get_run(run_id).data

    # Keep only tags outside the "mlflow." key prefix.
    user_tags = {}
    for key, value in run_data.tags.items():
        if key.startswith("mlflow."):
            continue
        user_tags[key] = value

    # Record the path of every artifact listed under "model".
    artifact_paths = []
    for entry in mlflow_client.list_artifacts(run_id, "model"):
        artifact_paths.append(entry.path)

    return run_data.params, run_data.metrics, user_tags, artifact_paths

# Switch on scikit-learn autologging before any model is fitted
mlflow.sklearn.autolog()

# Toy regression data: y = 1*x0 + 2*x1 + 3
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
coefficients = np.array([1, 2])
y = np.dot(X, coefficients) + 3

# Fit the model inside an MLflow run and keep the run handle for lookup below
model = LinearRegression()
with mlflow.start_run() as run:
    model.fit(X, y)

# Read back what was recorded for that run and print each piece in turn
params, metrics, tags, artifacts = fetch_logged_data(run.info.run_id)

for logged in (params, metrics, tags, artifacts):
    pprint(logged)


{'copy_X': 'True',
 'fit_intercept': 'True',
 'n_jobs': 'None',
 'normalize': 'deprecated',
 'positive': 'False'}
{'training_mae': 2.22044604925031e-16,
 'training_mse': 1.97215226305253e-31,
 'training_r2_score': 1.0,
 'training_rmse': 4.44089209850063e-16,
 'training_score': 1.0}
{'estimator_class': 'sklearn.linear_model._base.LinearRegression',
 'estimator_name': 'LinearRegression'}
['model/MLmodel',
 'model/conda.yaml',
 'model/model.pkl',
 'model/python_env.yaml',
 'model/requirements.txt']
