## Docker example:
https://github.com/mlflow/mlflow/tree/master/examples/docker

# Demo MlFlow Project

In [53]:
import mlflow
from mlflow.exceptions import MlflowException
from  mlflow.tracking import MlflowClient

experimentPath = "experiment-L3"

try:
  experimentID = mlflow.create_experiment(experimentPath)
except MlflowException:
  experimentID = MlflowClient().get_experiment_by_name(experimentPath).experiment_id
  mlflow.set_experiment(experimentPath)

print("The experiment can be found at the path `{}` and has an experiment_id of `{}`".format(experimentPath, experimentID))

NoneType: None
Traceback (most recent call last):
  File "<ipython-input-53-fd2a321d0269>", line 8, in <module>
    experimentID = mlflow.create_experiment(experimentPath)
  File "/Users/azeltov/miniconda3/envs/azure_automl/lib/python3.6/site-packages/mlflow/tracking/fluent.py", line 239, in create_experiment
    """
  File "/Users/azeltov/miniconda3/envs/azure_automl/lib/python3.6/site-packages/mlflow/tracking/client.py", line 101, in create_experiment
    source_version=source_version
  File "/Users/azeltov/miniconda3/envs/azure_automl/lib/python3.6/site-packages/mlflow/store/file_store.py", line 170, in create_experiment
    str(exp_id), str(rnfe), exc_info=True)
mlflow.exceptions.MlflowException: Experiment 'experiment-L3' already exists.
Traceback (most recent call last):
  File "<ipython-input-53-fd2a321d0269>", line 8, in <module>
    experimentID = mlflow.create_experiment(experimentPath)
  File "/Users/azeltov/miniconda3/envs/azure_automl/lib/python3.6/site-packages/mlflow/tra

The experiment can be found at the path `experiment-L3` and has an experiment_id of `2`


%%sh
mkdir experiment-L3

In [31]:
import click
import numpy as np
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

@click.command()
@click.option("--data_path", default="airbnb-cleaned-mlflow.csv", type=str)
@click.option("--n_estimators", default=10, type=int)
@click.option("--max_depth", default=20, type=int)
@click.option("--max_features", default="auto", type=str)
@click.option("--experiment_id", default=0, type=int)
@click.option("--run_name", default="experiment-L3", type=str)
def mlflow_rf(data_path, n_estimators, max_depth, max_features,experiment_id,run_name):

  with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
    # Import the data
    df = pd.read_csv(data_path)
    X_train, X_test, y_train, y_test = train_test_split(df.drop(["price"], axis=1), df[["price"]].values.ravel(), random_state=42)
    
    # Create model, train it, and create predictions
    rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features)
    rf.fit(X_train, y_train)
    predictions = rf.predict(X_test)

    # Log model
    mlflow.sklearn.log_model(rf, "random-forest-model")
    
    # Log params
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("max_features", max_features)

    # Log metrics
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    mlflow.log_metric("mse",mse )
    mlflow.log_metric("rmse",rmse )    
    mlflow.log_metric("mae", mean_absolute_error(y_test, predictions))  
    mlflow.log_metric("r2", r2_score(y_test, predictions))  
    

# if __name__ == "__main__":
#   mlflow_rf() # Note that this does not need arguments thanks to click

In [29]:
#mlflow_rf("airbnb-cleaned-mlflow.csv",10 ,20,"auto",2,"experiment-L3")

In [32]:
from click.testing import CliRunner

runner = CliRunner()
result = runner.invoke(mlflow_rf, ['--n_estimators', 10, '--max_depth', 20, '--experiment_id', experimentID], catch_exceptions=True)

assert result.exit_code == 0, "Code failed" # Check to see that it worked

print("Success!")

Success!


In [9]:
%%sh
mkdir ml-production/mlflow-model-training

In [48]:
%%sh
ls -al ml-production/mlflow-model-training

total 1112
drwxr-xr-x@ 6 azeltov  staff     192 May 14 13:23 .
drwxr-xr-x@ 5 azeltov  staff     160 May  8 12:50 ..
-rw-r--r--@ 1 azeltov  staff     433 May 14 13:14 MLproject
-rw-r--r--@ 1 azeltov  staff  554979 May  8 11:13 airbnb-cleaned-mlflow.csv
-rw-r--r--@ 1 azeltov  staff     170 May 14 13:14 conda.yaml
-rw-r--r--@ 1 azeltov  staff    2329 May  8 13:21 train.py


In [49]:
%%sh 
cat ml-production/mlflow-model-training/MLproject

name: Lesson-3-Model-Training

conda_env: conda.yaml

entry_points:
  main:
    parameters:
      data_path: {type: str, default: "airbnb-cleaned-mlflow.csv"}
      n_estimators: {type: int, default: 10}
      max_depth: {type: int, default: 20}
      max_features: {type: str, default: "auto"}
    command: "python train.py --data_path {data_path} --n_estimators {n_estimators} --max_depth {max_depth} --max_features {max_features}"

In [50]:
%%sh 
cat ml-production/mlflow-model-training/conda.yaml

name: ExperimentDemo-03
channels:
  - defaults
dependencies:
  - cloudpickle=0.5.3
  - numpy=1.14.3
  - pandas=0.23.0
  - scikit-learn=0.19.1
  - pip:
    - mlflow==0.9.1

In [51]:
%%sh 
cat ml-production/mlflow-model-training/train.py

import mlflow
from mlflow.exceptions import MlflowException
from mlflow.tracking import MlflowClient


import click
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

experimentPath = "experiment-L3"

try:
  experimentID = mlflow.create_experiment(experimentPath)
except MlflowException:
  experimentID = MlflowClient().get_experiment_by_name(experimentPath).experiment_id
  mlflow.set_experiment(experimentPath)

print("The experiment can be found at the path `{}` and has an experiment_id of `{}`".format(experimentPath, experimentID))


@click.command()
@click.option("--data_path", default="airbnb-cleaned-mlflow.csv", type=str)
@click.option("--n_estimators", default=10, type=int)
@click.option("--max_depth", default=20, type=int)
@click.option("--max_features", default="auto", type=str)
@click.option("--experiment_id

## Demo MlFlow Run using SDK

In [40]:
import mlflow

mlflow.projects.run('/Users/azeltov/git/mlflowdemo/ml-production/mlflow-model-training',
  parameters={
    "data_path": "/Users/azeltov/git/mlflowdemo/airbnb-cleaned-mlflow.csv",
    "n_estimators": 10,
    "max_depth": 20,
    "max_features": "auto"
})

2019/05/14 13:23:17 INFO mlflow.projects: === Creating conda environment mlflow-3ac087c524740ffd4971ebd574911fe7d41ee4df ===
2019/05/14 13:23:51 INFO mlflow.projects: === Created directory /var/folders/lg/nddnlchd075brd12q3x_6lm40000gn/T/tmp2qyanxld for downloading remote URIs passed to arguments of type 'path' ===
2019/05/14 13:23:51 INFO mlflow.projects: === Running command 'source activate mlflow-3ac087c524740ffd4971ebd574911fe7d41ee4df && python train.py --data_path /Users/azeltov/git/mlflowdemo/airbnb-cleaned-mlflow.csv --n_estimators 10 --max_depth 20 --max_features auto' in run with ID 'f391a7b2e42e4b5baa82a9a73e15d557' === 
2019/05/14 13:23:53 INFO mlflow.projects: === Run (ID 'f391a7b2e42e4b5baa82a9a73e15d557') succeeded ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x11b865160>

## Demo MlFlow Run using Python Direct

In [44]:
%%sh
python  ml-production/mlflow-model-training/train.py --data_path /Users/azeltov/git/mlflowdemo/airbnb-cleaned-mlflow.csv --n_estimators 10 --max_depth 20 --max_features auto --experiment_id=2 --run_name=cli

The experiment can be found at the path `experiment-L3` and has an experiment_id of `2`


  from numpy.core.umath_tests import inner1d


## Demo MlFlow Run using MlFlow CLI

In [42]:
%%sh

mlflow run ml-production/mlflow-model-training/ -P data_path=/Users/azeltov/git/mlflowdemo/airbnb-cleaned-mlflow.csv

The experiment can be found at the path `experiment-L3` and has an experiment_id of `2`


2019/05/14 13:36:27 INFO mlflow.projects: === Created directory /var/folders/lg/nddnlchd075brd12q3x_6lm40000gn/T/tmpha8mxzab for downloading remote URIs passed to arguments of type 'path' ===
2019/05/14 13:36:27 INFO mlflow.projects: === Running command 'source activate mlflow-3ac087c524740ffd4971ebd574911fe7d41ee4df && python train.py --data_path /Users/azeltov/git/mlflowdemo/airbnb-cleaned-mlflow.csv --n_estimators 10 --max_depth 20 --max_features auto' in run with ID '9ff60f3b5fa646c986d6d30816e6c03b' === 
2019/05/14 13:36:30 INFO mlflow.projects: === Run (ID '9ff60f3b5fa646c986d6d30816e6c03b') succeeded ===


## Demo MlFlow Run using SDK  - Github example

In [47]:
mlflow.run(
  uri="https://github.com/mlflow/mlflow-example",
  parameters={'alpha':0.4}
)

2019/05/14 13:43:17 INFO mlflow.projects: === Fetching project from https://github.com/mlflow/mlflow-example into /var/folders/lg/nddnlchd075brd12q3x_6lm40000gn/T/tmpzvf9g6sm ===
2019/05/14 13:43:18 INFO mlflow.projects: === Creating conda environment mlflow-3eee9bd7a0713cf80a17bc0a4d659bc9c549efac ===
2019/05/14 13:43:42 INFO mlflow.projects: === Created directory /var/folders/lg/nddnlchd075brd12q3x_6lm40000gn/T/tmpxiav1f4u for downloading remote URIs passed to arguments of type 'path' ===
2019/05/14 13:43:42 INFO mlflow.projects: === Running command 'source activate mlflow-3eee9bd7a0713cf80a17bc0a4d659bc9c549efac && python train.py 0.4 0.1' in run with ID '4f2959dc526441af9e7f41aaa992e779' === 
2019/05/14 13:43:44 INFO mlflow.projects: === Run (ID '4f2959dc526441af9e7f41aaa992e779') succeeded ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x1a203667f0>

## Demo MlFlow Run on Remote Cluster Databricks

In [None]:
# clusterspecs = {
#     "num_workers": 2,
#     "spark_version": "5.3.x-cpu-ml-scala2.11",
#     "node_type_id": "Standard_DS3_v2",
#     "driver_node_type_id": "Standard_DS3_v2",
# }
# 
# mlflow.projects.run(
#   uri=train_path.replace("dbfs:","/dbfs"),
#   parameters={
#     "data_path": "/dbfs/mnt/training/airbnb/sf-listings/airbnb-cleaned-mlflow.csv",
#     "n_estimators": 1500,
#     "max_depth": 5,
#     "max_features": "sqrt"
# },
#   mode="databricks",
#   cluster_spec=clusterspecs
# )