# MLflow in Jupyter

In [1]:
import os
from pathlib import Path
import mlflow
from mlflow import log_metric, log_param, log_artifact

## Basic Tracking

In [4]:
# NOTE(review): `!` hands this line to a separate shell process, so the
# `export` presumably cannot change the kernel's own environment, and no value
# is assigned here anyway. If the URI has to be set from inside the notebook,
# `%env MLFLOW_TRACKING_URI=<uri>` is the in-kernel route -- confirm intent.
!export MLFLOW_TRACKING_URI

In [5]:
# Print MLFLOW_TRACKING_URI as seen by a shell launched from the kernel, i.e.
# the value the notebook process was started with (empty output if it is unset).
!echo $MLFLOW_TRACKING_URI

one example


In [2]:
# Select the experiment that the run below is recorded under
# (MLflow creates it on first use if it does not exist yet).
mlflow.set_experiment("Experiment1")

# Bind the run object returned by start_run() so the run id is read from the
# run this block opened, rather than from the global "active run" lookup.
with mlflow.start_run() as run:

    # Show where this run is recorded: tracking URI, artifact location, run id.
    print("MLFlow Tracking: ", mlflow.tracking.get_tracking_uri())
    print("MLFlow Artifacts: ", mlflow.get_artifact_uri())
    print("Mlflow run-id: " + run.info.run_id)

    # Attach a key/value tag to the run.
    mlflow.set_tag("Experiment", "test")

    # Upload a local file; it is stored under the "MLFlow Picture"
    # sub-directory of the run's artifact location.
    # The path is relative to the notebook's working directory.
    mlflow.log_artifact("./data/mlflow.png", artifact_path="MLFlow Picture")

    # Log placeholder metric values, one batch per metric family:
    # classification scores, area-under-curve scores, regression errors.
    mlflow.log_metrics({"Score": 1, "Recall": 2, "Precision": 3, "F": 4})
    mlflow.log_metrics({"Area Under ROC": 1, "Area Under PR": 2})
    mlflow.log_metrics({"r2": 1, "rmse": 2, "mse": 2, "mae": 3})

    # Log a single run parameter.
    mlflow.log_param("param1", 1)

INFO: 'Experiment1' does not exist. Creating a new experiment
MLFlow Tracking:  one example
MLFlow Artifacts:  one example/1/ef83828922c0406990c0dd6285f7e30d/artifacts
Mlflow rund-id: ef83828922c0406990c0dd6285f7e30d


NameError: name 'experiment' is not defined

## Model tracking

In [None]:
# Select the experiment that the run below is recorded under.
mlflow.set_experiment("Experiment2")

# NOTE(review): `featureModel`, `sample` and `bestModel` are not defined by any
# earlier cell visible in this notebook (`bestModel` is only assigned in the
# Spark-parameters cell further down), so on a fresh kernel this cell needs
# them to be created first -- confirm the intended cell order.
with mlflow.start_run():

    # Log the Spark feature pipeline in MLeap format; `sample` is the sample
    # input passed to MLeap, and the model lands under the "mleappath" artifact path.
    mlflow.mleap.log_model(spark_model=featureModel, sample_input=sample, artifact_path="mleappath")

    # Log the fitted Spark model under the "spark-model" artifact path.
    mlflow.spark.log_model(bestModel, "spark-model")

    # TODO: log the model meant for Flask serving once its MLflow flavor and
    # arguments are decided, e.g. mlflow.<flavor>.log_model(<model>, "<artifact-path>").

## Getting parameters from Spark for MLflow tracking

In [None]:
# Select the experiment that the run below is recorded under.
mlflow.set_experiment("Experiment3")

# NOTE(review): `df` (input DataFrame) and `crossval` (the cross-validator to
# fit) must already exist in the kernel; no cell visible here defines them.
with mlflow.start_run():

    # 90/10 train/validation split. The fixed seed makes the split -- and
    # therefore the fitted model and logged values -- repeatable across re-runs.
    (dfTraining, dfValidation) = df.randomSplit([90.0, 10.0], seed=42)
    crossvalPredictionModel = crossval.fit(dfTraining)

    # Take the best model found by cross-validation and score the hold-out split.
    bestModel = crossvalPredictionModel.bestModel
    bestModelPredictions = bestModel.transform(dfValidation)

    # assumes pipeline stage 2 is the random forest stage -- TODO confirm
    # against the pipeline definition, which is not shown here.
    rfModel = bestModel.stages[2]

    # Log every parameter of the selected stage. The param map is extracted
    # once and iterated as (param, value) pairs instead of being rebuilt for
    # each lookup.
    for param, value in rfModel.extractParamMap().items():
        mlflow.log_param(param.name, value)