Merge pull request mlflow#24 from criteo-forks/master-sync
Master sync
Djailla committed Oct 30, 2019
2 parents 1184634 + 424ed02 commit c49d10c
Showing 36 changed files with 537 additions and 170 deletions.
2 changes: 2 additions & 0 deletions docs/source/search-syntax.rst
@@ -156,6 +156,7 @@ with 10 layers and had a prediction accuracy of 94.5% or higher, use:
.. code-block:: py
from mlflow.tracking.client import MlflowClient
from mlflow.entities import ViewType
query = "params.model = 'CNN' and params.layers = '10' and metrics.'prediction accuracy' >= 0.945"
runs = MlflowClient().search_runs(["3", "4", "17"], query, ViewType.ACTIVE_ONLY)
@@ -165,6 +166,7 @@ To search all known experiments for any MLflow runs created using the Inception
.. code-block:: py
from mlflow.tracking.client import MlflowClient
from mlflow.entities import ViewType
all_experiments = [exp.experiment_id for exp in MlflowClient().list_experiments()]
runs = MlflowClient().search_runs(all_experiments, "params.model = 'Inception'", ViewType.ALL)
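Both hunks above add the missing ``from mlflow.entities import ViewType`` import so the documented snippets run as written. A combined, hedged sketch (assumes a reachable tracking server with at least one experiment):

    from mlflow.tracking.client import MlflowClient
    from mlflow.entities import ViewType

    client = MlflowClient()
    # Collect every known experiment id, then search across all of them.
    all_experiments = [exp.experiment_id for exp in client.list_experiments()]
    runs = client.search_runs(all_experiments, "params.model = 'Inception'", ViewType.ALL)
    print("matching runs:", len(runs))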
2 changes: 1 addition & 1 deletion examples/sklearn_elasticnet_wine/MLproject
@@ -5,6 +5,6 @@ conda_env: conda.yaml
entry_points:
main:
parameters:
alpha: float
alpha: {type: float, default: 0.5}
l1_ratio: {type: float, default: 0.1}
command: "python train.py {alpha} {l1_ratio}"
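Giving ``alpha`` a default means the example can now be launched without passing that parameter explicitly. A hedged sketch via the fluent projects API (the relative path assumes the repository checkout as the working directory):

    import mlflow

    # alpha is omitted here and falls back to its new default of 0.5;
    # l1_ratio is overridden from its default of 0.1.
    mlflow.run("examples/sklearn_elasticnet_wine", parameters={"l1_ratio": 0.2})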
3 changes: 3 additions & 0 deletions mlflow/__init__.py
@@ -52,11 +52,14 @@
log_artifacts = mlflow.tracking.fluent.log_artifacts
log_artifact = mlflow.tracking.fluent.log_artifact
active_run = mlflow.tracking.fluent.active_run
get_run = mlflow.tracking.fluent.get_run
start_run = mlflow.tracking.fluent.start_run
end_run = mlflow.tracking.fluent.end_run
search_runs = mlflow.tracking.fluent.search_runs
get_artifact_uri = mlflow.tracking.fluent.get_artifact_uri
set_tracking_uri = tracking.set_tracking_uri
get_experiment = mlflow.tracking.fluent.get_experiment
get_experiment_by_name = mlflow.tracking.fluent.get_experiment_by_name
get_tracking_uri = tracking.get_tracking_uri
create_experiment = mlflow.tracking.fluent.create_experiment
set_experiment = mlflow.tracking.fluent.set_experiment
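Reading the hunk header (three additions), the new module-level aliases appear to be ``get_run``, ``get_experiment``, and ``get_experiment_by_name``, each mirroring an existing ``MlflowClient`` method. A small sketch of the resulting ergonomics (the experiment name and run ID are placeholders):

    import mlflow

    exp = mlflow.get_experiment_by_name("Default")
    if exp is not None:
        print(exp.experiment_id)
    # mlflow.get_run("<run-id>") returns the run's params, metrics, and tags.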
6 changes: 6 additions & 0 deletions mlflow/entities/model_registry/model_version.py
@@ -4,6 +4,12 @@


class ModelVersion(_ModelRegistryEntity):
"""
Note:: Experimental: This entity may change or be removed in a future release without warning.
MLflow entity for Model Version.
A model version is uniquely identified using underlying
:py:class:`mlflow.entities.model_registry.RegisteredModel` and version number.
"""
def __init__(self, registered_model, version):
"""
Construct a :py:class:`mlflow.entities.model_registry.RegisteredModel` instance
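Per the constructor signature above, a version is keyed by its parent registered model plus a version number. A hedged construction sketch (assumes the ``mlflow.entities.model_registry`` package re-exports these experimental entities):

    from mlflow.entities.model_registry import ModelVersion, RegisteredModel

    mv = ModelVersion(RegisteredModel("my-model"), 1)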
7 changes: 7 additions & 0 deletions mlflow/entities/model_registry/model_version_detailed.py
@@ -5,6 +5,13 @@


class ModelVersionDetailed(ModelVersion):
"""
Note:: Experimental: This entity may change or be removed in a future release without warning.
MLflow entity for Model Version Detailed.
Provides additional metadata for a model version in addition to the information in
:py:class:`mlflow.entities.model_registry.ModelVersion`.
"""

def __init__(self, registered_model, version, creation_timestamp, last_updated_timestamp=None,
description=None, user_id=None, current_stage=None, source=None, run_id=None,
status=None, status_message=None):
6 changes: 6 additions & 0 deletions mlflow/entities/model_registry/registered_model.py
@@ -3,6 +3,12 @@


class RegisteredModel(_ModelRegistryEntity):
"""
Note:: Experimental: This entity may change or be removed in a future release without warning.
MLflow entity for Registered Model.
A registered model entity is uniquely identified by its name.
"""

def __init__(self, name):
"""
Construct a :py:class:`mlflow.entities.model_registry.RegisteredModel`
9 changes: 7 additions & 2 deletions mlflow/entities/model_registry/registered_model_detailed.py
@@ -4,12 +4,17 @@


class RegisteredModelDetailed(RegisteredModel):
# __init__ method to initialize fields
"""
Note:: Experimental: This entity may change or be removed in a future release without warning.
MLflow entity for Registered Model Detailed.
Provides additional metadata for a registered model in addition to the information in
:py:class:`mlflow.entities.model_registry.RegisteredModel`.
"""

def __init__(self, name, creation_timestamp, last_updated_timestamp=None, description=None,
latest_versions=None):
# Constructor is called only from within the system by various backend stores.
super(RegisteredModelDetailed, self).__init__(name)
self._name = name
self._creation_time = creation_timestamp
self._last_updated_timestamp = last_updated_timestamp
self._description = description
7 changes: 4 additions & 3 deletions mlflow/h2o.py
@@ -126,9 +126,10 @@ def log_model(h2o_model, artifact_path, conda_env=None, registered_model_name=No
]
]
}
:param registered_model_name: If given, create a model version under ``registered_model_name``,
also creating a registered model if one with the given name does
not exist.
:param registered_model_name: Note:: Experimental: This argument may change or be removed in a
future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
:param kwargs: kwargs to pass to ``h2o.save_model`` method.
"""
Model.log(artifact_path=artifact_path, flavor=mlflow.h2o,
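The same experimental ``registered_model_name`` note is repeated for the ``keras``, ``mleap``, ``models``, and ``onnx`` flavors below. A hedged usage sketch (assumes a trained H2O estimator ``model`` and a backend that supports the experimental Model Registry):

    import mlflow.h2o

    # ``model`` is assumed to be a trained h2o estimator from earlier in the
    # script. Logs it as a run artifact and, because a name is given, also
    # creates the registered model (if absent) plus a new model version.
    mlflow.h2o.log_model(model, "model", registered_model_name="WineQualityH2O")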


7 changes: 4 additions & 3 deletions mlflow/keras.py
@@ -212,9 +212,10 @@ def log_model(keras_model, artifact_path, conda_env=None, custom_objects=None, k
:param keras_module: Keras module to be used to save / load the model
(``keras`` or ``tf.keras``). If not provided, MLflow will
attempt to infer the Keras module based on the given model.
:param registered_model_name: If given, create a model version under ``registered_model_name``,
also creating a registered model if one with the given name does
not exist.
:param registered_model_name: Note:: Experimental: This argument may change or be removed in a
future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
:param kwargs: kwargs to pass to ``keras_model.save`` method.
>>> from keras import Dense, layers
7 changes: 4 additions & 3 deletions mlflow/mleap.py
@@ -36,9 +36,10 @@ def log_model(spark_model, sample_input, artifact_path, registered_model_name=No
:param sample_input: Sample PySpark DataFrame input that the model can evaluate. This is
required by MLeap for data schema inference.
:param artifact_path: Run-relative artifact path.
:param registered_model_name: If given, create a model version under ``registered_model_name``,
also creating a registered model if one with the given name does
not exist.
:param registered_model_name: Note:: Experimental: This argument may change or be removed in a
future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
>>> import mlflow
>>> import mlflow.mleap
7 changes: 4 additions & 3 deletions mlflow/models/__init__.py
@@ -68,9 +68,10 @@ def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
:param flavor: Flavor module to save the model with. The module must have
the ``save_model`` function that will persist the model as a valid
MLflow model.
:param registered_model_name: If given, create a model version under
``registered_model_name``, also creating a registered model
if one with the given name does not exist.
:param registered_model_name: Note:: Experimental: This argument may change or be removed
in a future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
:param kwargs: Extra args passed to the model flavor.
"""
with TempDir() as tmp:
7 changes: 4 additions & 3 deletions mlflow/onnx.py
@@ -232,9 +232,10 @@ def log_model(onnx_model, artifact_path, conda_env=None, registered_model_name=N
'onnxruntime=0.3.0'
]
}
:param registered_model_name: If given, create a model version under ``registered_model_name``,
also creating a registered model if one with the given name does
not exist.
:param registered_model_name: Note:: Experimental: This argument may change or be removed in a
future release without warning. If given, create a model
version under ``registered_model_name``, also creating a
registered model if one with the given name does not exist.
"""
Model.log(artifact_path=artifact_path, flavor=mlflow.onnx,
onnx_model=onnx_model, conda_env=conda_env,
42 changes: 28 additions & 14 deletions mlflow/projects/_project_spec.py
@@ -11,44 +11,61 @@
from mlflow.utils.file_utils import get_local_path_or_none


MLPROJECT_FILE_NAME = "MLproject"
MLPROJECT_FILE_NAME = "mlproject"
DEFAULT_CONDA_FILE_NAME = "conda.yaml"


def _find_mlproject(directory):
filenames = os.listdir(directory)
for filename in filenames:
if filename.lower() == MLPROJECT_FILE_NAME:
return os.path.join(directory, filename)
return None


def load_project(directory):
mlproject_path = os.path.join(directory, MLPROJECT_FILE_NAME)
mlproject_path = _find_mlproject(directory)

# TODO: Validate structure of YAML loaded from the file
if os.path.exists(mlproject_path):
yaml_obj = {}
if mlproject_path is not None:
with open(mlproject_path) as mlproject_file:
yaml_obj = yaml.safe_load(mlproject_file.read())
else:
yaml_obj = {}
yaml_obj = yaml.safe_load(mlproject_file)

project_name = yaml_obj.get("name")
if not project_name:
project_name = None
conda_path = yaml_obj.get("conda_env")

# Validate config if docker_env parameter is present
docker_env = yaml_obj.get("docker_env")
if docker_env and not docker_env.get("image"):
raise ExecutionException("Docker environment specified but no image "
"attribute found.")

# Validate config if conda_env parameter is present
conda_path = yaml_obj.get("conda_env")
if conda_path and docker_env:
raise ExecutionException("Project cannot contain both a docker and conda environment.")
raise ExecutionException("Project cannot contain both a docker and "
"conda environment.")

# Parse entry points
entry_points = {}
for name, entry_point_yaml in yaml_obj.get("entry_points", {}).items():
parameters = entry_point_yaml.get("parameters", {})
command = entry_point_yaml.get("command")
entry_points[name] = EntryPoint(name, parameters, command)

if conda_path:
conda_env_path = os.path.join(directory, conda_path)
if not os.path.exists(conda_env_path):
raise ExecutionException("Project specified conda environment file %s, but no such "
"file was found." % conda_env_path)
return Project(conda_env_path=conda_env_path, entry_points=entry_points,
docker_env=docker_env, name=project_name,)

default_conda_path = os.path.join(directory, DEFAULT_CONDA_FILE_NAME)
if os.path.exists(default_conda_path):
return Project(conda_env_path=default_conda_path, entry_points=entry_points,
docker_env=docker_env, name=project_name)

return Project(conda_env_path=None, entry_points=entry_points,
docker_env=docker_env, name=project_name)
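A quick sketch of the new case-insensitive lookup, grounded in the ``_find_mlproject`` helper above (the temporary directory and mixed-case filename are illustrative):

    import os
    import tempfile

    from mlflow.projects._project_spec import _find_mlproject

    d = tempfile.mkdtemp()
    open(os.path.join(d, "MLProject"), "w").close()
    # Any casing of "mlproject" is now discovered:
    assert _find_mlproject(d) == os.path.join(d, "MLProject")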

@@ -92,10 +109,7 @@ def _validate_parameters(self, user_parameters):
for name in self.parameters:
if (name not in user_parameters and self.parameters[name].default is None):
missing_params.append(name)
if len(missing_params) == 1:
raise ExecutionException(
"No value given for missing parameter: '%s'" % missing_params[0])
elif len(missing_params) > 1:
if missing_params:
raise ExecutionException(
"No value given for missing parameters: %s" %
", ".join(["'%s'" % name for name in missing_params]))
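The singular-message branch is dropped because the plural formatting already reads cleanly for a single name:

    missing_params = ["alpha"]
    msg = "No value given for missing parameters: %s" % ", ".join(
        ["'%s'" % name for name in missing_params])
    # -> "No value given for missing parameters: 'alpha'"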
19 changes: 10 additions & 9 deletions mlflow/projects/databricks.py
@@ -6,6 +6,7 @@
import textwrap
import time
import logging
import posixpath

from six.moves import shlex_quote

@@ -23,9 +24,9 @@
# Base directory within driver container for storing files related to MLflow
DB_CONTAINER_BASE = "/databricks/mlflow"
# Base directory within driver container for storing project archives
DB_TARFILE_BASE = os.path.join(DB_CONTAINER_BASE, "project-tars")
DB_TARFILE_BASE = posixpath.join(DB_CONTAINER_BASE, "project-tars")
# Base directory within driver container for storing extracted project directories
DB_PROJECTS_BASE = os.path.join(DB_CONTAINER_BASE, "projects")
DB_PROJECTS_BASE = posixpath.join(DB_CONTAINER_BASE, "projects")
# Name to use for project directory when archiving it for upload to DBFS; the TAR will contain
# a single directory with this name
DB_TARFILE_ARCHIVE_NAME = "mlflow-project"
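``posixpath`` replaces ``os.path`` here because these are paths inside the Linux driver container and on DBFS: on a Windows client, ``os.path`` resolves to ``ntpath`` and joins with backslashes. A small illustration:

    import ntpath
    import posixpath

    ntpath.join("/databricks/mlflow", "project-tars")     # '/databricks/mlflow\\project-tars'
    posixpath.join("/databricks/mlflow", "project-tars")  # '/databricks/mlflow/project-tars'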
@@ -131,9 +132,9 @@ def custom_filter(x):
with open(temp_tar_filename, "rb") as tarred_project:
tarfile_hash = hashlib.sha256(tarred_project.read()).hexdigest()
# TODO: Get subdirectory for experiment from the tracking server
dbfs_path = os.path.join(DBFS_EXPERIMENT_DIR_BASE, str(experiment_id),
"projects-code", "%s.tar.gz" % tarfile_hash)
dbfs_fuse_uri = os.path.join("/dbfs", dbfs_path)
dbfs_path = posixpath.join(DBFS_EXPERIMENT_DIR_BASE, str(experiment_id),
"projects-code", "%s.tar.gz" % tarfile_hash)
dbfs_fuse_uri = posixpath.join("/dbfs", dbfs_path)
if not self._dbfs_path_exists(dbfs_path):
self._upload_to_dbfs(temp_tar_filename, dbfs_fuse_uri)
_logger.info("=== Finished uploading project to %s ===", dbfs_fuse_uri)
@@ -233,10 +234,10 @@ def _get_databricks_run_cmd(dbfs_fuse_tar_uri, run_id, entry_point, parameters):
Generate MLflow CLI command to run on Databricks cluster in order to launch a run on Databricks.
"""
# Strip ".gz" and ".tar" file extensions from base filename of the tarfile
tar_hash = os.path.splitext(os.path.splitext(os.path.basename(dbfs_fuse_tar_uri))[0])[0]
container_tar_path = os.path.abspath(os.path.join(DB_TARFILE_BASE,
os.path.basename(dbfs_fuse_tar_uri)))
project_dir = os.path.join(DB_PROJECTS_BASE, tar_hash)
tar_hash = posixpath.splitext(posixpath.splitext(posixpath.basename(dbfs_fuse_tar_uri))[0])[0]
container_tar_path = posixpath.abspath(posixpath.join(DB_TARFILE_BASE,
posixpath.basename(dbfs_fuse_tar_uri)))
project_dir = posixpath.join(DB_PROJECTS_BASE, tar_hash)
mlflow_run_arr = list(map(shlex_quote, ["mlflow", "run", project_dir,
"--entry-point", entry_point]))
if run_id:
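The nested ``splitext`` calls above strip the compound ``.tar.gz`` suffix one extension at a time:

    import posixpath

    name = posixpath.basename("/dbfs/some/dir/abc123.tar.gz")  # 'abc123.tar.gz'
    posixpath.splitext(posixpath.splitext(name)[0])[0]         # 'abc123'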
