Fixes with API refactor for object-based execution

ahmed-shariff · Apr 5, 2020 · 716facd · 716facd
1 parent d6aedcd
commit 716facd
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 25 deletions.
diff --git a/mlpipeline/_pipeline_subprocess.py b/mlpipeline/_pipeline_subprocess.py
@@ -143,7 +143,8 @@ def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_d
 
         experiment_dir, tracking_uri = _get_experiment_dir(Path(current_experiment.name).stem,
                                                            version_spec,
-                                                           config.experiment_mode)
+                                                           config.experiment_mode,
+                                                           config)
         record_training = True if config.experiment_mode != ExperimentModeKeys.TEST else False
         if clean_experiment_dir and current_experiment.allow_delete_experiment_dir:
             try:
@@ -202,7 +203,7 @@ def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_d
                 test__eval_steps = 1 if test__eval_steps is not None else None
                 train_eval_steps = 1 if train_eval_steps is not None else None
 
-            _save_training_time(current_experiment, version_name)
+            _save_training_time(current_experiment, version_name, config)
 
             try:
                 input_fn = dataloader.get_train_input(mode=ExecutionModeKeys.TRAIN)
@@ -312,7 +313,7 @@ def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_d
             _add_to_and_return_result_string("DATALOADER	 SUMMERY:")
             _add_to_and_return_result_string(dataloader.summery)
             if record_training and not config.no_log:
-                _save_results_to_file(_add_to_and_return_result_string(), current_experiment)
+                _save_results_to_file(_add_to_and_return_result_string(), current_experiment, config)
 
         except Exception as e:
             mlflow.end_run(mlflow.entities.RunStatus.to_string(mlflow.entities.RunStatus.FAILED))
@@ -330,20 +331,20 @@ def _experiment_main_loop(current_experiment, version_name_s, clean_experiment_d
     return True
 
 
-def _get_experiment_dir(experiment_name, version_spec, mode):
+def _get_experiment_dir(experiment_name, version_spec, mode, config):
     experiment_dir_suffix = version_spec[version_parameters.EXPERIMENT_DIR_SUFFIX]
     if mode == ExperimentModeKeys.TEST:
-        experiment_dir = os.path.join(CONFIG.experiments_outputs_dir, "experiment_ckpts/temp")
+        experiment_dir = os.path.join(config.experiments_outputs_dir, "experiment_ckpts/temp")
         tracking_uri = os.path.abspath(os.path.join(experiment_dir, "mlruns_tmp"))
         shutil.rmtree(experiment_dir, ignore_errors=True)
     else:
         experiment_dir_suffix = experiment_dir_suffix \
             if experiment_dir_suffix is not None else version_spec.name
         experiment_dir_suffix = "-" + experiment_dir_suffix
-        experiment_dir = os.path.join(CONFIG.experiments_outputs_dir,
+        experiment_dir = os.path.join(config.experiments_outputs_dir,
                                       "experiment_ckpts/{}{}".format(experiment_name,
                                                                      experiment_dir_suffix))
-        tracking_uri = CONFIG.mlflow_tracking_uri
+        tracking_uri = config.mlflow_tracking_uri
     from six.moves import urllib
     scheme = urllib.parse.urlparse(tracking_uri).scheme
     if len(scheme) == 1 or len(scheme) == 0:
@@ -462,36 +463,36 @@ def _get_experiment(file_path,
     return experiment, returning_version, clean_experiment_dir
 
 
-def _save_training_time(experiment, version_):
-    if CONFIG.experiment_mode == ExperimentModeKeys.TEST:
+def _save_training_time(experiment, version_, config):
+    if config.experiment_mode == ExperimentModeKeys.TEST:
         return
     name = experiment.name
-    with open(CONFIG.training_history_log_file, "a") as log_file:
+    with open(config.training_history_log_file, "a") as log_file:
         time = datetime.now().timestamp()
-        CONFIG.executed_experiments[name].version.addExecutingVersion(version_, time)
+        config.executed_experiments[name].version.addExecutingVersion(version_, time)
         log("Executing version: {0}".format(
-            CONFIG.executed_experiments[experiment.name].version.executing_version),
+            config.executed_experiments[experiment.name].version.executing_version),
             log_to_file=False)
         log_file.write("{0}::{1}::{2}\n".format(name,
-                                                CONFIG.executed_experiments[name].version.executing_version,
+                                                config.executed_experiments[name].version.executing_version,
                                                 time))
 
 
-def _save_results_to_file(resultString, experiment):
+def _save_results_to_file(resultString, experiment, config):
     modified_dt = datetime.isoformat(datetime.fromtimestamp(
-        CONFIG.executed_experiments[experiment.name].modified_time))
+        config.executed_experiments[experiment.name].modified_time))
     result_dt = datetime.now().isoformat()
 
-    with open(CONFIG.output_file, 'a', encoding="utf-8") as outfile:
+    with open(config.output_file, 'a', encoding="utf-8") as outfile:
         outfile.write("\n[{0}]:ml-pipline: output: \n".format(result_dt))
         outfile.write(resultString)
-    with open(CONFIG.history_file, 'a', encoding="utf-8") as hist_file:
+    with open(config.history_file, 'a', encoding="utf-8") as hist_file:
         hist_file.write("{0}::{1}::{2}\n".format(
             experiment.name,
-            CONFIG.executed_experiments[experiment.name].modified_time,
-            CONFIG.executed_experiments[experiment.name].version.executing_version))
+            config.executed_experiments[experiment.name].modified_time,
+            config.executed_experiments[experiment.name].version.executing_version))
 
-    CONFIG.executed_experiments[experiment.name].version.moveExecutingToExecuted()
+    config.executed_experiments[experiment.name].version.moveExecutingToExecuted()
 
 
 def _execute_exeperiment(file_path,

diff --git a/mlpipeline/api.py b/mlpipeline/api.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 from mlpipeline._pipeline import (_mlpipeline_main_loop, _init_pipeline)
 from mlpipeline._pipeline_subprocess import (_execute_exeperiment,
                                              _get_experiment_dir,
@@ -35,13 +36,25 @@ def mlpipeline_execute_exeperiment(experiment,
                                    experiment_mode=ExperimentModeKeys.TEST,
                                    whitelist_versions=None,
                                    blacklist_versions=None,
+                                   mlflow_tracking_uri=None,
                                    pipeline_config=None):
     if pipeline_config is None:
-        pipeline_config = PipelineConfig()
-    pipeline_config.output_file = os.path.join(pipeline_config.experiments_dir, "output")
-    pipeline_config.history_file = os.path.join(pipeline_config.experiments_dir, "history")
-    pipeline_config.training_history_log_file = os.path.join(pipeline_config.experiments_dir, "t_history")
-    pipeline_config.log_file = os.path.join(pipeline_config.experiments_dir, "log")
+        pipeline_config = PipelineConfig(experiments_dir="", experiments_outputs_dir="outputs", mlflow_tracking_uri=".mlruns")
+    pipeline_config.output_file = Path(os.path.join(pipeline_config.experiments_outputs_dir, "output"))
+    pipeline_config.history_file = Path(os.path.join(pipeline_config.experiments_outputs_dir, "history"))
+    pipeline_config.training_history_log_file = Path(os.path.join(pipeline_config.experiments_outputs_dir, "t_history"))
+    pipeline_config.log_file = Path(os.path.join(pipeline_config.experiments_outputs_dir, "log"))
+
+    pipeline_config.output_file.parent.mkdir(parents=True, exist_ok=True)
+    pipeline_config.history_file.parent.mkdir(parents=True, exist_ok=True)
+    pipeline_config.training_history_log_file.parent.mkdir(parents=True, exist_ok=True)
+    pipeline_config.log_file.parent.mkdir(parents=True, exist_ok=True)
+
+    pipeline_config.output_file.touch()
+    pipeline_config.history_file.touch()
+    pipeline_config.training_history_log_file.touch()
+    pipeline_config.log_file.touch()
+
     pipeline_config.logger = set_logger(experiment_mode=experiment_mode,
                                         no_log=False,
                                         log_file=pipeline_config.log_file)