[air/output] Collection of small AIR output improvements (ray-project#37571)
This PR includes a number of improvements to Ray AIR's context-aware output engine:

- Remove verbose=x from a number of examples ([AIR output] Use verbose=1 by default in train or tune examples, ray-project#36911)
- Use blocks to logically group outputs and control newlines ([AIR output] Remove the blank line between reported results and checkpoint info., ray-project#36815); a minimal sketch of the block-grouping idea follows after this list
- Exclude _report_on ([AIR Output] Not sure if some metrics for lightning trainer are unnecessary, ray-project#36818)
- Extend blacklisted keys in result repr ([AIR output] make the best result of tune output more readable, ray-project#36791)
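
The block-grouping change can be pictured with a short standalone sketch. This is a minimal illustration only: BlockPrinter is a hypothetical stand-in class, not part of the Ray API, and it simply mirrors the _start_block/_end_block logic added to python/ray/tune/experimental/output.py in the diff below. Lines that belong to the same logical block (for example, a result line and the checkpoint line for the same iteration) are printed without a separating blank line; a single blank line is emitted only when a new block begins.

```python
from typing import Any, Optional


class BlockPrinter:
    """Hypothetical sketch of the block-based newline handling."""

    def __init__(self) -> None:
        self._in_block: Optional[Any] = None

    def _start_block(self, indicator: Any) -> None:
        # Only close the previous block (and print its trailing blank line)
        # when the indicator changes, i.e. a new logical block begins.
        if self._in_block != indicator:
            self._end_block()
            self._in_block = indicator

    def _end_block(self) -> None:
        if self._in_block:
            print("")
            self._in_block = None

    def report(self, block: Any, message: str) -> None:
        self._start_block(block)
        print(message)


if __name__ == "__main__":
    printer = BlockPrinter()
    # Same block: no blank line between the result line and the checkpoint line.
    printer.report("trial_1_result_10", "Trial 1 finished iteration 10")
    printer.report("trial_1_result_10", "Trial 1 saved a checkpoint for iteration 10")
    # New block: exactly one blank line is printed before this message.
    printer.report("trial_1_result_11", "Trial 1 finished iteration 11")
    printer._end_block()
```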

Signed-off-by: Kai Fricke <kai@anyscale.com>
Signed-off-by: harborn <gangsheng.wu@intel.com>
krfricke authored and harborn committed Aug 17, 2023
1 parent 6c0f271 commit 628bf93
Showing 8 changed files with 32 additions and 18 deletions.
1 change: 0 additions & 1 deletion doc/source/ray-air/doc_code/tuner.py
@@ -221,7 +221,6 @@ def get_another_dataset():
run_config = RunConfig(
name="MyExperiment",
storage_path="s3://...",
verbose=2,
checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__
3 changes: 1 addition & 2 deletions doc/source/ray-air/examples/analyze_tuning_results.ipynb
@@ -71,7 +71,7 @@
"\n",
"import ray\n",
"from ray import tune\n",
"from ray.air import RunConfig, ScalingConfig\n",
"from ray.air import ScalingConfig\n",
"from ray.train.xgboost import XGBoostTrainer\n",
"from ray.tune.tune_config import TuneConfig\n",
"from ray.tune.tuner import Tuner"
@@ -215,7 +215,6 @@
"source": [
"tuner = Tuner(\n",
" trainer,\n",
" run_config=RunConfig(verbose=1),\n",
" param_space={\n",
" \"params\": {\n",
" \"max_depth\": tune.randint(2, 8), \n",
2 changes: 0 additions & 2 deletions doc/source/ray-air/examples/batch_tuning.ipynb
@@ -688,8 +688,6 @@
" # redirect logs to relative path instead of default ~/ray_results/\n",
" storage_path=\"my_Tune_logs\",\n",
" name=\"batch_tuning\",\n",
" # Set Ray Tune verbosity. Print summary table only with levels 2 or 3.\n",
" verbose=2,\n",
" ),\n",
")\n",
"\n",
2 changes: 0 additions & 2 deletions doc/source/train/doc_code/key_concepts.py
@@ -106,8 +106,6 @@ def train_fn(config):
# The experiment results will be saved to: storage_path/name
storage_path="~/ray_results",
# storage_path="s3://my_bucket/tune_results",
# Low training verbosity.
verbose=1,
# Custom and built-in callbacks
callbacks=[WandbLoggerCallback()],
# Stopping criteria
2 changes: 0 additions & 2 deletions doc/source/tune/examples/pbt_guide.ipynb
@@ -194,7 +194,6 @@
" # Stop when we've reached a threshold accuracy, or a maximum\n",
" # training_iteration, whichever comes first\n",
" stop={\"mean_accuracy\": 0.96, \"training_iteration\": 50},\n",
" verbose=1,\n",
" checkpoint_config=air.CheckpointConfig(\n",
" checkpoint_score_attribute=\"mean_accuracy\",\n",
" num_to_keep=4,\n",
@@ -576,7 +575,6 @@
" run_config=air.RunConfig(\n",
" name=\"pbt_dcgan_mnist_tutorial\",\n",
" stop={\"training_iteration\": 5 if smoke_test else 150},\n",
" verbose=1,\n",
" ),\n",
" tune_config=tune.TuneConfig(\n",
" metric=\"is_score\",\n",
5 changes: 4 additions & 1 deletion python/ray/air/result.py
@@ -98,6 +98,7 @@ def path(self) -> str:
def _repr(self, indent: int = 0) -> str:
"""Construct the representation with specified number of space indent."""
from ray.tune.result import AUTO_RESULT_KEYS
from ray.tune.experimental.output import BLACKLISTED_KEYS

shown_attributes = {k: getattr(self, k) for k in self._items_to_repr}
if self.error:
@@ -106,8 +107,10 @@ def _repr(self, indent: int = 0) -> str:
shown_attributes.pop("error")

if self.metrics:
exclude = set(AUTO_RESULT_KEYS)
exclude.update(BLACKLISTED_KEYS)
shown_attributes["metrics"] = {
k: v for k, v in self.metrics.items() if k not in AUTO_RESULT_KEYS
k: v for k, v in self.metrics.items() if k not in exclude
}

cls_indent = " " * indent
33 changes: 25 additions & 8 deletions python/ray/tune/experimental/output.py
@@ -86,6 +86,7 @@
"trial_id",
"experiment_tag",
"should_checkpoint",
"_report_on", # LIGHTNING_REPORT_STAGE_KEY
}

VALID_SUMMARY_TYPES = {
@@ -615,6 +616,8 @@ def __init__(
self._progress_metrics = progress_metrics
self._trial_last_printed_results = {}

self._in_block = None

@property
def verbosity(self) -> AirVerbosity:
return self._verbosity
@@ -626,6 +629,19 @@ def setup(
):
self._start_time = start_time

def _start_block(self, indicator: Any):
if self._in_block != indicator:
self._end_block()
self._in_block = indicator

def _end_block(self):
if self._in_block:
print("")
self._in_block = None

def on_experiment_end(self, trials: List["Trial"], **info):
self._end_block()

def experiment_started(
self,
experiment_name: str,
@@ -636,6 +652,7 @@ def experiment_started(
tensorboard_path: Optional[str] = None,
**kwargs,
):
self._start_block("exp_start")
print(f"\nView detailed results here: {experiment_path}")

if tensorboard_path:
@@ -644,8 +661,6 @@
f"`tensorboard --logdir {tensorboard_path}`"
)

print("")

@property
def _time_heartbeat_str(self):
current_time_str, running_time_str = _get_time_str(
@@ -697,14 +712,14 @@ def on_trial_result(
):
if self.verbosity < self._intermediate_result_verbosity:
return
self._start_block(f"trial_{trial}_result_{result[TRAINING_ITERATION]}")
curr_time_str, running_time_str = _get_time_str(self._start_time, time.time())
print(
f"{self._addressing_tmpl.format(trial)} "
f"finished iteration {result[TRAINING_ITERATION]} "
f"at {curr_time_str}. Total running time: " + running_time_str
)
self._print_result(trial, result)
print("")

def on_trial_complete(
self, iteration: int, trials: List[Trial], trial: Trial, **info
@@ -715,13 +730,14 @@
finished_iter = 0
if trial.last_result and TRAINING_ITERATION in trial.last_result:
finished_iter = trial.last_result[TRAINING_ITERATION]

self._start_block(f"trial_{trial}_complete")
print(
f"{self._addressing_tmpl.format(trial)} "
f"completed after {finished_iter} iterations "
f"at {curr_time_str}. Total running time: " + running_time_str
)
self._print_result(trial)
print("")

def on_checkpoint(
self,
@@ -737,18 +753,20 @@
saved_iter = "?"
if trial.last_result and TRAINING_ITERATION in trial.last_result:
saved_iter = trial.last_result[TRAINING_ITERATION]

self._start_block(f"trial_{trial}_result_{saved_iter}")
print(
f"{self._addressing_tmpl.format(trial)} "
f"saved a checkpoint for iteration {saved_iter} "
f"at: {checkpoint.dir_or_data}"
)
print("")

def on_trial_start(self, iteration: int, trials: List[Trial], trial: Trial, **info):
if self.verbosity < self._start_end_verbosity:
return
has_config = bool(trial.config)

self._start_block(f"trial_{trial}_start")
if has_config:
print(
f"{self._addressing_tmpl.format(trial)} " f"started with configuration:"
@@ -759,7 +777,6 @@ def on_trial_start(self, iteration: int, trials: List[Trial], trial: Trial, **in
f"{self._addressing_tmpl.format(trial)} "
f"started without custom configuration."
)
print("")


def _detect_reporter(
@@ -930,6 +947,7 @@ def _print_heartbeat(self, trials, *sys_args, force: bool = False):
trials, *sys_args, force_full_output=force
)

self._start_block("heartbeat")
for s in heartbeat_strs:
print(s)
# now print the table using Tabulate
@@ -951,12 +969,12 @@
)
if more_infos:
print(", ".join(more_infos))
print()

trials_with_error = _get_trials_with_error(trials)
if not trials_with_error:
return

self._start_block("status_errored")
print(f"Number of errored trials: {len(trials_with_error)}")
fail_header = ["Trial name", "# failures", "error file"]
fail_table_data = [
@@ -978,7 +996,6 @@ def _print_heartbeat(self, trials, *sys_args, force: bool = False):
)
if any(trial.status == Trial.TERMINATED for trial in trials_with_error):
print("* The trial terminated successfully after retrying.")
print()


class TuneRichReporter(TuneReporterBase):
2 changes: 2 additions & 0 deletions python/ray/tune/tests/test_result_grid.py
@@ -226,9 +226,11 @@ def f(config):
result = result_grid[0]

from ray.tune.result import AUTO_RESULT_KEYS
from ray.tune.experimental.output import BLACKLISTED_KEYS

representation = result.__repr__()
assert not any(key in representation for key in AUTO_RESULT_KEYS)
assert not any(key in representation for key in BLACKLISTED_KEYS)


def test_result_grid_repr():
