Skip to content

Commit

Permalink
feat: support different tensorboard startup args [DET-5550] (determin…
Browse files Browse the repository at this point in the history
  • Loading branch information
azhou-determined committed Jun 22, 2021
1 parent 02bf3e8 commit a4fdf4b
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 12 deletions.
61 changes: 56 additions & 5 deletions harness/determined/exec/tensorboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import subprocess
import sys
import time
from typing import Callable, List
from typing import Callable, List, Tuple

import boto3
import requests
Expand Down Expand Up @@ -73,11 +73,10 @@ def main(args: List[str]) -> int:
port = os.environ["TENSORBOARD_PORT"]
tensorboard_addr = f"http://localhost:{port}/proxy/{task_id}"
url = f"{tensorboard_addr}/data/plugin/scalars/tags"
tensorboard_args = get_tensorboard_args(args)

print(f"Running: tensorboard --port{port} --path_prefix=/proxy/{task_id}", *args)
p = subprocess.Popen(
["tensorboard", f"--port={port}", f"--path_prefix=/proxy/{task_id}", *args]
)
print(f"Running: {tensorboard_args}")
p = subprocess.Popen(tensorboard_args)

def still_alive() -> bool:
return p.poll() is None
Expand All @@ -88,5 +87,57 @@ def still_alive() -> bool:
return p.wait()


def get_tensorboard_version(version: str) -> Tuple[str, str]:
    """
    Parse a tensorboard version string into its major and minor components.

    The version is not looked up here; the caller passes in the string it obtained
    elsewhere. Only the first two dot-separated components are used, so versions with
    fewer or more than three components (e.g. "2.4" or "2.5.0.post1") are accepted.

    :param version: dotted version string such as "2.5.0"; surrounding whitespace is ignored
    :return: version in the form of a (major, minor) tuple of strings
    :raises ValueError: if the string does not contain non-empty major and minor components
    """
    # Strip first: the string typically originates from shell output and may carry a
    # trailing newline or padding that would otherwise end up in a component.
    parts = version.strip().split(".")

    if len(parts) < 2 or not parts[0] or not parts[1]:
        raise ValueError(f"unrecognized tensorboard version: {version!r}")

    return parts[0], parts[1]


def get_tensorboard_args(args: List[str]) -> List[str]:
    """
    Build the tensorboard command line from the args passed in by tensorboard-entrypoint.sh.

    Flags are added and deprecated at the mercy of tensorboard; all of the handling below is
    necessary to support versions 1.14, 2.4, and 2.5:
    - Tensorboard 2.x is started with "--bind_all" so that it listens on all interfaces
      rather than only on localhost.
    - Tensorboard 2.5 introduces an experimental feature (default load_fast=true) which
      prevents multiple plugins from loading correctly, so "--load_fast=false" is passed.
    - If multiple comma-separated directories are specified and the major version is 2,
      the legacy "--logdir_spec" flag is used instead of "--logdir". NOTE: legacy
      logdir_spec behavior is not supported by many tensorboard plugins.

    The port and proxy path prefix are read from the TENSORBOARD_PORT and DET_TASK_ID
    environment variables.

    :param args: [version, logdir, *extra_args]; the list itself is not modified
    :return: the full argv list, starting with "tensorboard"
    :raises KeyError: if DET_TASK_ID or TENSORBOARD_PORT is not set
    :raises IndexError: if fewer than two args are supplied
    """
    task_id = os.environ["DET_TASK_ID"]
    port = os.environ["TENSORBOARD_PORT"]

    if len(args) < 2:
        raise IndexError(
            "expected at least two arguments: <tensorboard version> <logdir>, "
            f"got {len(args)}"
        )

    # Index and slice instead of pop() so the caller's list is left untouched.
    version, logdir = args[0], args[1]
    extra_args = args[2:]

    tensorboard_args = [
        "tensorboard",
        f"--port={port}",
        f"--path_prefix=/proxy/{task_id}",
        *extra_args,
    ]

    major, minor = get_tensorboard_version(version)

    if major == "2":
        tensorboard_args.append("--bind_all")
        if minor == "5":
            tensorboard_args.append("--load_fast=false")
        # A comma means several log directories were given in one argument.
        if "," in logdir:
            tensorboard_args.append(f"--logdir_spec={logdir}")
            return tensorboard_args

    tensorboard_args.append(f"--logdir={logdir}")
    return tensorboard_args


# Script entry point: pass the command-line arguments (without the program name) to
# main() and use its return value as the process exit status.
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
9 changes: 2 additions & 7 deletions master/static/srv/tensorboard-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@ if ! /bin/which "$DET_PYTHON_EXECUTABLE" >/dev/null 2>&1 ; then
fi

TENSORBOARD_VERSION=$(pip show tensorboard | grep Version | sed "s/[^:]*: *//")
TENSORBOARD_VERSION_MAJOR=$( echo "$TENSORBOARD_VERSION" | sed " s/[.].*//")

"$DET_PYTHON_EXECUTABLE" -m pip install -q --user /opt/determined/wheels/determined*.whl

cd ${WORKING_DIR} && test -f "${STARTUP_HOOK}" && source "${STARTUP_HOOK}"

if [ "$TENSORBOARD_VERSION_MAJOR" == 2 ]; then
"$DET_PYTHON_EXECUTABLE" -m pip install tensorboard-plugin-profile
exec "$DET_PYTHON_EXECUTABLE" -m determined.exec.tensorboard --bind_all --logdir_spec "$@"
else
exec "$DET_PYTHON_EXECUTABLE" -m determined.exec.tensorboard --logdir "$@"
fi
"$DET_PYTHON_EXECUTABLE" -m pip install tensorboard-plugin-profile
exec "$DET_PYTHON_EXECUTABLE" -m determined.exec.tensorboard "$TENSORBOARD_VERSION" "$@"

0 comments on commit a4fdf4b

Please sign in to comment.