Skip to content

Commit

Permalink
feat: added cli logging to native [DET-3316] (#788)
Browse files Browse the repository at this point in the history
* feat: Native API Logging Improvement [DET-3316]

* feat: added logging for native

* feat: added cli logging to native [DET-3316]

* fix: order of imports for linting

* fix: migrated logs function from cli to common [DET-3316]

* fix: trial log path [DET-3316]

* fix: linting for cli trial.py [DET-3316]

* buggy test [DET-3316]

* buggy test part 2 [DET-3316]

* buggy test number 3 [DET-3316]

* authentication fix native logging [DET-3316]

* fix linting native logging [DET-3316]

* moving authentication_required to common api [DET-3316]

* fix: added authentication_required to authentication in common [DET-3316]
  • Loading branch information
naren-determined committed Jul 3, 2020
1 parent 9918ae1 commit 0c99a0c
Show file tree
Hide file tree
Showing 16 changed files with 195 additions and 179 deletions.
2 changes: 1 addition & 1 deletion cli/determined_cli/agent.py
Expand Up @@ -7,10 +7,10 @@

from determined_cli import render
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_false

from .declarative_argparse import Arg, Cmd, Group
from .user import authentication_required


def local_id(address: str) -> str:
Expand Down
5 changes: 3 additions & 2 deletions cli/determined_cli/checkpoint.py
Expand Up @@ -3,9 +3,10 @@
from typing import Any, Dict, List, Optional

from determined_common import api, constants, experimental
from determined_common.api.authentication import authentication_required
from determined_common.experimental import Determined

from . import render, user
from . import render
from .declarative_argparse import Arg, Cmd


Expand Down Expand Up @@ -58,7 +59,7 @@ def render_checkpoint(checkpoint: experimental.Checkpoint, path: Optional[str] =
render.tabulate_or_csv(headers, [values], False)


@user.authentication_required
@authentication_required
def list(args: Namespace) -> None:
params = {}
if args.best is not None:
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/cli.py
Expand Up @@ -27,10 +27,10 @@
from determined_cli.tensorboard import args_description as tensorboard_args_description
from determined_cli.trial import args_description as trial_args_description
from determined_cli.user import args_description as user_args_description
from determined_cli.user import authentication_required
from determined_cli.version import args_description as version_args_description
from determined_cli.version import check_version
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_not_none
from determined_common.util import chunks, debug_mode, get_default_master_address

Expand Down
104 changes: 3 additions & 101 deletions cli/determined_cli/experiment.py
Expand Up @@ -18,9 +18,8 @@
import determined_common
from determined_cli import checkpoint, render
from determined_cli.declarative_argparse import Arg, Cmd, Group
from determined_cli.trial import logs
from determined_cli.user import authentication_required
from determined_common import api, constants, context
from determined_common.api.authentication import authentication_required
from determined_common.experimental import Determined

from .checkpoint import render_checkpoint
Expand Down Expand Up @@ -52,103 +51,6 @@ def cancel(args: Namespace) -> None:
print("Canceled experiment {}".format(args.experiment_id))


def follow_experiment_logs(master_url: str, exp_id: int) -> None:
    """Wait for the experiment's first trial, then follow that trial's logs.

    Polls the master's ``experiments/<exp_id>`` endpoint every 0.1 seconds
    until the response lists at least one trial, picks the trial with the
    lowest ID, and hands off to ``logs`` with ``follow=True``.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the experiment whose first trial should be followed.
    """
    print("Waiting for first trial to begin...")
    experiment_path = "experiments/{}".format(exp_id)
    while True:
        trials = api.get(master_url, experiment_path).json()["trials"]
        if len(trials) > 0:
            break
        time.sleep(0.1)

    # The "first" trial is the one with the lowest ID.
    first_trial_id = min(trial["id"] for trial in trials)
    print("Following first trial with ID {}".format(first_trial_id))

    # Equivalent to running `logs --follow` on the new trial.
    logs(Namespace(trial_id=first_trial_id, follow=True, master=master_url, tail=None))


def follow_test_experiment_logs(master_url: str, exp_id: int) -> None:
    """Poll a test experiment and render a one-line progress display.

    Queries the master's ``experiments/<exp_id>`` endpoint every 0.2 seconds
    and redraws a four-stage progress line until the experiment reaches the
    COMPLETED, CANCELED or ERROR state.

    On COMPLETED a success message is printed and the function returns.
    On CANCELED a warning is printed and the process exits with status 1.
    On ERROR the logs of the first listed trial are printed (via ``logs``)
    and the process exits with status 1.

    Args:
        master_url: Address of the master to query.
        exp_id: ID of the test experiment to watch.
    """
    # The four sequential stages of verification; `active_stage` below is an
    # index into this tuple (or its length once everything has passed).
    stage_names = (
        "Scheduling task",
        "Testing training",
        "Testing validation",
        "Testing checkpointing",
    )

    def print_progress(active_stage: int, ended: bool) -> None:
        final_idx = len(stage_names) - 1
        for idx, name in enumerate(stage_names):
            if idx < active_stage:
                color, checkbox = "green", "✔"
            elif idx == active_stage:
                color, checkbox = ("red", "✗") if ended else ("yellow", " ")
            else:
                color, checkbox = "white", " "

            print(colored(name.ljust(25, "."), color), end="")
            print(colored(" [" + checkbox + "]", color), end="")

            if idx != final_idx:
                separator = ", "
            elif ended:
                # Finished: move to a fresh line.
                separator = "\n"
            else:
                # Still running: return to the start of the line so the next
                # call overwrites this one.
                separator = "\r"
            print(separator, end="")

    experiment_path = "experiments/{}".format(exp_id)
    while True:
        experiment = api.get(master_url, experiment_path).json()
        trials = experiment["trials"]
        state = experiment["state"]

        # Until the experiment has a trial with at least one step, there is
        # nothing to inspect.
        step = {}  # type: Dict
        if len(trials) >= 1 and len(trials[0]["steps"]) >= 1:
            step = trials[0]["steps"][0]

        # Derive the active stage from what the master reports.
        if state == constants.COMPLETED:
            active_stage = 4
        elif not step:
            active_stage = 0
        elif step.get("checkpoint"):
            active_stage = 3
        elif step.get("validation"):
            active_stage = 2
        else:
            active_stage = 1

        # Not in a terminal state yet: redraw, sleep and poll again.
        if state not in (constants.COMPLETED, constants.CANCELED, constants.ERROR):
            print_progress(active_stage, ended=False)
            time.sleep(0.2)
            continue

        print_progress(active_stage, ended=True)

        if state == constants.COMPLETED:
            print(colored("Model definition test succeeded! 🎉", "green"))
            return

        if state == constants.CANCELED:
            message = (
                "Model definition test (ID: {}) canceled before "
                "model test could complete. Please re-run the "
                "command."
            ).format(exp_id)
            print(colored(message, "yellow"))
            sys.exit(1)

        # ERROR: dump the first listed trial's logs before exiting.
        failed_trial_id = trials[0]["id"]
        logs(Namespace(trial_id=failed_trial_id, master=master_url, tail=None, follow=False))
        sys.exit(1)


def read_git_metadata(model_def_path: pathlib.Path) -> Tuple[str, str, str, str]:
"""
Attempt to read the git metadata from the model definition directory. If
Expand Down Expand Up @@ -257,7 +159,7 @@ def submit_experiment(args: Namespace) -> None:
additional_body_fields=additional_body_fields,
)
print(colored("Test experiment ID: {}".format(exp_id), "green"))
follow_test_experiment_logs(args.master, exp_id)
api.experiment.follow_test_experiment_logs(args.master, exp_id)
else:
exp_id = api.experiment.create_experiment(
master_url=args.master,
Expand All @@ -270,7 +172,7 @@ def submit_experiment(args: Namespace) -> None:
)
print("Created experiment {}".format(exp_id))
if not args.paused and args.follow_first_trial:
follow_experiment_logs(args.master, exp_id)
api.experiment.follow_experiment_logs(args.master, exp_id)


def local_experiment(args: Namespace) -> None:
Expand Down
6 changes: 3 additions & 3 deletions cli/determined_cli/master.py
Expand Up @@ -6,19 +6,19 @@
from requests import Response

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_gt

from . import user
from .declarative_argparse import Arg, Cmd


@user.authentication_required
@authentication_required
def config(args: Namespace) -> None:
    """Fetch the master's ``config`` endpoint and print it as indented JSON."""
    master_config = api.get(args.master, "config").json()
    print(json.dumps(master_config, indent=4))


@user.authentication_required
@authentication_required
def logs(args: Namespace) -> None:
def process_response(response: Response, latest_log_id: int) -> int:
for log in response.json():
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/notebook.py
Expand Up @@ -5,6 +5,7 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq

from . import render
Expand All @@ -20,7 +21,6 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


@authentication_required
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/remote.py
Expand Up @@ -6,6 +6,7 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .command import (
Expand All @@ -20,7 +21,6 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


@authentication_required
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/shell.py
Expand Up @@ -8,6 +8,7 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq, check_len

from . import render
Expand All @@ -23,7 +24,6 @@
render_event_stream,
)
from .declarative_argparse import Arg, Cmd
from .user import authentication_required


def get_agent_user(host: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/template.py
Expand Up @@ -7,10 +7,10 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .declarative_argparse import Arg, Cmd
from .user import authentication_required

TemplateClean = namedtuple("TemplateClean", ["name"])
TemplateAll = namedtuple("TemplateAll", ["name", "config"])
Expand Down
2 changes: 1 addition & 1 deletion cli/determined_cli/tensorboard.py
Expand Up @@ -6,12 +6,12 @@
from termcolor import colored

from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.check import check_eq

from . import render
from .command import Command, render_event_stream
from .declarative_argparse import Arg, Cmd
from .user import authentication_required

Tensorboard = namedtuple(
"Tensorboard",
Expand Down
57 changes: 4 additions & 53 deletions cli/determined_cli/trial.py
@@ -1,18 +1,15 @@
import distutils.util
import json
import time
from argparse import Namespace
from typing import Any, List, Optional

from termcolor import colored
from typing import Any, List

from determined_cli import render
from determined_common import api, constants
from determined_common import api
from determined_common.api.authentication import authentication_required
from determined_common.experimental import Determined

from .checkpoint import format_checkpoint, format_validation, render_checkpoint
from .declarative_argparse import Arg, Cmd, Group
from .user import authentication_required


@authentication_required
Expand Down Expand Up @@ -80,52 +77,6 @@ def describe_trial(args: Namespace) -> None:
render.tabulate_or_csv(headers, values, args.csv)


@authentication_required
def logs(args: Namespace) -> None:
    """Print a trial's logs, optionally tailing and/or following them.

    Reads ``args.trial_id``, ``args.master``, ``args.tail`` and
    ``args.follow``. When ``args.tail`` is set, a single request limited to
    that many entries is made; otherwise the log is fetched in batches (the
    helper's default limit is 5000) until a request yields no new offset.
    With ``args.follow`` the function then keeps polling every 0.2 seconds
    until the most recently seen log entry reports a terminal state.

    A ``KeyboardInterrupt`` stops the stream quietly. In every case a closing
    hint with the last seen state is printed.
    """
    last_offset, last_state = 0, None

    def print_logs(offset: Optional[int], limit: Optional[int] = 5000) -> Any:
        # Fetch one batch, echo it, and return the ID of the last entry (or
        # the current offset when the batch is empty).
        nonlocal last_state
        query_parts = []
        if offset is not None:
            query_parts.append("&offset={}".format(offset))
        if limit is not None:
            query_parts.append("&limit={}".format(limit))
        path = "trials/{}/logsv2?".format(args.trial_id) + "".join(query_parts)

        entries = api.get(args.master, path).json()
        for entry in entries:
            print(entry["message"], end="")
            last_state = entry["state"]

        if entries:
            return entries[-1]["id"]
        return last_offset

    try:
        if args.tail is None:
            # Drain the backlog: stop once a fetch no longer advances.
            while True:
                fetched_offset = print_logs(last_offset)
                caught_up = fetched_offset == last_offset
                last_offset = fetched_offset
                if caught_up:
                    break
        else:
            last_offset = print_logs(None, args.tail)

        if args.follow:
            while True:
                last_offset = print_logs(last_offset)
                if last_state in constants.TERMINAL_STATES:
                    break
                time.sleep(0.2)
    except KeyboardInterrupt:
        # Ctrl-C just ends the stream; the hint below is still printed.
        pass
    finally:
        reopen_hint = (
            "Trial is in the {} state. To reopen log stream, run: "
            "det trial logs -f {}"
        ).format(last_state, args.trial_id)
        print(colored(reopen_hint, "green"))


def download(args: Namespace) -> None:
checkpoint = (
Determined(args.master, None)
Expand Down Expand Up @@ -228,7 +179,7 @@ def kill_trial(args: Namespace) -> None:
),
Cmd(
"logs",
logs,
api.experiment.logs,
"fetch trial logs",
[
Arg("trial_id", type=int, help="trial ID"),
Expand Down
11 changes: 1 addition & 10 deletions cli/determined_cli/user.py
Expand Up @@ -9,6 +9,7 @@

import determined_common.api.authentication as auth
from determined_common import api
from determined_common.api.authentication import authentication_required

from . import render
from .declarative_argparse import Arg, Cmd
Expand All @@ -33,16 +34,6 @@ def f(namespace: Namespace) -> Any:
return f


def authentication_required(func: Callable[[Namespace], Any]) -> Callable[..., Any]:
    """Decorate a CLI handler so a session is initialized before it runs.

    The wrapper calls ``auth.initialize_session`` with the namespace's
    ``master``, its ``user`` entry if present (otherwise ``None``), and
    ``try_reauth=True``, then invokes ``func`` with the same namespace and
    returns its result.
    """

    @wraps(func)
    def authenticated(namespace: Namespace) -> Any:
        username = vars(namespace).get("user")
        auth.initialize_session(namespace.master, username, try_reauth=True)
        return func(namespace)

    return authenticated


def update_user(
username: str,
master_address: str,
Expand Down
2 changes: 2 additions & 0 deletions common/determined_common/api/__init__.py
Expand Up @@ -6,6 +6,8 @@
create_test_experiment,
make_test_experiment_config,
patch_experiment,
follow_experiment_logs,
follow_test_experiment_logs,
)
from determined_common.api.request import (
WebSocket,
Expand Down

0 comments on commit 0c99a0c

Please sign in to comment.