Skip to content

Commit

Permalink
More detailed swarm manager state logging
Browse files Browse the repository at this point in the history
  • Loading branch information
natefoo committed Apr 6, 2018
1 parent 806703a commit 9cd5895
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 5 deletions.
34 changes: 34 additions & 0 deletions lib/galaxy/containers/docker_model.py
Expand Up @@ -10,6 +10,7 @@
ContainerPort,
ContainerVolume
)
from galaxy.util import pretty_print_time_interval


CPUS_LABEL = '_galaxy_cpus'
Expand Down Expand Up @@ -174,6 +175,7 @@ def __init__(self, interface, id, name=None, image=None, inspect=None):
self._name = name
self._image = image
self._inspect = inspect
self._env = {}
self._tasks = []
if inspect:
self._name = name or inspect['Spec']['Name']
Expand Down Expand Up @@ -267,6 +269,16 @@ def state(self):
break
return state

@property
def env(self):
    """Environment variables from the service's container spec, as a dict.

    The mapping is built on first access from
    ``inspect['Spec']['TaskTemplate']['ContainerSpec']['Env']`` and stored
    in ``self._env``. Each ``NAME=value`` entry is split on the first
    ``=`` only, so values may themselves contain ``=``. An entry with no
    ``=`` at all is stored with a value of ``None``.

    :returns: dict mapping variable names to their string values (or
              ``None`` for entries without a ``=``); empty when the spec
              lists no variables.
    """
    if not self._env:
        container_spec = self.inspect['Spec']['TaskTemplate']['ContainerSpec']
        # NOTE(review): the ``Env`` key is treated as optional because Docker
        # appears to omit it when a service defines no variables -- confirm
        # against the Engine API reference.
        for env_str in container_spec.get('Env') or []:
            name, sep, value = env_str.partition('=')
            # An empty separator means there was no '=' in the entry.
            self._env[name] = value if sep else None
    return self._env

@property
def terminal(self):
"""Same caveats as :meth:`state`.
Expand Down Expand Up @@ -659,6 +671,22 @@ def inspect(self):
self._inspect = self._interface.task_inspect(self._id)
return self._inspect

@property
def slot(self):
    """Slot number recorded in this task's inspect data.

    :returns: the value of ``inspect['Slot']``, or ``None`` when the
              inspect data carries no ``Slot`` key.
    """
    # NOTE(review): Docker's JSON appears to omit ``Slot`` when it is zero
    # (e.g. tasks of global-mode services) -- confirm. Returning None keeps
    # state logging from failing with a KeyError on such tasks.
    return self.inspect.get('Slot')

@property
def node(self):
    """Node object for this task, looked up on first access.

    The lookup goes through ``self._interface.node()`` using the
    ``NodeID`` from the task's inspect data; the result is kept in
    ``self._node`` and reused on later accesses.
    """
    if self._node:
        return self._node
    find_node = self._interface.node
    self._node = find_node(id=self.inspect['NodeID'])
    return self._node

@property
def service(self):
    """Service object for this task, looked up on first access.

    The lookup goes through ``self._interface.service()`` using the
    ``ServiceID`` from the task's inspect data; the result is kept in
    ``self._service`` and reused on later accesses.
    """
    if self._service:
        return self._service
    find_service = self._interface.service
    self._service = find_service(id=self.inspect['ServiceID'])
    return self._service

@property
def cpus(self):
try:
Expand All @@ -677,6 +705,12 @@ def state(self):
def current_state(self):
    """Return the value of ``self._state`` converted to lower case."""
    raw_state = self._state
    return raw_state.lower()

@property
def current_state_time(self):
    """Human-readable age of the task's current status.

    Reads ``inspect['Status']['Timestamp']``, normalizes it to exactly six
    fractional-second digits, and hands it to
    ``pretty_print_time_interval`` with ``precise=True``. ``utc`` is set
    when the stamp ends in ``Z``.

    :returns: whatever ``pretty_print_time_interval`` returns for the
              normalized timestamp.
    """
    stamp = self.inspect['Status']['Timestamp']
    # Docker API returns a stamp w/ higher second precision than Python
    # takes, so the fraction is cut to microseconds. The fraction may also
    # be shorter than six digits or missing entirely; pad it so the value
    # passed on always has the same ``<seconds>.<6 digits>`` shape and never
    # includes the zone suffix.
    seconds, sep, remainder = stamp.partition('.')
    if sep:
        # Keep only the leading run of digits; this drops any zone suffix.
        digits = remainder[:len(remainder) - len(remainder.lstrip('0123456789'))]
    else:
        digits = ''
        if seconds.endswith('Z'):
            seconds = seconds[:-1]
    normalized = '%s.%s' % (seconds, digits[:6].ljust(6, '0'))
    return pretty_print_time_interval(time=normalized, precise=True, utc=stamp.endswith('Z'))

@property
def desired_state(self):
    """Return the value of ``self._desired_state`` converted to lower case."""
    wanted = self._desired_state
    return wanted.lower()
Expand Down
17 changes: 14 additions & 3 deletions lib/galaxy/containers/docker_swarm_manager.py
Expand Up @@ -63,6 +63,7 @@
'command_retries': 0,
'command_retry_wait': 10,
'terminate_when_idle': True,
'log_environment_variables': [],
}
log = logging.getLogger(__name__)

Expand Down Expand Up @@ -172,13 +173,23 @@ def _log_state(self, now=False):
services = list(self._docker_interface.services())
nodes = list(self._docker_interface.nodes())
terminal = [s for s in services if s.terminal]
log.info('%s nodes, %s services (%s are terminal)', len(nodes), len(services), len(terminal))
envs = {}
for service in services:
envs[service.id] = ['%s=%s' % (k, service.env.get(k, 'unset')) for k in self._conf.log_environment_variables]
log.info('%s nodes, %s services (%s terminal)', len(nodes), len(services), len(terminal))
if terminal:
service_strs = ['%s (state: %s)' % (s.name, s.state) for s in terminal]
log.info('terminal services: %s', ', '.join(service_strs) or 'none')
for node in nodes:
task_strs = ['%s (state: %s)' % (t.name, t.state) for t in node.non_terminal_tasks]
log.info('node %s (%s) state: %s, non-terminal tasks: %s', node.name, node.id, node.state, ', '.join(task_strs) or 'none')
log.info('node %s (%s) state: %s, %s tasks (%s terminal)', node.name, node.id, node.state,
len(node.tasks), len([t for t in node.tasks if t.terminal]))
for task in node.tasks:
env_str = ''
if envs.get(task.service.id):
env_str = ' [' + ', '.join(envs.get(task.service.id, [])) + ']'
log.info('node %s (%s) service %s (%s) task %s (%s)%s state: %s %s', node.name, node.id,
task.service.name, task.service.id, task.slot, task.id, env_str, task.state,
task.current_state_time)
self._last_log = time.time()

def _terminate_if_idle(self):
Expand Down
7 changes: 5 additions & 2 deletions lib/galaxy/util/__init__.py
Expand Up @@ -383,14 +383,17 @@ def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning
return value


def pretty_print_time_interval(time=False, precise=False):
def pretty_print_time_interval(time=False, precise=False, utc=False):
"""
Get a datetime object or a int() Epoch timestamp and return a
pretty string like 'an hour ago', 'Yesterday', '3 months ago',
'just now', etc
credit: http://stackoverflow.com/questions/1551382/user-friendly-time-format-in-python
"""
now = datetime.now()
if utc:
now = datetime.utcnow()
else:
now = datetime.now()
if type(time) is int:
diff = now - datetime.fromtimestamp(time)
elif isinstance(time, datetime):
Expand Down

0 comments on commit 9cd5895

Please sign in to comment.