In [1]:
import time
import attr
import pandas

from trains import Task, Logger

In [2]:
@attr.s(auto_attribs=True)
class TasksMonitor(object):
    """Poll a group of tasks until they have all ended and collect their results.

    Attributes:
        tasks_to_monitor: IDs of the tasks to poll (each is passed to
            ``Task.get_task(task_id=...)``).
        wait_time: Seconds to sleep between polling rounds.
        logger: Optional logger used to publish a 3D scatter of the results.
            It is dropped (set to ``None``) unless exactly two hyperparameters
            are given, because the plot uses one axis per hyperparameter plus
            one for the result.
        hyperparameters: Mapping whose keys are the hyperparameter names to
            read from every finished task. ``None`` is treated as "no
            hyperparameters".
    """
    tasks_to_monitor: list
    wait_time: int = 30
    logger: Logger = None
    hyperparameters: dict = None

    # Deliberately NOT annotated: with ``auto_attribs=True`` only annotated
    # names become attrs fields, so these stay plain class constants.
    _DONE_STATUSES = ('completed', 'stopped')
    # NOTE(review): assumes the backend reports crashed tasks as 'failed' --
    # confirm against the Task status enum of the installed trains version.
    _FAILED_STATUSES = ('failed',)

    def __attrs_post_init__(self):
        """Initialise the private bookkeeping that is not part of ``__init__``."""
        # ``hyperparameters`` defaults to None; calling ``.keys()`` on it used
        # to raise AttributeError, so fall back to an empty mapping.
        hyperparameters = {} if self.hyperparameters is None else self.hyperparameters
        self._axis: list = list(hyperparameters.keys())
        self._stop: bool = False
        # ``_finished_tasks[i]`` always corresponds to ``_hyperparameter_values[i]``.
        self._finished_tasks: list = []
        self._hyperparameter_values: list = []
        # Tasks that ended without a usable result; tracked separately so the
        # two parallel lists above stay aligned.
        self._failed_tasks: list = []
        if len(self._axis) != 2:
            self.logger = None

    def wait_for_tasks_to_finish(self):
        """Block until every monitored task has ended or ``stop()`` was called.

        Each round prints the status of the tasks that are still pending,
        records the result of every task that newly reached a done status and,
        when a logger is available, re-publishes the scatter plot. Between
        rounds the method sleeps for ``wait_time`` seconds.
        """
        while not self._stop:
            print('Current tasks status:')
            for new_task_id in self.tasks_to_monitor:
                if new_task_id in self._finished_tasks or new_task_id in self._failed_tasks:
                    continue
                curr_task = Task.get_task(task_id=new_task_id)
                status = curr_task.status
                if status in self._DONE_STATUSES:
                    # Extract the result first: if it raises, the ID must not
                    # be recorded without a matching result row.
                    self.prepare_report_from_results(curr_task)
                    self._finished_tasks.append(new_task_id)
                    if self.logger is not None:
                        self.logger.report_scatter3d(title='Results', series='test accuracy',
                                                     scatter=self._hyperparameter_values,
                                                     iteration=len(self._hyperparameter_values),
                                                     xaxis=self._axis[0], yaxis=self._axis[1],
                                                     zaxis='test_accuracy')
                else:
                    if status in self._FAILED_STATUSES:
                        # A failed task will never complete; stop polling it
                        # so the loop can terminate.
                        self._failed_tasks.append(new_task_id)
                    print('Task "{}" status is "{}"'.format(curr_task.name, status))
            print('{} tasks are in status "completed"'.format(len(self._finished_tasks)))

            # Membership test instead of comparing lengths: a duplicated ID in
            # ``tasks_to_monitor`` is only recorded once, so the counts would
            # never match and the loop would spin forever.
            all_ended = all(task_id in self._finished_tasks or task_id in self._failed_tasks
                            for task_id in self.tasks_to_monitor)
            if all_ended:
                self.stop()
            else:
                time.sleep(self.wait_time)

    def prepare_report_from_results(self, curr_task):
        """Append ``[<hyperparameter values...>, <result>]`` for ``curr_task``.

        The result is read from
        ``curr_task.get_last_scalar_metrics()['accuracy']['test_accuracy']['last']``
        and the hyperparameter values from ``curr_task.get_parameters()``, in
        the order of the configured hyperparameter names. A missing metric or
        parameter key propagates as the lookup error raised by those objects.
        """
        result = curr_task.get_last_scalar_metrics()['accuracy']['test_accuracy']['last']
        curr_params = curr_task.get_parameters()
        curr_result_point = [curr_params[param_name] for param_name in self._axis]
        curr_result_point.append(result)
        self._hyperparameter_values.append(curr_result_point)

    def stop(self):
        """Ask ``wait_for_tasks_to_finish`` to exit after the current round."""
        self._stop = True

    def get_best_models(self, num_models=1):
        """Return the ``num_models`` finished tasks with the highest result.

        Returns:
            A list (best first, possibly empty) of dicts with the keys
            ``'task_id'``, ``'result'``, ``'params_values'`` and
            ``'params_keys'``.
        """
        # The result is the last element of every recorded point.
        ranked = sorted(zip(self._hyperparameter_values, self._finished_tasks),
                        key=lambda pair: pair[0][-1], reverse=True)
        return [{'task_id': task_id, 'result': point[-1], 'params_values': point[:-1],
                 'params_keys': self._axis}
                for point, task_id in ranked[:num_models]]

In [3]:
# Register this notebook run as its own task; its logger receives the
# aggregated results further down.
task = Task.init(
    project_name='TensorFlow 2 example',
    task_name='Hyperparameter search example - phase 3',
)

TRAINS Task: created new task id=df6ffe76db0249f6b337638acf02c41a
TRAINS results page: https://demoapp.trainsai.io/projects/f30b6619cdb2470ebde62a76c5b0089b/experiments/df6ffe76db0249f6b337638acf02c41a/output/log


In [4]:
# Look up the phase-2 task by name and pull the 'children tasks' artifact it
# stored; the artifact's 'Task ID' entries are the tasks to monitor.
orig_task = Task.get_task(
    project_name='TensorFlow 2 example',
    task_name='Hyperparameter search example - phase 2',
)
artifact = orig_task.artifacts.get('children tasks')
# presumably a pandas DataFrame with one row per child task -- TODO confirm
df_Obj = artifact.get()
task_id_column = df_Obj.get('Task ID')
tasks_ids = task_id_column.values

TRAINS Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


In [5]:
# Wait for the monitored tasks to finish, reporting results to the main task
# as they arrive, then print the best-performing one.
monitor = TasksMonitor(tasks_to_monitor=tasks_ids, logger=task.get_logger(),
                       hyperparameters=artifact.metadata)
monitor.wait_for_tasks_to_finish()
best_model = monitor.get_best_models(num_models=1)
print('\nBest performing task:')
if best_model:
    for key, value in best_model[0].items():
        print(key, value)
else:
    # get_best_models returns an empty list when no task produced a result;
    # indexing [0] would raise IndexError in that case.
    print('No finished task with results was found')

Current tasks status:
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.07953 batch_size=23" status is "in_progress"
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.01370 batch_size=13" status is "queued"
7 tasks are in status "completed"
Current tasks status:
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.07953 batch_size=23" status is "in_progress"
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.01370 batch_size=13" status is "queued"
7 tasks are in status "completed"
Current tasks status:
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.07953 batch_size=23" status is "in_progress"
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with learning_rate=0.01370 batch_size=13" status is "queued"
7 tasks are in status "completed"
Current tasks status:
Task "TensorFlow 2 quickstart for experts - danmalowanysMBP with lear