diff --git a/deepobs/abstract_runner/abstract_runner.py b/deepobs/abstract_runner/abstract_runner.py index 867b3d12..b4aa08cf 100644 --- a/deepobs/abstract_runner/abstract_runner.py +++ b/deepobs/abstract_runner/abstract_runner.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """Module implementing the abstract Runner.""" import os import json @@ -11,6 +10,7 @@ import warnings from copy import deepcopy from deepobs import config as global_config +import glob class Runner(abc.ABC): @@ -31,7 +31,6 @@ class Runner(abc.ABC): create_output_directory: Creates the output folder of the run. write_output: Writes the output of the run to the output directory. """ - def __init__(self, optimizer_class, hyperparameter_names): """ Creates a new Runner instance @@ -56,22 +55,21 @@ def __init__(self, optimizer_class, hyperparameter_names): self._hyperparameter_names = hyperparameter_names def run(self, - testproblem = None, - hyperparams = None, - batch_size = None, - num_epochs = None, + testproblem=None, + hyperparams=None, + batch_size=None, + num_epochs=None, random_seed=None, data_dir=None, output_dir=None, weight_decay=None, no_logs=None, - train_log_interval = None, - print_train_iter = None, - tb_log = None, - tb_log_dir = None, - **training_params - ): - + train_log_interval=None, + print_train_iter=None, + tb_log=None, + tb_log_dir=None, + skip_if_exists=False, + **training_params): """Runs a testproblem with the optimizer_class. Has the following tasks: 1. setup testproblem 2. run the training (must be implemented by subclass) @@ -91,6 +89,7 @@ def run(self, print_train_iter (bool): Whether to print the training progress at each train_log_interval. tb_log (bool): Whether to use tensorboard logging or not tb_log_dir (str): The path where to save tensorboard events. + skip_if_exists (bool): Skip training if the output already exists. training_params (dict): Kwargs for the training method. Returns: @@ -105,85 +104,95 @@ def run(self, where <...meta data...> stores the run args. """ - args = self.parse_args(testproblem, - hyperparams, - batch_size, - num_epochs, - random_seed, - data_dir, - output_dir, - weight_decay, - no_logs, - train_log_interval, - print_train_iter, - tb_log, - tb_log_dir, - training_params) - - return self._run(**args) + exists, matches = self.run_exists( + testproblem=testproblem, + hyperparams=hyperparams, + batch_size=batch_size, + num_epochs=num_epochs, + random_seed=random_seed, + data_dir=data_dir, + output_dir=output_dir, + weight_decay=weight_decay, + no_logs=no_logs, + train_log_interval=train_log_interval, + print_train_iter=print_train_iter, + tb_log=tb_log, + tb_log_dir=tb_log_dir, + **training_params) + + require_run = not (exists and skip_if_exists) + + if require_run: + args = self.parse_args( + testproblem, + hyperparams, + batch_size, + num_epochs, + random_seed, + data_dir, + output_dir, + weight_decay, + no_logs, + train_log_interval, + print_train_iter, + tb_log, + tb_log_dir, + training_params, + ) + + return self._run(**args) + else: + print("Found output file(s): {}\nSkipping run.".format(matches)) def _run(self, - testproblem = None, - hyperparams = None, - batch_size = None, - num_epochs = None, - random_seed = None, - data_dir = None, - output_dir = None, - weight_decay = None, - no_logs = None, - train_log_interval = None, - print_train_iter = None, - tb_log = None, - tb_log_dir = None, + testproblem=None, + hyperparams=None, + batch_size=None, + num_epochs=None, + random_seed=None, + data_dir=None, + output_dir=None, + weight_decay=None, + no_logs=None, + train_log_interval=None, + print_train_iter=None, + tb_log=None, + tb_log_dir=None, **training_params): # Creates a backup copy of the initial parameters. Users might change the dicts during training. hyperparams_before_training = deepcopy(hyperparams) training_params_before_training = deepcopy(training_params) - if batch_size is None: - batch_size = global_config.get_testproblem_default_setting(testproblem)['batch_size'] - if num_epochs is None: - num_epochs = global_config.get_testproblem_default_setting(testproblem)['num_epochs'] + batch_size = self._use_default_batch_size_if_missing( + testproblem, batch_size) + num_epochs = self._use_default_num_epochs_if_missing( + testproblem, num_epochs) if data_dir is not None: global_config.set_data_dir(data_dir) - run_directory, file_name = self.generate_output_directory_name(testproblem, - batch_size, - num_epochs, - weight_decay, - random_seed, - output_dir, - hyperparams, - **training_params) + run_directory, file_name = self.generate_output_directory_name( + testproblem, batch_size, num_epochs, weight_decay, random_seed, + output_dir, hyperparams, **training_params) if tb_log: if tb_log_dir == 'none': - print('Tensorboard logging: No tb_log_dir specified, using settings folder {0:s} as default.'.format(run_directory)) + print( + 'Tensorboard logging: No tb_log_dir specified, using settings folder {0:s} as default.' + .format(run_directory)) os.makedirs(run_directory, exist_ok=True) tb_log_dir = run_directory - tproblem = self.create_testproblem(testproblem, - batch_size, - weight_decay, - random_seed) - - output = self.training(tproblem, - hyperparams, - num_epochs, - print_train_iter, - train_log_interval, - tb_log, - tb_log_dir, - **training_params) - - output = self._post_process_output(output, - testproblem, - batch_size, - num_epochs, - random_seed, + tproblem = self.create_testproblem(testproblem, batch_size, + weight_decay, random_seed) + + output = self.training(tproblem, hyperparams, num_epochs, + print_train_iter, train_log_interval, tb_log, + tb_log_dir, **training_params) + + output = self._post_process_output(output, testproblem, batch_size, + num_epochs, random_seed, weight_decay, hyperparams_before_training, **training_params_before_training) @@ -193,16 +202,99 @@ def _run(self, return output + def run_exists(self, + testproblem=None, + hyperparams=None, + batch_size=None, + num_epochs=None, + random_seed=None, + data_dir=None, + output_dir=None, + weight_decay=None, + no_logs=None, + train_log_interval=None, + print_train_iter=None, + tb_log=None, + tb_log_dir=None, + **training_params): + """Return whether output file for this run already exists. + + Args: + See `run` method. + + Returns: + bool, list(str): The first parameter is `True` if the `.json` \ + output file already exists, else `False`. The list contains \ + the paths to the files that match the run. + """ + args = self.parse_args( + testproblem, + hyperparams, + batch_size, + num_epochs, + random_seed, + data_dir, + output_dir, + weight_decay, + no_logs, + train_log_interval, + print_train_iter, + tb_log, + tb_log_dir, + training_params, + ) + return self._run_exists(**args) + + def _run_exists(self, + testproblem=None, + hyperparams=None, + batch_size=None, + num_epochs=None, + random_seed=None, + data_dir=None, + output_dir=None, + weight_decay=None, + no_logs=None, + train_log_interval=None, + print_train_iter=None, + tb_log=None, + tb_log_dir=None, + **training_params): + + batch_size = self._use_default_batch_size_if_missing( + testproblem, batch_size) + num_epochs = self._use_default_num_epochs_if_missing( + testproblem, num_epochs) + + run_directory, _ = self.generate_output_directory_name( + testproblem, batch_size, num_epochs, weight_decay, random_seed, + output_dir, hyperparams, **training_params) + file_regex = "{}*.json".format(self._filename_no_date(random_seed)) + pattern = os.path.join(run_directory, file_regex) + matches = glob.glob(pattern) + + exists = bool(matches) + return exists, matches + + def _use_default_batch_size_if_missing(self, testproblem, batch_size): + fall_back_to_default = (batch_size is None) + if fall_back_to_default: + batch_size = self._use_default(testproblem, 'batch_size') + return batch_size + + def _use_default_num_epochs_if_missing(self, testproblem, num_epochs): + fall_back_to_default = (num_epochs is None) + if fall_back_to_default: + num_epochs = self._use_default(testproblem, 'num_epochs') + return num_epochs + + @staticmethod + def _use_default(testproblem, key): + return global_config.get_testproblem_default_setting(testproblem)[key] @abc.abstractmethod - def training(self, - tproblem, - hyperparams, - num_epochs, - print_train_iter, - train_log_interval, - tb_log, tb_log_dir, - **training_params): + def training(self, tproblem, hyperparams, num_epochs, print_train_iter, + train_log_interval, tb_log, tb_log_dir, **training_params): """Performs the training and stores the metrices. Args: @@ -259,18 +351,23 @@ def _add_hyperparams_to_argparse(self, parser, args, hyperparams): hyperparams (dict): Hyperparameters that are to read in. """ - if hyperparams is None: # if no hyperparams dict is passed to run() - for hp_name, hp_specification in self._hyperparameter_names.items(): - _add_hp_to_argparse(parser, self._optimizer_name, hp_specification, hp_name) + if hyperparams is None: # if no hyperparams dict is passed to run() + for hp_name, hp_specification in self._hyperparameter_names.items( + ): + _add_hp_to_argparse(parser, self._optimizer_name, + hp_specification, hp_name) - else: # if there is one, fill the missing params from command line - for hp_name, hp_specification in self._hyperparameter_names.items(): + else: # if there is one, fill the missing params from command line + for hp_name, hp_specification in self._hyperparameter_names.items( + ): if hp_name in hyperparams: args[hp_name] = hyperparams[hp_name] else: - _add_hp_to_argparse(parser, self._optimizer_name, hp_specification, hp_name) + _add_hp_to_argparse(parser, self._optimizer_name, + hp_specification, hp_name) - def _add_training_params_to_output_dir_name(self, training_params, run_folder_name): + def _add_training_params_to_output_dir_name(self, training_params, + run_folder_name): """Overwrite this method to specify how your runner should format additional training_parameters in the run folder name. @@ -285,10 +382,12 @@ def _add_training_params_to_output_dir_name(self, training_params, run_folder_na if tp_value is not None: run_folder_name += "__{0:s}".format(tp_name) run_folder_name += "__{0:s}".format( - float2str(tp_value) if isinstance(tp_value, float) else str(tp_value)) + float2str(tp_value) if isinstance(tp_value, float + ) else str(tp_value)) return run_folder_name - def _add_hyperparams_to_output_dir_name(self, optimizer_hyperparams, run_folder_name): + def _add_hyperparams_to_output_dir_name(self, optimizer_hyperparams, + run_folder_name): """Overwrite this method to specify how your runner should format optimizer hyper_parameters in the run folder name. @@ -304,25 +403,14 @@ def _add_hyperparams_to_output_dir_name(self, optimizer_hyperparams, run_folder_ for hp_name, hp_value in sorted(optimizer_hyperparams.items()): run_folder_name += "__{0:s}".format(hp_name) run_folder_name += "__{0:s}".format( - float2str(hp_value) if isinstance(hp_value, float) else str(hp_value)) + float2str(hp_value) if isinstance(hp_value, float + ) else str(hp_value)) return run_folder_name - def parse_args(self, - testproblem, - hyperparams, - batch_size, - num_epochs, - random_seed, - data_dir, - output_dir, - weight_decay, - no_logs, - train_log_interval, - print_train_iter, - tb_log, - tb_log_dir, + def parse_args(self, testproblem, hyperparams, batch_size, num_epochs, + random_seed, data_dir, output_dir, weight_decay, no_logs, + train_log_interval, print_train_iter, tb_log, tb_log_dir, training_params): - """Constructs an argparse.ArgumentParser and parses the arguments from command line. Args: @@ -345,7 +433,8 @@ def parse_args(self, dict: A dicionary of all arguments. """ args = {} - parser = argparse.ArgumentParser(description='Arguments for running optimizer script.') + parser = argparse.ArgumentParser( + description='Arguments for running optimizer script.') if testproblem is None: parser.add_argument('testproblem') @@ -353,11 +442,10 @@ def parse_args(self, args['testproblem'] = testproblem if weight_decay is None: - parser.add_argument( - "--weight_decay", - "--wd", - type=float, - help="""Factor + parser.add_argument("--weight_decay", + "--wd", + type=float, + help="""Factor used for the weight_deacy. If not given, the default weight decay for this model is used. Note that not all models use weight decay and this value will be ignored in such a case.""") @@ -365,20 +453,18 @@ def parse_args(self, args['weight_decay'] = weight_decay if batch_size is None: - parser.add_argument( - "--batch_size", - "--bs", - type=int, - help="The batch size (positive integer).") + parser.add_argument("--batch_size", + "--bs", + type=int, + help="The batch size (positive integer).") else: args['batch_size'] = batch_size if num_epochs is None: - parser.add_argument( - "-N", - "--num_epochs", - type=int, - help="Total number of training epochs.") + parser.add_argument("-N", + "--num_epochs", + type=int, + help="Total number of training epochs.") else: args['num_epochs'] = num_epochs @@ -393,9 +479,8 @@ def parse_args(self, args['random_seed'] = random_seed if data_dir is None: - parser.add_argument( - "--data_dir", - help="""Path to the base data dir. If + parser.add_argument("--data_dir", + help="""Path to the base data dir. If not specified, DeepOBS uses its default.""") else: args['data_dir'] = data_dir @@ -424,7 +509,7 @@ def parse_args(self, if train_log_interval is None: parser.add_argument( "--train_log_interval", - type = int, + type=int, default=10, help="""Interval of steps at which to log training loss.""") else: @@ -436,7 +521,9 @@ def parse_args(self, action="store_const", const=True, default=False, - help="""Add this flag to print the mini-batch-loss at the train_log_interval.""") + help= + """Add this flag to print the mini-batch-loss at the train_log_interval.""" + ) else: args['print_train_iter'] = print_train_iter @@ -455,7 +542,9 @@ def parse_args(self, "--tb_log_dir", type=str, default="none", - help="""Path to the directory where the tensorboard logs are saved.""") + help= + """Path to the directory where the tensorboard logs are saved.""" + ) else: args['tb_log_dir'] = tb_log_dir @@ -474,16 +563,10 @@ def parse_args(self, return args - def generate_output_directory_name(self, - testproblem, - batch_size, - num_epochs, - weight_decay, - random_seed, - output_dir, - optimizer_hyperparams, - **training_params - ): + def generate_output_directory_name(self, testproblem, batch_size, + num_epochs, weight_decay, random_seed, + output_dir, optimizer_hyperparams, + **training_params): # add everything mandatory to the name run_folder_name = "num_epochs__" + str( num_epochs) + "__batch_size__" + str(batch_size) @@ -492,22 +575,30 @@ def generate_output_directory_name(self, float2str(weight_decay)) # Add all hyperparameters to the name. - run_folder_name = self._add_hyperparams_to_output_dir_name(optimizer_hyperparams, run_folder_name) + run_folder_name = self._add_hyperparams_to_output_dir_name( + optimizer_hyperparams, run_folder_name) # Add training parameters to the name. - run_folder_name = self._add_training_params_to_output_dir_name(training_params, run_folder_name) + run_folder_name = self._add_training_params_to_output_dir_name( + training_params, run_folder_name) - file_name = "random_seed__{0:d}__".format(random_seed) + file_name = self._filename_no_date(random_seed) file_name += time.strftime("%Y-%m-%d-%H-%M-%S") - run_directory = os.path.join(output_dir, testproblem, self._optimizer_name, - run_folder_name) + run_directory = os.path.join(output_dir, testproblem, + self._optimizer_name, run_folder_name) return run_directory, file_name - - def _post_process_output(self, output, testproblem, batch_size, num_epochs, random_seed, weight_decay, hyperparams, **training_params): + + @staticmethod + def _filename_no_date(random_seed): + return "random_seed__{0:d}__".format(random_seed) + + def _post_process_output(self, output, testproblem, batch_size, num_epochs, + random_seed, weight_decay, hyperparams, + **training_params): """Ensures that for both frameworks the structure of the output is the same""" - + # remove test accuracy if it is not available if 'test_accuracies' in output: if all(output['test_accuracies']) == 0: @@ -517,20 +608,22 @@ def _post_process_output(self, output, testproblem, batch_size, num_epochs, rand del output['valid_accuracies'] except KeyError: pass - + # merge meta data to output dict - output = {'testproblem': testproblem, - 'batch_size': batch_size, - 'num_epochs': num_epochs, - 'random_seed': random_seed, - 'weight_decay': weight_decay, - 'optimizer_name': self._optimizer_name, - 'optimizer_hyperparams': hyperparams, - 'training_params': training_params, - **output} - + output = { + 'testproblem': testproblem, + 'batch_size': batch_size, + 'num_epochs': num_epochs, + 'random_seed': random_seed, + 'weight_decay': weight_decay, + 'optimizer_name': self._optimizer_name, + 'optimizer_hyperparams': hyperparams, + 'training_params': training_params, + **output + } + return output - + @staticmethod def write_output(output, run_folder_name, file_name): """Writes the JSON output. @@ -540,17 +633,20 @@ def write_output(output, run_folder_name, file_name): run_folder_name (str): The name of the output folder. file_name (str): The file name where the output is written to. """ - with open(os.path.join(run_folder_name, file_name + ".json"), "w") as f: + with open(os.path.join(run_folder_name, file_name + ".json"), + "w") as f: json.dump(output, f, indent=4) @staticmethod - def _abort_routine(epoch_count, num_epochs, train_losses, valid_losses, test_losses, train_accuracies, - valid_accuracies, test_accuracies, minibatch_train_losses): + def _abort_routine(epoch_count, num_epochs, train_losses, valid_losses, + test_losses, train_accuracies, valid_accuracies, + test_accuracies, minibatch_train_losses): """A routine that is executed if a training run is aborted (loss is NaN or Inf).""" - warnings.warn('Breaking from run after epoch ' + str( - epoch_count) + 'due to wrongly calibrated optimization (Loss is Nan or Inf). The metrices for the remaining epochs will be filled with the initial performance values.', - RuntimeWarning) + warnings.warn( + 'Breaking from run after epoch ' + str(epoch_count) + + 'due to wrongly calibrated optimization (Loss is Nan or Inf). The metrices for the remaining epochs will be filled with the initial performance values.', + RuntimeWarning) # fill the rest of the metrices with initial observations for i in range(epoch_count, num_epochs): diff --git a/deepobs/analyzer/analyze.py b/deepobs/analyzer/analyze.py index 13ab8fcc..3b984919 100644 --- a/deepobs/analyzer/analyze.py +++ b/deepobs/analyzer/analyze.py @@ -1,14 +1,21 @@ - from __future__ import print_function + import os +import time +from collections import Counter + import numpy as np +import pandas as pd + from matplotlib import pyplot as plt -from .shared_utils import create_setting_analyzer_ranking, _determine_available_metric, _get_optimizer_name_and_testproblem_from_path, _check_output_structure, _check_setting_folder_is_not_empty + from ..tuner.tuner_utils import generate_tuning_summary -from .analyze_utils import _rescale_ax, _preprocess_path -import pandas as pd -import time -from collections import Counter +from .analyze_utils import _preprocess_path, _rescale_ax +from .shared_utils import (_check_output_structure, + _check_setting_folder_is_not_empty, + _determine_available_metric, + _get_optimizer_name_and_testproblem_from_path, + create_setting_analyzer_ranking) def check_output(results_path): @@ -26,32 +33,38 @@ def check_output(results_path): optimizers = os.listdir(testproblem_path) for optimizer in optimizers: optimizer_path = os.path.join(testproblem_path, optimizer) - settings = [setting for setting in os.listdir(optimizer_path) if os.path.isdir(os.path.join(optimizer_path, setting)) and 'num_epochs' in setting] + settings = [ + setting for setting in os.listdir(optimizer_path) + if os.path.isdir(os.path.join(optimizer_path, setting)) + and 'num_epochs' in setting + ] n_runs_list = [] for setting in settings: setting_path = os.path.join(optimizer_path, setting) _check_setting_folder_is_not_empty(setting_path) - jsons_files = [file for file in os.listdir(setting_path) if 'json' in file] + jsons_files = [ + file for file in os.listdir(setting_path) if 'json' in file + ] n_runs_list.append(len(jsons_files)) for json_file in jsons_files: json_path = os.path.join(setting_path, json_file) _check_output_structure(setting_path, json_file) counter = Counter(n_runs_list) for n_runs, count in counter.items(): - print('{0:s} | {1:s}: {2:d} setting(s) with {3:d} seed(s).'.format(testproblem, optimizer, count, n_runs)) + print('{0:s} | {1:s}: {2:d} setting(s) with {3:d} seed(s).'. + format(testproblem, optimizer, count, n_runs)) def estimate_runtime(framework, runner_cls, optimizer_cls, optimizer_hp, - n_runs = 5, + n_runs=5, sgd_lr=0.01, testproblem='mnist_mlp', - num_epochs = 5, - batch_size = 128, + num_epochs=5, + batch_size=128, **kwargs): - """Can be used to estimates the runtime overhead of a new optimizer compared to SGD. Runs the new optimizer and SGD seperately and calculates the fraction of wall clock overhead. @@ -98,13 +111,12 @@ def estimate_runtime(framework, # SGD print("Running SGD") start_sgd = time.time() - runner.run( - testproblem=testproblem, - hyperparams=hyperparams, - batch_size=batch_size, - num_epochs=num_epochs, - no_logs=True, - **kwargs) + runner.run(testproblem=testproblem, + hyperparams=hyperparams, + batch_size=batch_size, + num_epochs=num_epochs, + no_logs=True, + **kwargs) end_sgd = time.time() sgd_times.append(end_sgd - start_sgd) @@ -114,13 +126,12 @@ def estimate_runtime(framework, runner = runner_cls(optimizer_cls, optimizer_hp) print("Running...", optimizer_class.__name__) start_script = time.time() - runner.run( - testproblem=testproblem, - hyperparams=hyperparams, - batch_size=batch_size, - num_epochs=num_epochs, - no_logs=True, - **kwargs) + runner.run(testproblem=testproblem, + hyperparams=hyperparams, + batch_size=batch_size, + num_epochs=num_epochs, + no_logs=True, + **kwargs) end_script = time.time() new_opt_times.append(end_script - start_script) @@ -139,8 +150,10 @@ def estimate_runtime(framework, return output -def plot_results_table(results_path, mode='most', metric='valid_accuracies', conv_perf_file=None): - +def plot_results_table(results_path, + mode='most', + metric='valid_accuracies', + conv_perf_file=None): """Summarizes the performance of the optimizer and prints it to a pandas data frame. Args: @@ -154,7 +167,9 @@ def plot_results_table(results_path, mode='most', metric='valid_accuracies', con """ table_dic = {} testproblems = os.listdir(results_path) - metric_keys = ['Hyperparameters', 'Performance', 'Speed', 'Training Parameters'] + metric_keys = [ + 'Hyperparameters', 'Performance', 'Speed', 'Training Parameters' + ] for testproblem in testproblems: # init new subdict for testproblem for metric_key in metric_keys: @@ -164,11 +179,14 @@ def plot_results_table(results_path, mode='most', metric='valid_accuracies', con optimizers = os.listdir(testproblem_path) for optimizer in optimizers: optimizer_path = os.path.join(testproblem_path, optimizer) - optimizer_performance_dic = get_performance_dictionary(optimizer_path, mode, metric, conv_perf_file) + optimizer_performance_dic = get_performance_dictionary( + optimizer_path, mode, metric, conv_perf_file) # invert inner dics for multiindexing for metric_key in metric_keys: - table_dic[(testproblem, metric_key)][optimizer] = optimizer_performance_dic[metric_key] + table_dic[( + testproblem, metric_key + )][optimizer] = optimizer_performance_dic[metric_key] # correct multiindexing table = pd.DataFrame.from_dict(table_dic, orient='index') @@ -176,7 +194,11 @@ def plot_results_table(results_path, mode='most', metric='valid_accuracies', con return table -def plot_testset_performances(results_path, mode = 'most', metric = 'valid_accuracies', reference_path = None): +def plot_testset_performances(results_path, + mode='most', + metric='valid_accuracies', + reference_path=None, + which='mean_and_std'): """Plots all optimizer performances for all testproblems. Args: @@ -184,27 +206,46 @@ def plot_testset_performances(results_path, mode = 'most', metric = 'valid_accur mode (str): The mode by which to decide the best setting. metric (str): The metric by which to decide the best setting. reference_path(str): Path to the reference results folder. For each available reference testproblem, all optimizers are plotted as reference. + which (str): ['mean_and_std', 'median_and_quartiles'] Solid plot mean or median, shaded plots standard deviation or lower/upper quartiles. Returns: matplotlib.axes.Axes: The axes with the plots. """ - testproblems = [path for path in os.listdir(results_path) if os.path.isdir(os.path.join(results_path, path))] + testproblems = [ + path for path in os.listdir(results_path) + if os.path.isdir(os.path.join(results_path, path)) + ] if reference_path is not None: reference_path = os.path.join(reference_path) - reference_testproblems = [path for path in os.listdir(results_path) if os.path.isdir(os.path.join(reference_path, path))] + reference_testproblems = [ + path for path in os.listdir(results_path) + if os.path.isdir(os.path.join(reference_path, path)) + ] else: reference_testproblems = [] n_testproblems = len(testproblems) __, ax = plt.subplots(4, n_testproblems, sharex='col') for idx, testproblem in enumerate(testproblems): testproblem_path = os.path.join(results_path, testproblem) - ax[:, idx] = _plot_optimizer_performance(testproblem_path, ax[:, idx], mode, metric) + ax[:, idx] = _plot_optimizer_performance(testproblem_path, + ax[:, idx], + mode, + metric, + which=which) if testproblem in reference_testproblems: - reference_testproblem_path = os.path.join(reference_path, testproblem) - ax[:, idx] = _plot_optimizer_performance(reference_testproblem_path, ax[:, idx], mode, metric) - - metrices = ['test_losses', 'train_losses', 'test_accuracies', 'train_accuracies'] + reference_testproblem_path = os.path.join(reference_path, + testproblem) + ax[:, idx] = _plot_optimizer_performance( + reference_testproblem_path, + ax[:, idx], + mode, + metric, + which=which) + + metrices = [ + 'test_losses', 'train_losses', 'test_accuracies', 'train_accuracies' + ] for idx, _metric in enumerate(metrices): # label y axes ax[idx, 0].set_ylabel(_metric) @@ -215,16 +256,21 @@ def plot_testset_performances(results_path, mode = 'most', metric = 'valid_accur # show legend of optimizers ax[0, 0].legend() plt.tight_layout() - plt.show() return ax -def plot_hyperparameter_sensitivity_2d(optimizer_path, hyperparams, mode='final', metric = 'valid_accuracies', xscale='linear', yscale = 'linear'): +def plot_hyperparameter_sensitivity_2d(optimizer_path, + hyperparams, + mode='final', + metric='valid_accuracies', + xscale='linear', + yscale='linear'): param1, param2 = hyperparams metric = _determine_available_metric(optimizer_path, metric) tuning_summary = generate_tuning_summary(optimizer_path, mode, metric) - optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path(optimizer_path) + optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) param_values1 = np.array([d['params'][param1] for d in tuning_summary]) param_values2 = np.array([d['params'][param2] for d in tuning_summary]) @@ -234,7 +280,11 @@ def plot_hyperparameter_sensitivity_2d(optimizer_path, hyperparams, mode='final' _, ax = plt.subplots() - con = ax.tricontourf(param_values1, param_values2, target_means, cmap = 'CMRmap', levels=len(target_means)) + con = ax.tricontourf(param_values1, + param_values2, + target_means, + cmap='CMRmap', + levels=len(target_means)) ax.scatter(param_values1, param_values2) ax.set_xscale(xscale) ax.set_yscale(yscale) @@ -242,25 +292,30 @@ def plot_hyperparameter_sensitivity_2d(optimizer_path, hyperparams, mode='final' ax.set_ylabel(param2) cbar = plt.colorbar(con) cbar.set_label(metric) - plt.show() return ax -def _plot_hyperparameter_sensitivity(optimizer_path, hyperparam, ax, mode='final', metric = 'valid_accuracies', - plot_std=False): +def _plot_hyperparameter_sensitivity(optimizer_path, + hyperparam, + ax, + mode='final', + metric='valid_accuracies', + plot_std=False): metric = _determine_available_metric(optimizer_path, metric) tuning_summary = generate_tuning_summary(optimizer_path, mode, metric) - optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path(optimizer_path) + optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) # create array for plotting param_values = [d['params'][hyperparam] for d in tuning_summary] - target_means = [d[metric +'_mean'] for d in tuning_summary] - target_stds = [d[metric +'_mean'] for d in tuning_summary] + target_means = [d[metric + '_mean'] for d in tuning_summary] + target_stds = [d[metric + '_mean'] for d in tuning_summary] - param_values, target_means, target_stds = (list(t) for t in - zip(*sorted(zip(param_values, target_means, target_stds)))) + param_values, target_means, target_stds = ( + list(t) + for t in zip(*sorted(zip(param_values, target_means, target_stds)))) param_values = np.array(param_values) target_means = np.array(target_means) @@ -272,16 +327,20 @@ def _plot_hyperparameter_sensitivity(optimizer_path, hyperparam, ax, mode='final param_value = rank.aggregate['optimizer_hyperparams'][hyperparam] for value in values: ax.scatter(param_value, value, marker='x', color='b') - ax.plot((param_value, param_value), (min(values), max(values)), color='grey', linestyle='--') + ax.plot((param_value, param_value), (min(values), max(values)), + color='grey', + linestyle='--') ax.set_title(testproblem, fontsize=20) return ax -def plot_hyperparameter_sensitivity(path, hyperparam, mode='final', metric = 'valid_accuracies', +def plot_hyperparameter_sensitivity(path, + hyperparam, + mode='final', + metric='valid_accuracies', xscale='linear', plot_std=True, - reference_path = None): - + reference_path=None): """Plots the hyperparameter sensitivtiy of the optimizer. Args: @@ -300,25 +359,31 @@ def plot_hyperparameter_sensitivity(path, hyperparam, mode='final', metric = 'va pathes = _preprocess_path(path) for optimizer_path in pathes: metric = _determine_available_metric(optimizer_path, metric) - ax = _plot_hyperparameter_sensitivity(optimizer_path, hyperparam, ax, mode, metric, plot_std) + ax = _plot_hyperparameter_sensitivity(optimizer_path, hyperparam, ax, + mode, metric, plot_std) if reference_path is not None: pathes = _preprocess_path(reference_path) for reference_optimizer_path in pathes: - metric = _determine_available_metric(reference_optimizer_path, metric) - ax = _plot_hyperparameter_sensitivity(reference_optimizer_path, hyperparam, ax, mode, metric, plot_std) + metric = _determine_available_metric(reference_optimizer_path, + metric) + ax = _plot_hyperparameter_sensitivity(reference_optimizer_path, + hyperparam, ax, mode, metric, + plot_std) plt.xscale(xscale) plt.xlabel(hyperparam, fontsize=16) plt.ylabel(metric, fontsize=16) ax.tick_params(labelsize=14) ax.legend() - plt.show() return ax -def plot_final_metric_vs_tuning_rank(optimizer_path, metric='valid_accuracies'): +def plot_final_metric_vs_tuning_rank(optimizer_path, + metric='valid_accuracies'): metric = _determine_available_metric(optimizer_path, metric) - ranks = create_setting_analyzer_ranking(optimizer_path, mode='final', metric=metric) + ranks = create_setting_analyzer_ranking(optimizer_path, + mode='final', + metric=metric) means = [] fig, ax = plt.subplots() for idx, rank in enumerate(ranks): @@ -326,17 +391,22 @@ def plot_final_metric_vs_tuning_rank(optimizer_path, metric='valid_accuracies'): values = rank.get_all_final_values(metric) for value in values: ax.scatter(idx, value, marker='x', color='b') - ax.plot((idx, idx), (min(values), max(values)), color= 'grey', linestyle='--') + ax.plot((idx, idx), (min(values), max(values)), + color='grey', + linestyle='--') ax.plot(range(len(ranks)), means) - optimizer, testproblem = _get_optimizer_name_and_testproblem_from_path(optimizer_path) + optimizer, testproblem = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) ax.set_title(optimizer + ' on ' + testproblem) ax.set_xlabel('tuning rank') ax.set_ylabel(metric) - plt.show() return fig, ax -def get_performance_dictionary(optimizer_path, mode = 'most', metric = 'valid_accuracies', conv_perf_file = None): +def get_performance_dictionary(optimizer_path, + mode='most', + metric='valid_accuracies', + conv_perf_file=None): """Summarizes the performance of the optimizer. Args: @@ -349,7 +419,8 @@ def get_performance_dictionary(optimizer_path, mode = 'most', metric = 'valid_ac dict: A dictionary that holds the best setting and it's performance on the test set. """ metric = _determine_available_metric(optimizer_path, metric) - setting_analyzers_ranking = create_setting_analyzer_ranking(optimizer_path, mode, metric) + setting_analyzers_ranking = create_setting_analyzer_ranking( + optimizer_path, mode, metric) sett = setting_analyzers_ranking[0] perf_dict = dict() @@ -374,40 +445,86 @@ def get_performance_dictionary(optimizer_path, mode = 'most', metric = 'valid_ac return perf_dict -def _plot_optimizer_performance(path, ax = None, mode = 'most', metric = 'valid_accuracies'): +def _plot_optimizer_performance(path, + ax=None, + mode='most', + metric='valid_accuracies', + which='mean_and_std'): """Plots the training curve of an optimizer. Args: path (str): Path to the optimizer or to a whole testproblem (in this case all optimizers in the testproblem folder are plotted). - ax (matplotlib.axes.Axes): The axes to plot the trainig curves for all metrices. Must have 4 subaxes. + ax (matplotlib.axes.Axes): The axes to plot the trainig curves for all metrics. Must have 4 subaxes. mode (str): The mode by which to decide the best setting. metric (str): The metric by which to decide the best setting. + which (str): ['mean_and_std', 'median_and_quartiles', 'mean_and_std_log'] + - Solid plot mean or median or exponentiated mean of log + - Shaded plots standard deviation or lower/upper quartiles or exponentiated std of log + Returns: matplotlib.axes.Axes: The axes with the plots. - """ - metrices = ['test_losses', 'train_losses', 'test_accuracies', 'train_accuracies'] - if ax is None: # create default axis for all 4 metrices + loss_metrics = [ + 'test_losses', + 'train_losses', + ] + accuracy_metrics = [ + 'test_accuracies', + 'train_accuracies', + ] + metrics = loss_metrics + accuracy_metrics + + def is_loss(metric): + return metric in loss_metrics + + if ax is None: # create default axis for all 4 metrics _, ax = plt.subplots(4, 1, sharex='col') pathes = _preprocess_path(path) for optimizer_path in pathes: - setting_analyzer_ranking = create_setting_analyzer_ranking(optimizer_path, mode, metric) + setting_analyzer_ranking = create_setting_analyzer_ranking( + optimizer_path, mode, metric) setting = setting_analyzer_ranking[0] optimizer_name = os.path.basename(optimizer_path) - for idx, _metric in enumerate(metrices): + for idx, _metric in enumerate(metrics): if _metric in setting.aggregate: - mean = setting.aggregate[_metric]['mean'] - std = setting.aggregate[_metric]['std'] - ax[idx].plot(mean, label=optimizer_name) - ax[idx].fill_between(range(len(mean)), mean - std, mean + std, alpha=0.3) - _, testproblem = _get_optimizer_name_and_testproblem_from_path(optimizer_path) + if which == 'mean_and_std_log': + if is_loss(_metric): + ax[idx].set_yscale('log') + center = setting.aggregate[_metric]['mean_log'] + std = setting.aggregate[_metric]['std_log'] + low, high = center - std, center + std + else: + center = setting.aggregate[_metric]['mean'] + std = setting.aggregate[_metric]['std'] + low, high = center - std, center + std + elif which == 'mean_and_std': + center = setting.aggregate[_metric]['mean'] + std = setting.aggregate[_metric]['std'] + low, high = center - std, center + std + elif which == 'median_and_quartiles': + center = setting.aggregate[_metric]['median'] + low = setting.aggregate[_metric]['lower_quartile'] + high = setting.aggregate[_metric]['upper_quartile'] + else: + raise ValueError("Unknown value which={}".format(which)) + + ax[idx].plot(center, label=optimizer_name) + ax[idx].fill_between(range(len(center)), low, high, alpha=0.3) + + _, testproblem = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) ax[0].set_title(testproblem, fontsize=18) return ax -def plot_optimizer_performance(path, ax = None, mode = 'most', metric = 'valid_accuracies', reference_path = None): +def plot_optimizer_performance(path, + ax=None, + mode='most', + metric='valid_accuracies', + reference_path=None, + which='mean_and_std'): """Plots the training curve of optimizers and addionally plots reference results from the ``reference_path`` Args: @@ -416,20 +533,29 @@ def plot_optimizer_performance(path, ax = None, mode = 'most', metric = 'valid_a mode (str): The mode by which to decide the best setting. metric (str): The metric by which to decide the best setting. reference_path (str): Path to the reference optimizer or to a whole testproblem (in this case all optimizers in the testproblem folder are taken as reference). + which (str): ['mean_and_std', 'median_and_quartiles', 'mean_and_std_log'] + - Solid plot mean or median or exponentiated mean of log + - Shaded plots standard deviation or lower/upper quartiles or exponentiated std of log Returns: matplotlib.axes.Axes: The axes with the plots. """ - ax = _plot_optimizer_performance(path, ax, mode, metric) + ax = _plot_optimizer_performance(path, ax, mode, metric, which=which) if reference_path is not None: - ax = _plot_optimizer_performance(reference_path, ax, mode, metric) - - metrices = ['test_losses', 'train_losses', 'test_accuracies', 'train_accuracies'] + ax = _plot_optimizer_performance(reference_path, + ax, + mode, + metric, + which=which) + + metrices = [ + 'test_losses', 'train_losses', 'test_accuracies', 'train_accuracies' + ] for idx, _metric in enumerate(metrices): # set y labels - ax[idx].set_ylabel(_metric, fontsize = 14) + ax[idx].set_ylabel(_metric, fontsize=14) # rescale plots # ax[idx] = _rescale_ax(ax[idx]) ax[idx].tick_params(labelsize=12) @@ -437,8 +563,6 @@ def plot_optimizer_performance(path, ax = None, mode = 'most', metric = 'valid_a # show optimizer legends ax[0].legend(fontsize=12) - ax[3].set_xlabel('epochs', fontsize = 14) + ax[3].set_xlabel('epochs', fontsize=14) - plt.show() return ax - diff --git a/deepobs/analyzer/shared_utils.py b/deepobs/analyzer/shared_utils.py index 30ba2e75..845d5c6f 100644 --- a/deepobs/analyzer/shared_utils.py +++ b/deepobs/analyzer/shared_utils.py @@ -1,15 +1,17 @@ import json import os -import numpy as np import warnings +import numpy as np + def _check_setting_folder_is_not_empty(setting_path): runs = [run for run in os.listdir(setting_path) if 'json' in run] try: assert len(runs) > 0 except AssertionError: - print('Found a setting folder with no runs inside: {0:s}'.format(setting_path)) + print('Found a setting folder with no runs inside: {0:s}'.format( + setting_path)) def _check_output_structure(path, file_name): @@ -30,59 +32,86 @@ def _check_output_structure(path, file_name): assert 'test_losses' in json_data # all must have the same length - assert len(json_data['train_losses']) == len(json_data['test_losses']) == len(json_data['valid_losses']) == json_data['num_epochs']+1 + assert len(json_data['train_losses']) == len( + json_data['test_losses']) == len( + json_data['valid_losses']) == json_data['num_epochs'] + 1 except AssertionError as e: - print('Found corrupted output file: {0:s} in path: {1:s}'.format(file_name, path)) + print('Found corrupted output file: {0:s} in path: {1:s}'.format( + file_name, path)) -def aggregate_runs(setting_folder): +def aggregate_runs(setting_folder, custom_metrics=None): """Aggregates all seed runs for a setting. Args: setting_folder (str): The path to the setting folder. + custom_metrics (list(str)): Additional metrics that will be extracted if available Returns: A dictionary that contains the aggregated mean and std of all metrices, as well as the meta data. """ + dobs_metrics = [ + 'train_losses', 'valid_losses', 'test_losses', 'train_accuracies', + 'valid_accuracies', 'test_accuracies' + ] + if custom_metrics is None: + custom_metrics = [] + runs = [run for run in os.listdir(setting_folder) if run.endswith(".json")] - # metrices - train_losses = [] - valid_losses = [] - test_losses = [] - train_accuracies = [] - valid_accuracies = [] - test_accuracies = [] - for run in runs: - json_data = _load_json(setting_folder, run) - train_losses.append(json_data['train_losses']) + runs = [run for run in os.listdir(setting_folder) if run.endswith(".json")] + if not runs: + raise RuntimeError(f"No .json file in {setting_folder}") - # TODO remove try-except once validation metrices are available for the baselines - try: - valid_losses.append(json_data['valid_losses']) - except KeyError: - pass + def no_data(): + return [] - test_losses.append(json_data['test_losses']) - # just add accuracies to the aggregate if they are available - if 'train_accuracies' in json_data : - train_accuracies.append(json_data['train_accuracies']) + all_metrics = dobs_metrics + custom_metrics + all_metrics_data = {m: no_data() for m in all_metrics} - # TODO remove try-except once validation metrices are available for the baselines + for run in runs: + json_data = _load_json(setting_folder, run) + for metric in all_metrics: try: - valid_accuracies.append(json_data['valid_accuracies']) + run_data = json_data[metric] except KeyError: - pass - - test_accuracies.append(json_data['test_accuracies']) + run_data = no_data() + all_metrics_data[metric].append(run_data) + + # custom metrics: fill with nans if run quit earlier + metrics_require_nans = set() + nans_inserted = 0 + for metric in custom_metrics: + max_num_points = max( + len(run_data) for run_data in all_metrics_data[metric]) + # fill up with nans + for run_data in all_metrics_data[metric]: + while len(run_data) < max_num_points: + metrics_require_nans.add(metric) + nans_inserted += 1 + run_data.append(float('nan')) + if nans_inserted > 0: + print( + "[CUSTOM METRICS]: Needed to insert {} NaNs".format(nans_inserted)) + print("[CUSTOM METRICS]: Affected metrics {}".format( + metrics_require_nans)) aggregate = dict() - for metrics in ['train_losses', 'valid_losses', 'test_losses', 'train_accuracies', 'valid_accuracies', 'test_accuracies']: + for metric in all_metrics: + data = np.array(all_metrics_data[metric]) # only add the metric if available - if len(eval(metrics)) != 0: - aggregate[metrics] = { - 'mean': np.mean(eval(metrics), axis=0), - 'std': np.std(eval(metrics), axis=0), - 'all_final_values': [met[-1] for met in eval(metrics)] - } + is_empty = data.shape[1] == 0 + if not is_empty: + aggregate[metric] = { + 'mean': np.mean(data, axis=0), + 'std': np.std(data, axis=0), + 'all_final_values': [met[-1] for met in data], + 'lower_quartile': np.quantile(data, 0.25, axis=0), + 'median': np.median(data, axis=0), + 'upper_quartile': np.quantile(data, 0.75, axis=0), + 'mean_log': np.power(10, np.mean(np.log10(data), axis=0)), + 'std_log': np.power(10, np.std(np.log10(data), axis=0)), + 'min': np.min(data, axis=0), + 'max': np.max(data, axis=0), + } # merge meta data aggregate['optimizer_hyperparams'] = json_data['optimizer_hyperparams'] aggregate['training_params'] = json_data['training_params'] @@ -95,7 +124,10 @@ def aggregate_runs(setting_folder): def _read_all_settings_folders(optimizer_path): """Returns a list of all setting folders in ``optimizer_path``""" optimizer_path = os.path.join(optimizer_path) - return [f for f in os.listdir(optimizer_path) if os.path.isdir(os.path.join(optimizer_path, f)) and 'num_epochs' in f] + return [ + f for f in os.listdir(optimizer_path) + if os.path.isdir(os.path.join(optimizer_path, f)) and 'num_epochs' in f + ] def _check_if_metric_is_available(optimizer_path, metric): @@ -111,22 +143,29 @@ def _check_if_metric_is_available(optimizer_path, metric): return False -def _determine_available_metric(optimizer_path, metric, default_metric = 'valid_losses'): +def _determine_available_metric(optimizer_path, + metric, + default_metric='valid_losses'): """Checks if the metric ``metric`` is availabe for the runs in ``optimizer_path``. If not, it returns the fallback metric ``default_metric``.""" - optimizer_name, testproblem_name = _get_optimizer_name_and_testproblem_from_path(optimizer_path) + optimizer_name, testproblem_name = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) if _check_if_metric_is_available(optimizer_path, metric): return metric else: # TODO remove if-else once validation metrics are available for the baselines if _check_if_metric_is_available(optimizer_path, default_metric): - warnings.warn('Metric {0:s} does not exist for testproblem {1:s}. We now use fallback metric {2:s}'.format( - metric, testproblem_name, default_metric), RuntimeWarning) + warnings.warn( + 'Metric {0:s} does not exist for testproblem {1:s}. We now use fallback metric {2:s}' + .format(metric, testproblem_name, + default_metric), RuntimeWarning) return default_metric else: - warnings.warn('Cannot fallback to metric {0:s} for optimizer {1:s} on testproblem {2:s}. Will now fallback to metric test_losses'.format( - default_metric, optimizer_name, testproblem_name), RuntimeWarning) + warnings.warn( + 'Cannot fallback to metric {0:s} for optimizer {1:s} on testproblem {2:s}. Will now fallback to metric test_losses' + .format(default_metric, optimizer_name, + testproblem_name), RuntimeWarning) return 'test_losses' @@ -149,18 +188,22 @@ def _clear_json(path, file): def _load_json(path, file_name): with open(os.path.join(path, file_name), "r") as f: - json_data = json.load(f) + json_data = json.load(f) return json_data -def _get_all_setting_analyzer(optimizer_path): +def _get_all_setting_analyzer(optimizer_path, custom_metrics=None): """Creates a list of SettingAnalyzers (one for each setting in ``optimizer_path``)""" + if custom_metrics is None: + custom_metrics = [] + optimizer_path = os.path.join(optimizer_path) setting_folders = _read_all_settings_folders(optimizer_path) setting_analyzers = [] for sett in setting_folders: sett_path = os.path.join(optimizer_path, sett) - setting_analyzers.append(SettingAnalyzer(sett_path)) + setting_analyzers.append( + SettingAnalyzer(sett_path, custom_metrics=custom_metrics)) return setting_analyzers @@ -170,7 +213,10 @@ def _get_optimizer_name_and_testproblem_from_path(optimizer_path): return optimizer_name, testproblem -def create_setting_analyzer_ranking(optimizer_path, mode = 'final', metric = 'valid_accuracies'): +def create_setting_analyzer_ranking(optimizer_path, + mode='final', + metric='valid_accuracies', + custom_metrics=None): """Reads in all settings in ``optimizer_path`` and sets up a ranking by returning an ordered list of SettingAnalyzers. Args: optimizer_path (str): The path to the optimizer to analyse. @@ -179,8 +225,12 @@ def create_setting_analyzer_ranking(optimizer_path, mode = 'final', metric = 'va Returns: An ordered list of SettingAnalyzers. I.e. the first item is considered 'the best one' etc. """ + if custom_metrics is None: + custom_metrics = [] + metric = _determine_available_metric(optimizer_path, metric) - setting_analyzers = _get_all_setting_analyzer(optimizer_path) + setting_analyzers = _get_all_setting_analyzer( + optimizer_path, custom_metrics=custom_metrics) if 'acc' in metric: sgn = -1 @@ -188,17 +238,29 @@ def create_setting_analyzer_ranking(optimizer_path, mode = 'final', metric = 'va sgn = 1 if mode == 'final': - setting_analyzers_ordered = sorted(setting_analyzers, key=lambda idx: sgn * idx.get_final_value(metric)) + setting_analyzers_ordered = sorted( + setting_analyzers, + key=lambda idx: sgn * idx.get_final_value(metric)) elif mode == 'best': - setting_analyzers_ordered = sorted(setting_analyzers, key=lambda idx: sgn * idx.get_best_value(metric)) + setting_analyzers_ordered = sorted( + setting_analyzers, + key=lambda idx: sgn * idx.get_best_value(metric)) elif mode == 'most': # if all have the same amount of runs, i.e. no 'most' avalaible, fall back to 'final' - if all(x.n_runs == setting_analyzers[0].n_runs for x in setting_analyzers): - optimizer_name, testproblem_name = _get_optimizer_name_and_testproblem_from_path(optimizer_path) - warnings.warn('All settings for {0:s} on test problem {1:s} have the same number of seeds runs. Mode \'most\' does not make sense and we use the fallback mode \'final\''.format(optimizer_path, testproblem_name), RuntimeWarning) - setting_analyzers_ordered = sorted(setting_analyzers, key=lambda idx: sgn * idx.get_final_value(metric)) + if all(x.n_runs == setting_analyzers[0].n_runs + for x in setting_analyzers): + optimizer_name, testproblem_name = _get_optimizer_name_and_testproblem_from_path( + optimizer_path) + warnings.warn( + 'All settings for {0:s} on test problem {1:s} have the same number of seeds runs. Mode \'most\' does not make sense and we use the fallback mode \'final\'' + .format(optimizer_path, testproblem_name), RuntimeWarning) + setting_analyzers_ordered = sorted( + setting_analyzers, + key=lambda idx: sgn * idx.get_final_value(metric)) else: - setting_analyzers_ordered = sorted(setting_analyzers, key=lambda idx: idx.n_runs, reverse=True) + setting_analyzers_ordered = sorted(setting_analyzers, + key=lambda idx: idx.n_runs, + reverse=True) else: raise RuntimeError('Mode not implemented') @@ -213,28 +275,32 @@ class SettingAnalyzer: aggregate (dictionary): Contains the mean and std of the runs as well as the meta data. n_runs (int): The number of seed runs that were performed for this setting. """ - - def __init__(self, path): + def __init__(self, path, custom_metrics=None): """Initializes a new SettingAnalyzer instance. Args: path (str): String to the setting folder. """ + if custom_metrics is None: + custom_metrics = [] self.path = path self.n_runs = self.__get_number_of_runs() - self.aggregate = aggregate_runs(path) + self.aggregate = aggregate_runs(path, custom_metrics=custom_metrics) def __get_number_of_runs(self): """Calculates the total number of seed runs.""" - return len([run for run in os.listdir(self.path) if run.endswith(".json")]) + return len( + [run for run in os.listdir(self.path) if run.endswith(".json")]) def get_final_value(self, metric): """Get the final (mean) value of the metric.""" try: return self.aggregate[metric]['mean'][-1] except KeyError: - raise KeyError('Metric {0:s} not available for testproblem {1:s} of this setting'.format(metric, self.aggregate['testproblem'])) + raise KeyError( + 'Metric {0:s} not available for testproblem {1:s} of this setting' + .format(metric, self.aggregate['testproblem'])) def get_best_value(self, metric): """Get the best (mean) value of the metric.""" @@ -246,7 +312,9 @@ def get_best_value(self, metric): else: raise NotImplementedError except KeyError: - raise KeyError('Metric {0:s} not available for testproblem {1:s} of this setting'.format(metric, self.aggregate['testproblem'])) + raise KeyError( + 'Metric {0:s} not available for testproblem {1:s} of this setting' + .format(metric, self.aggregate['testproblem'])) def calculate_speed(self, conv_perf_file): """Calculates the speed of the setting.""" @@ -283,4 +351,6 @@ def get_all_final_values(self, metric): try: return self.aggregate[metric]['all_final_values'] except KeyError: - raise KeyError('Metric {0:s} not available for testproblem {1:s} of this setting'.format(metric, self.aggregate['testproblem'])) + raise KeyError( + 'Metric {0:s} not available for testproblem {1:s} of this setting' + .format(metric, self.aggregate['testproblem'])) diff --git a/deepobs/pytorch/testproblems/testproblem.py b/deepobs/pytorch/testproblems/testproblem.py index 78130b3a..ca120319 100644 --- a/deepobs/pytorch/testproblems/testproblem.py +++ b/deepobs/pytorch/testproblems/testproblem.py @@ -48,6 +48,8 @@ def __init__(self, batch_size, weight_decay=None): self._weight_decay = weight_decay self._device = torch.device(config.get_default_device()) + self._batch_count = 0 + # Public attributes by which to interact with test problems. These have to # be created by the set_up function of sub-classes. self.data = None @@ -89,10 +91,12 @@ def test_init_op(self): def _get_next_batch(self): """Returns the next batch from the iterator.""" + self._batch_count += 1 return next(self._iterator) def get_batch_loss_and_accuracy(self, return_forward_func = False, + evaluate_forward_func = True, reduction = 'mean', add_regularization_if_available = True): @@ -141,7 +145,10 @@ def _get_batch_loss_and_accuracy(): return loss + regularizer_loss, accuracy if return_forward_func: - return _get_batch_loss_and_accuracy(), _get_batch_loss_and_accuracy + if evaluate_forward_func is True: + return _get_batch_loss_and_accuracy(), _get_batch_loss_and_accuracy + else: + return _get_batch_loss_and_accuracy else: return _get_batch_loss_and_accuracy()