ADD: method to extract wallclocktime from exp

automl · Sep 4, 2015 · 85080b8 · 85080b8
1 parent 4c7607b
commit 85080b8
Show file tree

Hide file tree

Showing 2 changed files with 148 additions and 19 deletions.
diff --git a/HPOlib/Plotting/plot_trajectory.py b/HPOlib/Plotting/plot_trajectory.py
@@ -40,8 +40,8 @@
 def plot_trajectories(trial_list, name_list, x_ticks,
                       test_trials=None, optimum=0,
                       aggregation="mean", scale_std=1,
-                      log=False, properties=None,
-                      y_max=None, y_min=None,
+                      logy=False, logx=False, properties=None,
+                      y_max=None, y_min=None, x_max=None, x_min=0,
                       print_lenght_trial_list=True,
                       ylabel="Loss", xlabel=None, title="", save=""):
     """Plot trajectory
@@ -97,28 +97,31 @@ def plot_trajectories(trial_list, name_list, x_ticks,
 
     # One trialList represents all runs from one optimizer
     for i in range(len(trial_list)):
+        if isinstance(x_ticks[0], numpy.ndarray) or \
+                isinstance(x_ticks[0], list):
+            x = x_ticks[i]
+        else:
+            x = x_ticks
+
         performance = numpy.array(trial_list[i]) - optimum
         if aggregation == "mean":
             m = numpy.mean(performance, axis=0)
         else:
             m = numpy.median(performance, axis=0)
 
-        if log:
+        if logy:
             m = numpy.log10(m)
 
+        if logx:
+            x = [max(0.00001, xe) for xe in x]
+
         if aggregation == "mean":
             lower = m - numpy.std(performance, axis=0) * scale_std
             upper = m + numpy.std(performance, axis=0) * scale_std
         else:
             lower = numpy.percentile(performance, axis=0, q=25)
             upper = numpy.percentile(performance, axis=0, q=75)
 
-        if isinstance(x_ticks[0], numpy.ndarray) or \
-                isinstance(x_ticks[0], list):
-            x = x_ticks[i]
-        else:
-            x = x_ticks
-
         assert len(m) == len(x), "%d != %d" % (len(m), len(x))
 
         marker = properties["markers"].next()
@@ -147,12 +150,19 @@ def plot_trajectories(trial_list, name_list, x_ticks,
             max_val = numpy.max([numpy.max(test_trials[i][1]), max_val])
 
     # Set y, x label
-    if log:
-       ylabel = "log10(%s)" % ylabel
+    if logy:
+        ylabel = "log10(%s)" % ylabel
+        if y_max is not None:
+            y_max = numpy.log10(y_max)
+        if y_min is not None:
+            y_min = numpy.log10(y_min)
+
     if scale_std != 1:
        ylabel = "%s, %s * std" % (ylabel, scale_std)
-    ax1.set_ylabel(ylabel, fontsize=properties["labelfontsize"])
+    if logx:
+        ax1.set_xscale('log')
 
+    ax1.set_ylabel(ylabel, fontsize=properties["labelfontsize"])
     ax1.set_xlabel(xlabel, fontsize=properties["labelfontsize"])
 
     # Set legend
@@ -164,18 +174,23 @@ def plot_trajectories(trial_list, name_list, x_ticks,
         ax1.set_ylim([min_val - 0.1 * abs((max_val - min_val)),
                       max_val + 0.1 * abs((max_val - min_val))])
     elif y_max is None and y_min is not None:
-        assert y_min < max_val
+        assert y_min < max_val, "%f < %f" % (y_min, max_val)
         ax1.set_ylim([y_min,
                       max_val + 0.1 * abs((max_val - min_val))])
     elif y_max is not None and y_min is None:
-        assert min_val < y_max
+        assert min_val < y_max, "%f < %f" % (min_val, y_max)
         ax1.set_ylim([min_val - 0.1 * abs((max_val - min_val)),
                       y_max])
     else:
-        assert y_min < y_max
+        assert y_min < y_max, "%f < %f" % (y_min, y_max)
         ax1.set_ylim([y_min, y_max])
 
     ax1.set_xlim([0, max_trials])
+    if x_max is not None:
+        ax1.set_xlim([ax1.get_xlim()[0], x_max])
+    if x_min is not None:
+        ax1.set_xlim([x_min, ax1.get_xlim()[1]])
+
 
     matplotlib.pyplot.tight_layout()
     matplotlib.pyplot.subplots_adjust(top=0.85)

diff --git a/HPOlib/Plotting/plot_util.py b/HPOlib/Plotting/plot_util.py
@@ -31,6 +31,7 @@
 # A super-simple cache for unpickled objects...
 cache = dict()
 
+
 def get_empty_iterator():
     return itertools.cycle([None])
 
@@ -135,6 +136,100 @@ def get_best_dict(name_list, pickles, cut=sys.maxint):
     return best_dict, idx_dict, keys
 
 
+def fill_trajectories(trace_list, times_list):
+    """ Each trajectory must have the exact same number of entries
+    and timestamps
+
+    trace_list: list of n lists with y values
+    times_list: list of n lists with x values
+
+    returns a list of n lists where for each x value and each y-list an
+    entry exists.
+
+    Example:
+
+    trace_list = [[5,3], [5,2,1]]
+    times_list = [[1,2], [1,3,5]]
+
+    returns:
+    trajectories = [[5, 3, 3, 3], [5, 5, 2, 1]]
+    times = [1,2,3,5]
+    """
+    # We need to define the max value =
+    # what is measured before the first evaluation
+    max_value = np.max([np.max(ls) for ls in trace_list])
+
+    number_exp = len(trace_list)
+    new_trajectories = list()
+    new_times = list()
+    for i in range(number_exp):
+        new_trajectories.append(list())
+        new_times.append(list())
+    # noinspection PyUnusedLocal
+    counter = [1 for i in range(number_exp)]
+    finish = False
+
+    # We need to insert the max values in the beginning
+    # and the min values in the end
+    for i in range(number_exp):
+        trace_list[i].insert(0, max_value)
+        trace_list[i].append(np.min(trace_list[i]))
+        times_list[i].insert(0, 0)
+        times_list[i].append(sys.maxint)
+
+    # Add all possible time values
+    while not finish:
+        min_idx = np.argmin([times_list[idx][counter[idx]]
+                             for idx in range(number_exp)])
+        counter[min_idx] += 1
+        for idx in range(number_exp):
+            new_times[idx].append(times_list[min_idx][counter[min_idx] - 1])
+            new_trajectories[idx].append(trace_list[idx][counter[idx] - 1])
+        # Check if we're finished
+        for i in range(number_exp):
+            finish = True
+            if counter[i] < len(trace_list[i]) - 1:
+                finish = False
+                break
+
+    times = new_times
+    trajectories = new_trajectories
+    tmp_times = list()
+
+    # Sanitize lists and delete double entries
+    for i in range(number_exp):
+        tmp_times = list()
+        tmp_traj = list()
+        for t in range(len(times[i]) - 1):
+            if times[i][t + 1] != times[i][t] and not np.isnan(times[i][t]):
+                tmp_times.append(times[i][t])
+                tmp_traj.append(trajectories[i][t])
+        tmp_times.append(times[i][-1])
+        tmp_traj.append(trajectories[i][-1])
+        times[i] = tmp_times
+        trajectories[i] = tmp_traj
+
+    # We need only one list for all times
+    times = tmp_times
+
+    # Now clean data as sometimes the best val doesn't change over time
+    last_perf = [i*10 for i in range(number_exp)]  # dummy entry
+    time_ = list()
+    performance = list([list() for i in range(number_exp)])
+    for idx, t in enumerate(times):
+        # print t, idx, last_perf, perf_list[0][idx], perf_list[1][idx]
+        diff = sum([np.abs(last_perf[i] - trajectories[i][idx]) for i in range(number_exp)])
+        if diff != 0 or idx == 0 or idx == len(times) - 1:
+            # always use first and last entry
+            time_.append(t)
+            [performance[i].append(trajectories[i][idx]) for i in range(number_exp)]
+        last_perf = [p[idx] for p in trajectories]
+
+    trajectories = performance
+    times = time_
+    return trajectories, times
+
+
 def extract_trajectory(experiment, cut=sys.maxint, test=False):
     """Extract a list where the value at position i is the current best after i configurations."""
     if not isinstance(cut, int):
@@ -191,12 +286,31 @@ def extract_results(experiment, cut=sys.maxint):
     return trl
 
 
-def extract_runtime_timestamps(trials, cut=sys.maxint):
-    # return a list like (20, 53, 101, 200)
+def extract_runtime_timestamps(trials, cut=sys.maxint, conf_overhead=False):
+    """Extracts timesteps for a list of trials
+    trials = list of trials as in a HPOlib.pkl
+    cut = consider only that many trials
+    conf_overhead = add conf overhead, if false only add up target algorithm time
+
+    return a list like (0, 20, 53, 101, 200)
+    """
+    # (TODO): This does not work for crossvalidation + intensify
+
     time_list = list()
     time_list.append(0)
-    for trial in trials["trials"][:cut+1]:
-        time_list.append(np.sum(trial["instance_durations"]) + time_list[-1])
+    for idx, trial in enumerate(trials["trials"][:cut+1]):
+        if trial["status"] != 3:
+            # Ignore this trial, it is not yet finished
+            continue
+
+        if conf_overhead:
+            if len(trials["starttime"]) > 1:
+                raise ValueError("Cannot extract runtimes for restarted "
+                                 "experiments, please implement me")
+
+            time_list.append(trials["cv_starttime"][idx] - trials["starttime"][0] + trial["duration"])
+        else:
+            time_list.append(np.sum(trial["instance_durations"]) + time_list[-1])
     return time_list