Merge branch 'planner-scores'

aibasel · Apr 22, 2022 · 1462729 · 1462729
2 parents 4705e06 + 17e4841
commit 1462729
Show file tree

Hide file tree

Showing 16 changed files with 109 additions and 31 deletions.
diff --git a/dev/make-release-notes.py b/dev/make-release-notes.py
@@ -5,7 +5,7 @@
 
 _, VERSION, CHANGELOG, LIST = sys.argv
 
-REGEX = fr"""
+REGEX = rf"""
 Changelog\n
 =========\n
 \n

diff --git a/docs/autobuild.sh b/docs/autobuild.sh
@@ -2,6 +2,7 @@
 # Automatically rebuild Sphinx documentation when files change.
 
 DOCS="$( dirname "$0" )"
+DOCS="$( realpath "$DOCS" )"
 REPO="$( realpath "$DOCS/../" )"
 
 cd "$REPO/docs"

diff --git a/docs/news.rst b/docs/news.rst
@@ -14,6 +14,8 @@ Downward Lab
 * Fix header sizes in HTML reports (Jendrik Seipp).
 * Include domains in attribute overview tables even if none of their tasks has an
   attribute value for all algorithms (Jendrik Seipp).
+* Compute "score_planner_time" and "score_planner_memory" attributes in planner
+  parser (Jendrik Seipp).
 
 
 v7.0 (2021-10-24)

diff --git a/downward/experiment.py b/downward/experiment.py
@@ -125,7 +125,7 @@ class FastDownwardExperiment(Experiment):
     #: "translator_peak_memory", "translator_time_done".
     #:
     #: Parsed attributes: "node", "planner_memory", "planner_time",
-    #: "planner_wall_clock_time".
+    #: "planner_wall_clock_time", "score_planner_memory", "score_planner_time".
     PLANNER_PARSER = os.path.join(DOWNWARD_SCRIPTS_DIR, "planner-parser.py")
 
     def __init__(self, path=None, environment=None, revision_cache=None):

diff --git a/downward/reports/__init__.py b/downward/reports/__init__.py
@@ -75,6 +75,7 @@ def __init__(self, **kwargs):
         >>> # Use a filter function to select algorithms.
         >>> def only_blind_and_lmcut(run):
         ...     return run["algorithm"] in ["blind", "lmcut"]
+        ...
         >>> report = PlanningReport(filter=only_blind_and_lmcut)
 
         >>> # Use "filter_algorithm" to select and *order* algorithms.
@@ -96,6 +97,7 @@ def __init__(self, **kwargs):
         ...             times = [t for t in times if t is not None]
         ...             map[(domain, problem)] = min(times) if times else None
         ...         return str(map)
+        ...
 
         """
         # Set non-default options for some attributes.

diff --git a/downward/reports/scatter.py b/downward/reports/scatter.py
@@ -57,6 +57,7 @@ def __init__(
         ...     # run2['domain'] has the same value, because we always
         ...     # compare two runs of the same problem.
         ...     return run1["domain"]
+        ...
 
         Example grouping by difficulty:
 
@@ -68,6 +69,7 @@ def __init__(
         ...     if time1 == time2:
         ...         return "equal"
         ...     return "worse"
+        ...
 
         >>> from downward.experiment import FastDownwardExperiment
         >>> exp = FastDownwardExperiment()

diff --git a/downward/reports/scatter_matplotlib.py b/downward/reports/scatter_matplotlib.py
@@ -33,7 +33,7 @@ def create_legend(self):
     @staticmethod
     def _get_max_supported_value(scale):
         if scale == "linear":
-            return 10 ** 12  # Larger values cause numerical problems.
+            return 10**12  # Larger values cause numerical problems.
         else:
             assert scale in {"log", "symlog"}, scale
             return sys.maxsize

diff --git a/downward/scripts/planner-parser.py b/downward/scripts/planner-parser.py
@@ -1,5 +1,6 @@
 #! /usr/bin/env python
 
+from lab import tools
 from lab.parser import Parser
 
 
@@ -11,21 +12,69 @@ def add_planner_memory(content, props):
 
 
 def add_planner_time(content, props):
+    # Newer planner versions print planner time and we parse it below. Don't overwrite it.
+    if "planner_time" in props:
+        return
     try:
         props["planner_time"] = props["translator_time_done"] + props["total_time"]
     except KeyError:
         pass
 
 
+def add_planner_scores(content, props):
+    """
+    Store "score_planner_time" and "score_planner_memory" in *props*.
+
+    A score of 1 is the best possible performance; unsolved tasks and worst
+    performance get 0. If a planner limit is missing from *props*, a note is
+    printed and the corresponding score is not set.
+
+    """
+    finished = props["coverage"] or props["unsolvable"]
+
+    if "planner_time_limit" in props:
+        props["score_planner_time"] = tools.compute_log_score(
+            finished,
+            props.get("planner_time"),
+            lower_bound=1.0,
+            upper_bound=props["planner_time_limit"],
+        )
+    else:
+        print("planner_time_limit missing -> can't compute planner time score")
+
+    if "planner_memory_limit" in props:
+        props["score_planner_memory"] = tools.compute_log_score(
+            finished,
+            props.get("planner_memory"),
+            lower_bound=2000,
+            upper_bound=props["planner_memory_limit"] * 1024,  # MB -> KB
+        )
+    else:
+        print("planner_memory_limit missing -> can't compute planner memory score")
+
+
 class PlannerParser(Parser):
     def __init__(self):
         Parser.__init__(self)
-        self.add_function(add_planner_memory)
-        self.add_function(add_planner_time)
 
+        self.add_pattern(
+            "planner_time_limit",
+            r"planner time limit: (.+)s",
+            type=float,
+        )
+        self.add_pattern(
+            "planner_memory_limit",
+            r"planner memory limit: (.+) MB",
+            type=int,
+        )
         self.add_pattern(
             "node", r"node: (.+)\n", type=str, file="driver.log", required=True
         )
+        self.add_pattern(
+            "planner_time",
+            r"Planner time: (.+)s",
+            type=float,
+        )
         self.add_pattern(
             "planner_wall_clock_time",
             r"planner wall-clock time: (.+)s",
@@ -34,6 +83,10 @@ def __init__(self):
             required=True,
         )
 
+        self.add_function(add_planner_memory)
+        self.add_function(add_planner_time)
+        self.add_function(add_planner_scores)
+
 
 def main():
     parser = PlannerParser()

diff --git a/downward/scripts/single-search-parser.py b/downward/scripts/single-search-parser.py
@@ -4,10 +4,10 @@
 Regular expressions and functions for parsing single-search runs of Fast Downward.
 """
 
-import math
 import re
 import sys
 
+from lab import tools
 from lab.parser import Parser
 
 
@@ -108,40 +108,32 @@ def add_scores(content, props):
     to solve a task and worst performance are counted as 0.
 
     """
-
-    def log_score(value, min_bound, max_bound):
-        if value is None or not props["coverage"]:
-            return 0
-        value = max(value, min_bound)
-        value = min(value, max_bound)
-        raw_score = math.log(value) - math.log(max_bound)
-        best_raw_score = math.log(min_bound) - math.log(max_bound)
-        return raw_score / best_raw_score
+    success = props["coverage"] or props["unsolvable"]
 
     for attr in ("expansions", "evaluations", "generated"):
-        props["score_" + attr] = log_score(
-            props.get(attr), min_bound=100, max_bound=1e6
+        props["score_" + attr] = tools.compute_log_score(
+            success, props.get(attr), lower_bound=100, upper_bound=1e6
         )
 
     try:
         max_time = props["limit_search_time"]
     except KeyError:
         print("search time limit missing -> can't compute time scores")
     else:
-        props["score_total_time"] = log_score(
-            props.get("total_time"), min_bound=1.0, max_bound=max_time
+        props["score_total_time"] = tools.compute_log_score(
+            success, props.get("total_time"), lower_bound=1.0, upper_bound=max_time
         )
-        props["score_search_time"] = log_score(
-            props.get("search_time"), min_bound=1.0, max_bound=max_time
+        props["score_search_time"] = tools.compute_log_score(
+            success, props.get("search_time"), lower_bound=1.0, upper_bound=max_time
         )
 
     try:
         max_memory_kb = props["limit_search_memory"] * 1024
     except KeyError:
         print("search memory limit missing -> can't compute memory score")
     else:
-        props["score_memory"] = log_score(
-            props.get("memory"), min_bound=2000, max_bound=max_memory_kb
+        props["score_memory"] = tools.compute_log_score(
+            success, props.get("memory"), lower_bound=2000, upper_bound=max_memory_kb
         )
 
 

diff --git a/examples/lmcut.py b/examples/lmcut.py
@@ -12,7 +12,7 @@
 from lab.environments import BaselSlurmEnvironment, LocalEnvironment
 
 
-ATTRIBUTES = ["coverage", "error", "expansions", "total_time"]
+ATTRIBUTES = ["coverage", "error", "expansions", "planner_memory", "planner_time"]
 
 NODE = platform.node()
 if NODE.endswith((".cluster.bc2.ch", ".scicore.unibas.ch")):

diff --git a/examples/showcase-options.py b/examples/showcase-options.py
@@ -172,11 +172,15 @@ def eval_dir(num):
     name="report-abs-p-filter",
 )
 exp.add_report(
-    AbsoluteReport(attributes=["coverage", "error"], format="tex"),
+    AbsoluteReport(
+        attributes=["coverage", "error", "score_planner_time"], format="tex"
+    ),
     outfile="report-abs-combined.tex",
 )
 exp.add_report(
-    AbsoluteReport(attributes=["coverage", "error"], format="html"),
+    AbsoluteReport(
+        attributes=["coverage", "error", "score_planner_memory"], format="html"
+    ),
     outfile="report-abs-combined.html",
 )
 exp.add_report(

diff --git a/lab/cached_revision.py b/lab/cached_revision.py
@@ -78,6 +78,7 @@ def __init__(self, repo, rev, build_cmd, exclude=None):
         ...     rev = "main"
         ...     cr = CachedRevision(repo, rev, ["./build.py"], exclude=["experiments"])
         ...     # cr.cache(revision_cache)  # Uncomment to actually cache the code.
+        ...
 
         You can now copy the cached repo to your experiment:
 

diff --git a/lab/reports/__init__.py b/lab/reports/__init__.py
@@ -36,7 +36,7 @@ def geometric_mean(values):
     """
     assert None not in values
     exp = 1.0 / len(values)
-    return tools.product([val ** exp for val in values])
+    return tools.product([val**exp for val in values])
 
 
 def finite_sum(values):
@@ -200,6 +200,7 @@ def __init__(self, attributes=None, format="html", filter=None, **kwargs):
 
         >>> def low_init_h(run):
         ...     return run["initial_h_value"] <= 100
+        ...
         >>> report = Report(filter=low_init_h)
 
         Only include runs from "blocks" and "barman" with a timeout:
@@ -214,6 +215,7 @@ def __init__(self, attributes=None, format="html", filter=None, **kwargs):
         ...     if expansions is not None and time:
         ...         run["expansions_per_time"] = expansions / time
         ...     return run
+        ...
         >>> report = Report(
         ...     attributes=["expansions_per_time"], filter=[add_expansions_per_time]
         ... )
@@ -225,6 +227,7 @@ def __init__(self, attributes=None, format="html", filter=None, **kwargs):
         ...     paper_names = {"lama11": "LAMA 2011", "fdss_sat1": "FDSS 1"}
         ...     run["algorithm"] = paper_names[name]
         ...     return run
+        ...
 
         >>> # We want LAMA 2011 to be the leftmost column.
         >>> # filter_* filters are evaluated last, so we use the updated
@@ -471,6 +474,7 @@ def __init__(self, title="", min_wins=None, colored=False, digits=2):
         >>> t.add_row("prob2", {"cfg1": 15, "cfg2": 25})
         >>> def remove_quotes(s):
         ...     return s.replace('""', "")
+        ...
         >>> print(remove_quotes(str(t)))
         || expansions |  cfg1 |  cfg2 |
          | prob1 |  10 |  20 |

diff --git a/lab/reports/filter.py b/lab/reports/filter.py
@@ -9,6 +9,7 @@ class FilterReport(Report):
 
     >>> def remove_openstacks(run):
     ...     return "openstacks" not in run["domain"]
+    ...
 
     >>> from lab.experiment import Experiment
     >>> report = FilterReport(filter=remove_openstacks)

diff --git a/lab/tools.py b/lab/tools.py
@@ -2,6 +2,7 @@
 import colorsys
 import functools
 import logging
+import math
 import os
 from pathlib import Path
 import pkgutil
@@ -233,6 +234,21 @@ def add_unexplained_error(dictionary, error):
         dictionary[key].append(error)
 
 
+def compute_log_score(success, value, lower_bound, upper_bound):
+    """Map *value* to a score between 0 and 1 on a logarithmic scale.
+
+    Successful runs with value <= lower_bound score 1. Failed runs (success is
+    falsy), missing values (None) and values >= upper_bound score 0.
+    """
+    if not success or value is None:
+        return 0.0
+    # Clamp into [lower_bound, upper_bound] before taking logarithms.
+    clamped = min(max(value, lower_bound), upper_bound)
+    distance_to_worst = math.log(clamped) - math.log(upper_bound)
+    full_range = math.log(lower_bound) - math.log(upper_bound)
+    return distance_to_worst / full_range
+
+
 class Properties(dict):
     class _PropertiesEncoder(json.JSONEncoder):
         def default(self, o):

diff --git a/tox.ini b/tox.ini
@@ -48,8 +48,8 @@ commands =
 [testenv:style]
 skipsdist = true
 deps =
-  black==20.8b0
-  blackdoc==0.1.2
+  black==22.3.0
+  blackdoc==0.3.4
   flake8
   flake8-2020
   flake8-bugbear
@@ -64,8 +64,8 @@ commands =
 [testenv:fix-style]
 skipsdist = true
 deps =
-  black==20.8b0
-  blackdoc==0.1.2
+  black==22.3.0
+  blackdoc==0.3.4
   isort>=5.0,<5.1
   pyupgrade==2.18.3
 commands =