Skip to content

Commit

Permalink
Revamp Singularity example experiment: use runsolver to limit resourc…
Browse files Browse the repository at this point in the history
…e usage.
  • Loading branch information
jendrikseipp committed Nov 5, 2021
1 parent 65b54cc commit 72e3485
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 35 deletions.
12 changes: 12 additions & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
Changelog
=========

next (unreleased)
-----------------

Lab
^^^
* Revamp Singularity example experiment: use runsolver to limit resource usage (Silvan Sievers and Jendrik Seipp).

Downward Lab
^^^^^^^^^^^^
* No changes so far.


v7.0 (2021-10-24)
-----------------

Expand Down
41 changes: 41 additions & 0 deletions examples/singularity/filter-stderr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#! /usr/bin/env python

"""Filter lines from run.err that stem from "expected errors"."""

from pathlib import Path
import shutil


IGNORE_PATTERNS = [
    "CPU time limit exceeded",
    "std::bad_alloc",
    "WARNING: will ignore action costs",
    "differs from the one in the portfolio file",
    "Terminated",
    "Killed",
]


def main():
    """Strip known-harmless lines from run.err, keeping the original as run.err.bak."""
    print("Running filter-stderr.py")
    stderr = Path("run.err")
    if not stderr.is_file():
        return

    kept_lines = []
    matched_any = False
    with open(stderr, "r") as infile:
        for line in infile:
            if any(pattern in line for pattern in IGNORE_PATTERNS):
                matched_any = True
            else:
                kept_lines.append(line)

    if matched_any:
        shutil.move(stderr, "run.err.bak")
        # Write run.err even when every line was filtered out: Lab removes
        # empty run.err files in a later step.
        with open(stderr, "w") as outfile:
            outfile.writelines(kept_lines)


if __name__ == "__main__":
main()
13 changes: 8 additions & 5 deletions examples/singularity/run-singularity.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@ if [ -f $PWD/$4 ]; then
exit 2
fi

# Ensure that strings like "CPU time limit exceeded" and "Killed" are in English.
export LANG=C

set +e
# Some planners print to stderr when running out of memory, so we redirect stderr to stdout.
{ /usr/bin/time -o /dev/stdout -f "Singularity runtime: %es real, %Us user, %Ss sys" \
singularity run -C -H $PWD $1 $PWD/$2 $PWD/$3 $4 ; } 2>&1
singularity run -C -H "$PWD" "$1" "$PWD/$2" "$PWD/$3" "$4"
set -e

printf "\nRun VAL\n\n"

if [ -f $PWD/$4 ]; then
echo "Found plan file."
validate $PWD/$2 $PWD/$3 $PWD/$4
validate -v "$PWD/$2" "$PWD/$3" "$PWD/$4"
exit 0
else
echo "No plan file."
validate $PWD/$2 $PWD/$3
validate -v "$PWD/$2" "$PWD/$3"
exit 99
fi
66 changes: 45 additions & 21 deletions examples/singularity/singularity-exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@
"""
Example experiment for running Singularity planner images.
Note that Downward Lab assumes that the evaluated algorithms are written
in good faith. It is not equipped to handle malicious code. For example,
it would be easy to write planner code that bypasses the time and memory
limits set within Downward Lab. If you're running untrusted code, we
recommend using cgroups to enforce resource limits.
A note on running Singularity on clusters: reading large Singularity
files over the network is not optimal, so we recommend copying the
images to a local filesystem (e.g., /tmp/) before running experiments.
The time and memory limits set with Lab can be circumvented by solvers
that fork child processes. Their resource usage is not checked. If you're
running solvers that, like Fast Downward, don't check their own resource usage,
we recommend using cgroups or the "runsolver" tool to enforce resource
limits. Since setting time limits for solvers with cgroups is difficult,
the experiment below uses the "runsolver" tool, which has been used in
multiple SAT competitions to enforce resource limits. For the experiment
to run, the runsolver binary needs to be on the PATH. You can obtain a
runsolver copy from https://github.com/jendrikseipp/runsolver.
A note on running Singularity on clusters: reading large Singularity files
over the network is not optimal, so we recommend copying the images to a
local filesystem (e.g., /tmp/) before running experiments.
"""

import os
from pathlib import Path
import platform
import sys

from downward import suites
from downward.reports.absolute import AbsoluteReport
Expand All @@ -38,11 +44,12 @@ class BaseReport(AbsoluteReport):

NODE = platform.node()
RUNNING_ON_CLUSTER = NODE.endswith((".scicore.unibas.ch", ".cluster.bc2.ch"))
DIR = os.path.abspath(os.path.dirname(__file__))
REPO = os.path.dirname(DIR)
IMAGES_DIR = os.environ["SINGULARITY_IMAGES"]
assert os.path.isdir(IMAGES_DIR), IMAGES_DIR
DIR = Path(__file__).resolve().parent
REPO = DIR.parent
IMAGES_DIR = Path(os.environ["SINGULARITY_IMAGES"])
assert IMAGES_DIR.is_dir(), IMAGES_DIR
BENCHMARKS_DIR = os.environ["DOWNWARD_BENCHMARKS"]
MEMORY_LIMIT = 3584 # MiB
if RUNNING_ON_CLUSTER:
SUITE = ["depot", "freecell", "gripper", "zenotravel"]
ENVIRONMENT = BaselSlurmEnvironment(
Expand All @@ -66,20 +73,22 @@ class BaseReport(AbsoluteReport):
"error",
"g_values_over_time",
"run_dir",
"raw_memory",
"runtime",
"virtual_memory",
]

exp = Experiment(environment=ENVIRONMENT)
exp.add_step("build", exp.build)
exp.add_step("start", exp.start_runs)
exp.add_fetcher(name="fetch")
exp.add_parser(os.path.join(DIR, "singularity-parser.py"))
exp.add_parser(DIR / "singularity-parser.py")


def get_image(name):
    """Return a ``(resource_name, image_path)`` pair for the image *name*.

    Dashes are not allowed in Lab resource names, so they are mapped to
    underscores. Raises AssertionError if the image file does not exist.
    """
    # Defect fixed: the span contained both the old os.path-based lines and
    # the new pathlib-based lines (diff residue); keep only the new version.
    planner = name.replace("-", "_")
    image = IMAGES_DIR / (name + ".img")
    assert image.is_file(), image
    return planner, image


Expand All @@ -88,32 +97,47 @@ def get_image(name):
for planner, image in IMAGES:
exp.add_resource(planner, image, symlink=True)

singularity_script = os.path.join(DIR, "run-singularity.sh")
exp.add_resource("run_singularity", singularity_script)
exp.add_resource("run_singularity", DIR / "run-singularity.sh")
exp.add_resource("filter_stderr", DIR / "filter-stderr.py")

for planner, _ in IMAGES:
for task in suites.build_suite(BENCHMARKS_DIR, SUITE):
run = exp.add_run()
run.add_resource("domain", task.domain_file, "domain.pddl")
run.add_resource("problem", task.problem_file, "problem.pddl")
# Use runsolver to limit time and memory. It must be on the system
# PATH. Important: we cannot use time_limit and memory_limit of
# Lab's add_command() because setting the same memory limit with
# runsolver again using setrlimit fails.
run.add_command(
"run-planner",
[
"runsolver",
"-C",
TIME_LIMIT,
"-V",
MEMORY_LIMIT,
"-w",
"watch.log",
"-v",
"values.log",
"{run_singularity}",
f"{{{planner}}}",
"{domain}",
"{problem}",
"sas_plan",
],
time_limit=TIME_LIMIT,
memory_limit=3584,
)
# Remove temporary files from old Fast Downward versions.
run.add_command("rm-tmp-files", ["rm", "-f", "output.sas", "output"])
run.add_command("filter-stderr", [sys.executable, "{filter_stderr}"])

run.set_property("domain", task.domain)
run.set_property("problem", task.problem)
run.set_property("algorithm", planner)
run.set_property("id", [planner, task.domain, task.problem])

report = os.path.join(exp.eval_dir, f"{exp.name}.html")
report = Path(exp.eval_dir) / f"{exp.name}.html"
exp.add_report(BaseReport(attributes=ATTRIBUTES), outfile=report)

exp.run_steps()
66 changes: 57 additions & 9 deletions examples/singularity/singularity-parser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
#! /usr/bin/env python

import re
import sys

from lab.parser import Parser


def coverage(content, props):
    """Set "coverage" to 1 iff a plan cost was parsed; drop "runtime" otherwise."""
    solved = "cost" in props
    props["coverage"] = int(solved)
    if not solved and "runtime" in props:
        del props["runtime"]


def unsolvable(content, props):
    """Set "unsolvable" to 1 iff the planner proved the task unsolvable.

    We match the exact message because a loose substring test such as
    checking for "unsolvable" easily generates false positives.
    """
    # Defect fixed: the span contained both the old loose test and the new
    # exact-message test (diff residue); keep only the new version.
    props["unsolvable"] = int(
        "Completely explored state space -- no solution!" in content
    )


def parse_g_value_over_time(content, props):
Expand All @@ -24,11 +23,49 @@ def parse_g_value_over_time(content, props):
props["g_values_over_time"] = [(float(t), int(g)) for g, t in matches]


def set_outcome(content, props):
    """Derive the "error" outcome from coverage, unsolvability and runsolver logs.

    *content* is the text of runsolver's values.log; *props* must already
    contain "coverage", "unsolvable", "runtime" and "time_limit".
    Exactly one of solved/unsolvable/out_of_time/out_of_memory should hold;
    otherwise the outcome is recorded as "unexpected-error".
    """
    # Defect fixed: the span mixed the removed `error()` function and the
    # superseded '"some-error-occured"' line (typo) with the new code (diff
    # residue); this is the coherent new version.
    lines = content.splitlines()
    solved = props["coverage"]
    unsolvable = props["unsolvable"]
    out_of_time = int("TIMEOUT=true" in lines)
    out_of_memory = int("MEMOUT=true" in lines)
    # runsolver decides "out of time" based on CPU rather than (cumulated)
    # WCTIME, so we additionally compare wall-clock time to the limit.
    if (
        not solved
        and not unsolvable
        and not out_of_time
        and not out_of_memory
        and props["runtime"] > props["time_limit"]
    ):
        out_of_time = 1
    # In cases where CPU time is very slightly above the threshold so that
    # runsolver didn't kill the planner yet and the planner solved a task
    # just within the limit, runsolver will still record an "out of time".
    # We remove this record. This case also applies to iterative planners.
    # If such planners solve the task, we don't treat them as running out
    # of time.
    if (solved or unsolvable) and (out_of_time or out_of_memory):
        print("task solved however runsolver recorded an out_of_*")
        print(props)
        out_of_time = 0
        out_of_memory = 0

    if not solved and not unsolvable:
        props["runtime"] = None

    # XOR: exactly one (or an odd number, which the flags above preclude)
    # of the outcomes must hold for a well-defined result.
    if solved ^ unsolvable ^ out_of_time ^ out_of_memory:
        if solved:
            props["error"] = "solved"
        elif unsolvable:
            props["error"] = "unsolvable"
        elif out_of_time:
            props["error"] = "out_of_time"
        elif out_of_memory:
            props["error"] = "out_of_memory"
    else:
        print(f"unexpected error: {props}", file=sys.stderr)
        props["error"] = "unexpected-error"


def main():
Expand All @@ -52,12 +89,23 @@ def main():
required=True,
)
parser.add_pattern("runtime", r"Singularity runtime: (.+?)s", type=float)
parser.add_pattern(
"time_limit", r"Enforcing CPUTime limit \(soft limit, will send SIGTERM then SIGKILL\): (\d+) seconds",
type=int, file="watch.log", required=True
)
# Cumulative runtime and virtual memory of the solver and all child processes.
parser.add_pattern(
"runtime", r"WCTIME=(.+)", type=float, file="values.log", required=True
)
parser.add_pattern(
"virtual_memory", r"MAXVM=(\d+)", type=int, file="values.log", required=True
)
parser.add_pattern("raw_memory", r"Peak memory: (\d+) KB", type=int)
parser.add_pattern("cost", r"\nFinal value: (.+)\n", type=int)
parser.add_function(coverage)
parser.add_function(unsolvable)
parser.add_function(error)
parser.add_function(parse_g_value_over_time)
parser.add_function(set_outcome, file="values.log")
parser.parse()


Expand Down

0 comments on commit 72e3485

Please sign in to comment.