Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 42 additions & 18 deletions dev/archery/archery/benchmark/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import glob
import json
import os
import random
import re

from .core import BenchmarkSuite
Expand All @@ -26,8 +27,22 @@
from ..lang.cpp import CppCMakeDefinition, CppConfiguration
from ..lang.java import JavaMavenDefinition, JavaConfiguration
from ..utils.cmake import CMakeBuild
from ..utils.git import git
from ..utils.maven import MavenBuild
from ..utils.logger import logger
from ..utils.source import ArrowSources


def _rev_or_path_dirname(src, rev_or_path):
if rev_or_path == ArrowSources.WORKSPACE:
return rev_or_path
try:
sha = git.rev_parse(rev_or_path, git_dir=src.path)
if isinstance(sha, bytes):
sha = sha.decode("ascii")
return sha
except Exception:
return rev_or_path.replace("/", "_")


def regex_filter(re_expr):
Expand All @@ -47,6 +62,7 @@ def __init__(self, suite_filter=None, benchmark_filter=None,
self.benchmark_filter = benchmark_filter
self.repetitions = repetitions
self.repetition_min_time = repetition_min_time
self.results_dir = None

@property
def suites(self):
Expand Down Expand Up @@ -108,11 +124,14 @@ def __repr__(self):
class CppBenchmarkRunner(BenchmarkRunner):
""" Run suites from a CMakeBuild. """

def __init__(self, build, benchmark_extras, **kwargs):
def __init__(self, build, benchmark_extras, run_id=None,
results_dir=None, **kwargs):
""" Initialize a CppBenchmarkRunner. """
super().__init__(**kwargs)
self.build = build
self.benchmark_extras = benchmark_extras
super().__init__(**kwargs)
self.run_id = run_id
self.results_dir = results_dir

@staticmethod
def default_configuration(**kwargs):
Expand Down Expand Up @@ -217,19 +236,25 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
build = CMakeBuild.from_path(rev_or_path)
return CppBenchmarkRunner(build, **kwargs)
else:
# Revisions can references remote via the `/` character, ensure
# that the revision is path friendly
path_rev = rev_or_path.replace("/", "_")
path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
os.mkdir(root_rev)
os.makedirs(root_rev, exist_ok=True)

clone_dir = os.path.join(root_rev, "arrow")
# Possibly checkout the sources at given revision, no need to
# perform cleanup on cloned repository as root_rev is reclaimed.
src_rev, _ = src.at_revision(rev_or_path, clone_dir)
if os.path.isdir(clone_dir):
src_rev = ArrowSources(clone_dir)
else:
src_rev, _ = src.at_revision(rev_or_path, clone_dir)
Comment on lines +244 to +247
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Safe to reuse because we are in a subfolder under the full commit hash

cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
build_dir = os.path.join(root_rev, "build")
return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
run_root = os.path.join(root_rev, "build", "run")
os.makedirs(run_root, exist_ok=True)
run_id = f"{random.randrange(16**8):08x}"
build_dir = os.path.join(run_root, run_id)
build = cmake_def.build(build_dir)
results_dir = os.path.join(root_rev, "bench", "run", run_id)
os.makedirs(results_dir, exist_ok=True)
return CppBenchmarkRunner(build, run_id=run_id,
results_dir=results_dir, **kwargs)


class JavaBenchmarkRunner(BenchmarkRunner):
Expand Down Expand Up @@ -310,16 +335,15 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
maven_def = JavaMavenDefinition(rev_or_path, maven_conf)
return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs)
else:
# Revisions can references remote via the `/` character, ensure
# that the revision is path friendly
path_rev = rev_or_path.replace("/", "_")
path_rev = _rev_or_path_dirname(src, rev_or_path)
root_rev = os.path.join(root, path_rev)
os.mkdir(root_rev)
os.makedirs(root_rev, exist_ok=True)

clone_dir = os.path.join(root_rev, "arrow")
# Possibly checkout the sources at given revision, no need to
# perform cleanup on cloned repository as root_rev is reclaimed.
src_rev, _ = src.at_revision(rev_or_path, clone_dir)
if os.path.isdir(clone_dir):
src_rev = ArrowSources(clone_dir)
else:
src_rev, _ = src.at_revision(rev_or_path, clone_dir)
maven_def = JavaMavenDefinition(src_rev.java, maven_conf)
build_dir = os.path.join(root_rev, "arrow/java")
return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs)
68 changes: 53 additions & 15 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@

from io import StringIO
import click
import datetime
import json
import logging
import os
import pathlib
import platform
import sys

from .benchmark.codec import JsonEncoder
Expand All @@ -41,6 +43,27 @@
BOOL = ArrowBool()


def _run_metadata(ctx):
uname = platform.uname()
return {
"time": datetime.datetime.now(datetime.timezone.utc).isoformat(),
"cmd": {
"full": " ".join(sys.argv),
"params": ctx.params,
},
"machine_info": {
"platform": platform.platform(),
"system": uname.system,
"release": uname.release,
"version": uname.version,
"machine": platform.machine(),
"processor": platform.processor(),
"architecture": platform.architecture()[0],
"logical_cores": os.cpu_count(),
},
}


@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--debug", type=BOOL, is_flag=True, default=False,
envvar='ARCHERY_DEBUG',
Expand Down Expand Up @@ -305,6 +328,11 @@ def check_language(ctx, param, value):
click.option("--preserve", type=BOOL, default=False, show_default=True,
is_flag=True,
help="Preserve workspace for investigation."),
click.option("--preserve-dir", metavar="<path>",
type=click.Path(file_okay=False, resolve_path=True),
default=None,
help="Parent directory in which to create the preserved "
"workspace. Has no effect without --preserve."),
click.option("--output", metavar="<output>",
type=click.File("w", encoding="utf8"), default=None,
help="Capture output result into file."),
Expand Down Expand Up @@ -347,12 +375,12 @@ def benchmark_filter_options(cmd):
default="WORKSPACE", required=False)
@benchmark_common_options
@click.pass_context
def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
java_home, java_options, build_extras, benchmark_extras,
cpp_benchmark_extras, language, **kwargs):
def benchmark_list(ctx, rev_or_path, src, preserve, preserve_dir, output,
cmake_extras, java_home, java_options, build_extras,
benchmark_extras, cpp_benchmark_extras, language, **kwargs):
""" List benchmark suite.
"""
with tmpdir(preserve=preserve) as root:
with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root:
logger.debug(f"Running benchmark {rev_or_path}")

if language == "cpp":
Expand Down Expand Up @@ -392,10 +420,11 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
"Currently only supported for language=cpp. "
"[default: use runner-specific defaults]"))
@click.pass_context
def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
java_home, java_options, build_extras, benchmark_extras,
language, suite_filter, benchmark_filter, repetitions,
repetition_min_time, cpp_benchmark_extras, **kwargs):
def benchmark_run(ctx, rev_or_path, src, preserve, preserve_dir, output,
cmake_extras, java_home, java_options, build_extras,
benchmark_extras, language, suite_filter, benchmark_filter,
repetitions, repetition_min_time, cpp_benchmark_extras,
**kwargs):
""" Run benchmark suite.

This command will run the benchmark suite for a single build. This is
Expand Down Expand Up @@ -433,7 +462,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
\b
archery benchmark run --output=run.json
"""
with tmpdir(preserve=preserve) as root:
with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root:
logger.debug(f"Running benchmark {rev_or_path}")

if language == "cpp":
Expand Down Expand Up @@ -465,6 +494,15 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
# when asked to JSON-serialize the results, so produce a JSON
# output even when none is requested.
json_out = json.dumps(runner_base, cls=JsonEncoder)
if runner_base.results_dir is not None:
results_path = os.path.join(runner_base.results_dir,
"benchmark.json")
with open(results_path, "w") as f:
f.write(json_out)
metadata_path = os.path.join(runner_base.results_dir,
"metadata.json")
with open(metadata_path, "w") as f:
json.dump(_run_metadata(ctx), f, indent=2, default=str)
if output is not None:
output.write(json_out)

Expand All @@ -486,11 +524,11 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
@click.argument("baseline", metavar="[<baseline>]]", default="origin/HEAD",
required=False)
@click.pass_context
def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
suite_filter, benchmark_filter, repetitions, no_counters,
java_home, java_options, build_extras, benchmark_extras,
cpp_benchmark_extras, threshold, contender, baseline,
**kwargs):
def benchmark_diff(ctx, src, preserve, preserve_dir, output, language,
cmake_extras, suite_filter, benchmark_filter, repetitions,
no_counters, java_home, java_options, build_extras,
benchmark_extras, cpp_benchmark_extras, threshold,
contender, baseline, **kwargs):
"""Compare (diff) benchmark runs.

This command acts like git-diff but for benchmark results.
Expand Down Expand Up @@ -564,7 +602,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
# This should not recompute the benchmark from run.json
archery --quiet benchmark diff WORKSPACE run.json > result.json
"""
with tmpdir(preserve=preserve) as root:
with tmpdir(preserve=preserve, preserve_dir=preserve_dir) as root:
logger.debug(f"Comparing {contender} (contender) with {baseline} (baseline)")

if language == "cpp":
Expand Down
8 changes: 6 additions & 2 deletions dev/archery/archery/utils/tmpdir.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
# specific language governing permissions and limitations
# under the License.

import os
from contextlib import contextmanager
from tempfile import mkdtemp, TemporaryDirectory


@contextmanager
def tmpdir(preserve=False, prefix="arrow-archery-"):
if preserve:
def tmpdir(preserve=False, prefix="arrow-archery-", preserve_dir=None):
if preserve and preserve_dir is not None:
os.makedirs(preserve_dir, exist_ok=True)
yield preserve_dir
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Working as intended. We want to have the same preserve dir to store multiple runs inside.
The argument could be made for Path input but it not as clear why users would want to use such a option with the same path again.

elif preserve:
yield mkdtemp(prefix=prefix)
else:
with TemporaryDirectory(prefix=prefix) as tmp:
Expand Down