From ce20cf5987168ce3fafc84b9fdbc8f839c0097b9 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 10:01:11 +0100 Subject: [PATCH 1/8] [tests] Mark multiprocessing test as flaky. Fixes #407. --- tests/llvm/BUILD | 1 - tests/llvm/multiprocessing_test.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD index 6a8f19151..de6f62991 100644 --- a/tests/llvm/BUILD +++ b/tests/llvm/BUILD @@ -217,7 +217,6 @@ py_test( py_test( name = "multiprocessing_test", - timeout = "short", srcs = ["multiprocessing_test.py"], flaky = 1, deps = [ diff --git a/tests/llvm/multiprocessing_test.py b/tests/llvm/multiprocessing_test.py index e241dcd82..bca182aba 100644 --- a/tests/llvm/multiprocessing_test.py +++ b/tests/llvm/multiprocessing_test.py @@ -9,6 +9,7 @@ import gym import pytest +from flaky import flaky from compiler_gym.envs import LlvmEnv from tests.pytest_plugins.common import macos_only @@ -37,6 +38,7 @@ def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue): queue.put((env, observation, reward, done, info)) +@flaky # Test contains timeouts. def test_running_environment_in_background_process(): """Test launching and running an LLVM environment in a background process.""" queue = mp.Queue(maxsize=3) @@ -46,8 +48,8 @@ def test_running_environment_in_background_process(): ) process.start() try: - process.join(timeout=10) - result = queue.get(timeout=10) + process.join(timeout=60) + result = queue.get(timeout=60) observation, reward, done, info = result assert not done From 65aa833ea7c4d69cffbe26410c102e2a0a035ca2 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 10:11:26 +0100 Subject: [PATCH 2/8] [tests] Adjust timeouts. 
--- tests/bin/BUILD | 1 - tests/llvm/BUILD | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/bin/BUILD b/tests/bin/BUILD index 55d053379..73faeee90 100644 --- a/tests/bin/BUILD +++ b/tests/bin/BUILD @@ -16,7 +16,6 @@ py_test( py_test( name = "manual_env_bin_test", - timeout = "short", srcs = ["manual_env_bin_test.py"], flaky = 1, deps = [ diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD index de6f62991..a7407d625 100644 --- a/tests/llvm/BUILD +++ b/tests/llvm/BUILD @@ -118,8 +118,9 @@ py_test( py_test( name = "fresh_environment_observation_reward_test", + timeout = "long", srcs = ["fresh_environment_observation_reward_test.py"], - shard_count = 8, + shard_count = 12, deps = [ "//compiler_gym/envs", "//tests:test_main", From 0304301cf2378e241d65ff7ff3b2b125e1aa2163 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 10:11:37 +0100 Subject: [PATCH 3/8] [tests] Mark flaky test. --- tests/llvm/fresh_environment_observation_reward_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/llvm/fresh_environment_observation_reward_test.py b/tests/llvm/fresh_environment_observation_reward_test.py index e1cf71018..8e5957e9d 100644 --- a/tests/llvm/fresh_environment_observation_reward_test.py +++ b/tests/llvm/fresh_environment_observation_reward_test.py @@ -4,12 +4,15 @@ # LICENSE file in the root directory of this source tree. 
"""Integrations tests for the LLVM CompilerGym environments.""" +from flaky import flaky + from compiler_gym.envs import CompilerEnv from tests.test_main import main pytest_plugins = ["tests.pytest_plugins.llvm"] +@flaky # Runtime can time out def test_step(env: CompilerEnv, observation_space: str, reward_space: str): """Request every combination of observation and reward in a fresh environment.""" env.reward_space = None From f7624fec1244053de6aec5bc880f40551d096b96 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 11:24:02 +0100 Subject: [PATCH 4/8] [tests] Remove unused gvn_sink tests. Issue #46. --- tests/llvm/BUILD | 10 ------ tests/llvm/gvn_sink_test.py | 65 ------------------------------------- 2 files changed, 75 deletions(-) delete mode 100644 tests/llvm/gvn_sink_test.py diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD index a7407d625..6a5ca4bd5 100644 --- a/tests/llvm/BUILD +++ b/tests/llvm/BUILD @@ -138,16 +138,6 @@ py_test( ], ) -py_test( - name = "gvn_sink_test", - srcs = ["gvn_sink_test.py"], - deps = [ - "//compiler_gym/envs", - "//tests:test_main", - "//tests/pytest_plugins:llvm", - ], -) - py_test( name = "gym_interface_compatability", timeout = "short", diff --git a/tests/llvm/gvn_sink_test.py b/tests/llvm/gvn_sink_test.py deleted file mode 100644 index df5c592d8..000000000 --- a/tests/llvm/gvn_sink_test.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
-"""Tests for action space determinism.""" -import hashlib - -import pytest - -from compiler_gym.envs import LlvmEnv -from tests.test_main import main - -pytest_plugins = ["tests.pytest_plugins.llvm"] - -ACTION_REPTITION_COUNT = 50 - - -@pytest.mark.skip(reason="github.com/facebookresearch/CompilerGym/issues/46") -@pytest.mark.parametrize( - "benchmark_name", - [ - "benchmark://cbench-v1/adpcm", - "benchmark://cbench-v1/bitcount", - "benchmark://cbench-v1/blowfish", - "benchmark://cbench-v1/bzip2", - "benchmark://cbench-v1/ghostscript", - "benchmark://cbench-v1/gsm", - "benchmark://cbench-v1/ispell", - "benchmark://cbench-v1/jpeg-c", - "benchmark://cbench-v1/jpeg-d", - "benchmark://cbench-v1/patricia", - "benchmark://cbench-v1/rijndael", - "benchmark://cbench-v1/stringsearch", - "benchmark://cbench-v1/stringsearch2", - "benchmark://cbench-v1/susan", - "benchmark://cbench-v1/tiff2bw", - "benchmark://cbench-v1/tiff2rgba", - "benchmark://cbench-v1/tiffdither", - "benchmark://cbench-v1/tiffmedian", - ], -) -def test_gvn_sink_non_determinism(env: LlvmEnv, benchmark_name: str): - """Regression test for -gvn-sink non-determinism. - See: https://github.com/facebookresearch/CompilerGym/issues/46 - """ - env.observation_space = "Ir" - - checksums = set() - for i in range(1, ACTION_REPTITION_COUNT + 1): - env.reset(benchmark=benchmark_name) - ir, _, done, _ = env.step(env.action_space.names.index("-gvn-sink")) - assert not done - sha1 = hashlib.sha1() - sha1.update(ir.encode("utf-8")) - checksums.add(sha1.hexdigest()) - - if len(checksums) != 1: - pytest.fail( - f"Repeating the -gvn-sink action {i} times on {benchmark_name} " - "produced different states" - ) - - -if __name__ == "__main__": - main() From 7a81e2d682f5a7d4bbdb5711754ec07f0439d275 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 11:37:14 +0100 Subject: [PATCH 5/8] [tests] Mark flaky tests. 
--- tests/llvm/observation_spaces_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/llvm/observation_spaces_test.py b/tests/llvm/observation_spaces_test.py index 25678f927..4e7facf2f 100644 --- a/tests/llvm/observation_spaces_test.py +++ b/tests/llvm/observation_spaces_test.py @@ -10,6 +10,7 @@ import networkx as nx import numpy as np import pytest +from flaky import flaky from gym.spaces import Box from gym.spaces import Dict as DictSpace @@ -1165,6 +1166,7 @@ def test_object_text_size_observation_spaces(env: LlvmEnv): assert value == crc32_code_sizes[sys.platform][2] +@flaky # Runtimes can time out def test_runtime_observation_space(env: LlvmEnv): env.reset("cbench-v1/crc32") key = "Runtime" @@ -1188,6 +1190,7 @@ def test_runtime_observation_space(env: LlvmEnv): assert len(set(value)) > 1 +@flaky # Runtimes can time out def test_runtime_observation_space_different_observation_count(env: LlvmEnv): """Test setting a custom observation count for LLVM runtimes.""" env.reset("cbench-v1/crc32") @@ -1208,6 +1211,7 @@ def test_runtime_observation_space_different_observation_count(env: LlvmEnv): assert value.shape == (5,) +@flaky # Runtimes can time out def test_runtime_observation_space_invalid_observation_count(env: LlvmEnv): """Test setting an invalid custom observation count for LLVM runtimes.""" env.reset("cbench-v1/crc32") @@ -1233,6 +1237,7 @@ def test_runtime_observation_space_not_runnable(env: LlvmEnv): assert space.space.contains(value) +@flaky # Build can time out def test_buildtime_observation_space(env: LlvmEnv): env.reset("cbench-v1/crc32") key = "Buildtime" From cc53d5cecf889edc88ea1af0cc14df62eba459df Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 11:42:30 +0100 Subject: [PATCH 6/8] [Makefile] Don't print coverage report by default. 
--- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d5e1341ae..3c4558933 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,8 @@ Post-installation Tests The same as `make install-test`, but with python test coverage reporting. A summary of test coverage is printed at the end of execution and the full details are recorded in a coverage.xml file in the project - root directory. + root directory. To print a report of file coverage to stdout at the end + of testing, use argument `PYTEST_ARGS="--cov-report=term"`. make install-fuzz Run the fuzz testing suite against an installed CompilerGym package. @@ -306,7 +307,7 @@ install-test: install-test-setup # environement. This is to ensure that the reported coverage matches that of # the value on: https://codecov.io/gh/facebookresearch/CompilerGym install-test-cov: install-test-setup - export CI=1; $(call pytest,--no-success-flaky-report --benchmark-disable -n auto -k "not fuzz" --durations=5 --cov=compiler_gym --cov-report=xml:$(COV_REPORT) --cov-report=term) + export CI=1; $(call pytest,--no-success-flaky-report --benchmark-disable -n auto -k "not fuzz" --durations=5 --cov=compiler_gym --cov-report=xml:$(COV_REPORT)) # The minimum number of seconds to run the fuzz tests in a loop for. Override # this at the commandline, e.g. `FUZZ_SECONDS=1800 make fuzz`. From 296b007bcff22d3807daafa6e00554f7e055fb8a Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 11:48:16 +0100 Subject: [PATCH 7/8] [tests] Permit ServiceError when 'done' is true. 
--- tests/llvm/all_actions_single_step_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/llvm/all_actions_single_step_test.py b/tests/llvm/all_actions_single_step_test.py index 5e64a34f6..33bfe3ca5 100644 --- a/tests/llvm/all_actions_single_step_test.py +++ b/tests/llvm/all_actions_single_step_test.py @@ -7,6 +7,7 @@ import numpy as np from compiler_gym.envs import CompilerEnv +from compiler_gym.service.connection import ServiceError from compiler_gym.third_party.autophase import AUTOPHASE_FEATURE_DIM from tests.test_main import main @@ -25,6 +26,13 @@ def test_step(env: CompilerEnv, action_name: str): assert isinstance(reward, float) assert isinstance(done, bool) + try: + env.close() + except ServiceError as e: + # env.close() will raise an error if the service terminated + # ungracefully. In that case, the "done" flag should have been set. + assert done, f"Service error was raised when 'done' flag not set: {e}" + if __name__ == "__main__": main() From 348f6ff762c8a111751dd8dfd8d8711b7514f96a Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Wed, 22 Sep 2021 11:52:09 +0100 Subject: [PATCH 8/8] [env] Fix comment string wording. --- compiler_gym/envs/compiler_env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py index db3e712c6..764ddc47e 100644 --- a/compiler_gym/envs/compiler_env.py +++ b/compiler_gym/envs/compiler_env.py @@ -894,7 +894,8 @@ def raw_step( self.close() except ServiceError as e: # close() can raise ServiceError if the service exists with a - # non-zero return code. If so, + # non-zero return code. We swallow the error here but propagate + # the diagnostic message. info[ "error_details" ] += f". Additional error during environment closing: {e}"