From ce20cf5987168ce3fafc84b9fdbc8f839c0097b9 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 10:01:11 +0100
Subject: [PATCH 1/8] [tests] Mark multiprocessing test as flaky.

Fixes #407.
---
 tests/llvm/BUILD                   | 1 -
 tests/llvm/multiprocessing_test.py | 6 ++++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD
index 6a8f19151..de6f62991 100644
--- a/tests/llvm/BUILD
+++ b/tests/llvm/BUILD
@@ -217,7 +217,6 @@ py_test(
 
 py_test(
     name = "multiprocessing_test",
-    timeout = "short",
     srcs = ["multiprocessing_test.py"],
     flaky = 1,
     deps = [
diff --git a/tests/llvm/multiprocessing_test.py b/tests/llvm/multiprocessing_test.py
index e241dcd82..bca182aba 100644
--- a/tests/llvm/multiprocessing_test.py
+++ b/tests/llvm/multiprocessing_test.py
@@ -9,6 +9,7 @@
 
 import gym
 import pytest
+from flaky import flaky
 
 from compiler_gym.envs import LlvmEnv
 from tests.pytest_plugins.common import macos_only
@@ -37,6 +38,7 @@ def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue):
     queue.put((env, observation, reward, done, info))
 
 
+@flaky  # Test contains timeouts.
 def test_running_environment_in_background_process():
     """Test launching and running an LLVM environment in a background process."""
     queue = mp.Queue(maxsize=3)
@@ -46,8 +48,8 @@ def test_running_environment_in_background_process():
     )
     process.start()
     try:
-        process.join(timeout=10)
-        result = queue.get(timeout=10)
+        process.join(timeout=60)
+        result = queue.get(timeout=60)
         observation, reward, done, info = result
 
         assert not done

From 65aa833ea7c4d69cffbe26410c102e2a0a035ca2 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 10:11:26 +0100
Subject: [PATCH 2/8] [tests] Adjust timeouts and shard count.

---
 tests/bin/BUILD  | 1 -
 tests/llvm/BUILD | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/bin/BUILD b/tests/bin/BUILD
index 55d053379..73faeee90 100644
--- a/tests/bin/BUILD
+++ b/tests/bin/BUILD
@@ -16,7 +16,6 @@ py_test(
 
 py_test(
     name = "manual_env_bin_test",
-    timeout = "short",
     srcs = ["manual_env_bin_test.py"],
     flaky = 1,
     deps = [
diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD
index de6f62991..a7407d625 100644
--- a/tests/llvm/BUILD
+++ b/tests/llvm/BUILD
@@ -118,8 +118,9 @@ py_test(
 
 py_test(
     name = "fresh_environment_observation_reward_test",
+    timeout = "long",
     srcs = ["fresh_environment_observation_reward_test.py"],
-    shard_count = 8,
+    shard_count = 12,
     deps = [
         "//compiler_gym/envs",
         "//tests:test_main",

From 0304301cf2378e241d65ff7ff3b2b125e1aa2163 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 10:11:37 +0100
Subject: [PATCH 3/8] [tests] Mark flaky test.

---
 tests/llvm/fresh_environment_observation_reward_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/llvm/fresh_environment_observation_reward_test.py b/tests/llvm/fresh_environment_observation_reward_test.py
index e1cf71018..8e5957e9d 100644
--- a/tests/llvm/fresh_environment_observation_reward_test.py
+++ b/tests/llvm/fresh_environment_observation_reward_test.py
@@ -4,12 +4,15 @@
 # LICENSE file in the root directory of this source tree.
 """Integrations tests for the LLVM CompilerGym environments."""
 
+from flaky import flaky
+
 from compiler_gym.envs import CompilerEnv
 from tests.test_main import main
 
 pytest_plugins = ["tests.pytest_plugins.llvm"]
 
 
+@flaky  # Runtime can time out.
 def test_step(env: CompilerEnv, observation_space: str, reward_space: str):
     """Request every combination of observation and reward in a fresh environment."""
     env.reward_space = None

From f7624fec1244053de6aec5bc880f40551d096b96 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 11:24:02 +0100
Subject: [PATCH 4/8] [tests] Remove unused gvn_sink tests.

Issue #46.
---
 tests/llvm/BUILD            | 10 ------
 tests/llvm/gvn_sink_test.py | 65 -------------------------------------
 2 files changed, 75 deletions(-)
 delete mode 100644 tests/llvm/gvn_sink_test.py

diff --git a/tests/llvm/BUILD b/tests/llvm/BUILD
index a7407d625..6a5ca4bd5 100644
--- a/tests/llvm/BUILD
+++ b/tests/llvm/BUILD
@@ -138,16 +138,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "gvn_sink_test",
-    srcs = ["gvn_sink_test.py"],
-    deps = [
-        "//compiler_gym/envs",
-        "//tests:test_main",
-        "//tests/pytest_plugins:llvm",
-    ],
-)
-
 py_test(
     name = "gym_interface_compatability",
     timeout = "short",
diff --git a/tests/llvm/gvn_sink_test.py b/tests/llvm/gvn_sink_test.py
deleted file mode 100644
index df5c592d8..000000000
--- a/tests/llvm/gvn_sink_test.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-"""Tests for action space determinism."""
-import hashlib
-
-import pytest
-
-from compiler_gym.envs import LlvmEnv
-from tests.test_main import main
-
-pytest_plugins = ["tests.pytest_plugins.llvm"]
-
-ACTION_REPTITION_COUNT = 50
-
-
-@pytest.mark.skip(reason="github.com/facebookresearch/CompilerGym/issues/46")
-@pytest.mark.parametrize(
-    "benchmark_name",
-    [
-        "benchmark://cbench-v1/adpcm",
-        "benchmark://cbench-v1/bitcount",
-        "benchmark://cbench-v1/blowfish",
-        "benchmark://cbench-v1/bzip2",
-        "benchmark://cbench-v1/ghostscript",
-        "benchmark://cbench-v1/gsm",
-        "benchmark://cbench-v1/ispell",
-        "benchmark://cbench-v1/jpeg-c",
-        "benchmark://cbench-v1/jpeg-d",
-        "benchmark://cbench-v1/patricia",
-        "benchmark://cbench-v1/rijndael",
-        "benchmark://cbench-v1/stringsearch",
-        "benchmark://cbench-v1/stringsearch2",
-        "benchmark://cbench-v1/susan",
-        "benchmark://cbench-v1/tiff2bw",
-        "benchmark://cbench-v1/tiff2rgba",
-        "benchmark://cbench-v1/tiffdither",
-        "benchmark://cbench-v1/tiffmedian",
-    ],
-)
-def test_gvn_sink_non_determinism(env: LlvmEnv, benchmark_name: str):
-    """Regression test for -gvn-sink non-determinism.
-    See: https://github.com/facebookresearch/CompilerGym/issues/46
-    """
-    env.observation_space = "Ir"
-
-    checksums = set()
-    for i in range(1, ACTION_REPTITION_COUNT + 1):
-        env.reset(benchmark=benchmark_name)
-        ir, _, done, _ = env.step(env.action_space.names.index("-gvn-sink"))
-        assert not done
-        sha1 = hashlib.sha1()
-        sha1.update(ir.encode("utf-8"))
-        checksums.add(sha1.hexdigest())
-
-        if len(checksums) != 1:
-            pytest.fail(
-                f"Repeating the -gvn-sink action {i} times on {benchmark_name} "
-                "produced different states"
-            )
-
-
-if __name__ == "__main__":
-    main()

From 7a81e2d682f5a7d4bbdb5711754ec07f0439d275 Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 11:37:14 +0100
Subject: [PATCH 5/8] [tests] Mark flaky tests.

---
 tests/llvm/observation_spaces_test.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/llvm/observation_spaces_test.py b/tests/llvm/observation_spaces_test.py
index 25678f927..4e7facf2f 100644
--- a/tests/llvm/observation_spaces_test.py
+++ b/tests/llvm/observation_spaces_test.py
@@ -10,6 +10,7 @@
 import networkx as nx
 import numpy as np
 import pytest
+from flaky import flaky
 from gym.spaces import Box
 from gym.spaces import Dict as DictSpace
 
@@ -1165,6 +1166,7 @@ def test_object_text_size_observation_spaces(env: LlvmEnv):
     assert value == crc32_code_sizes[sys.platform][2]
 
 
+@flaky  # Runtimes can time out.
 def test_runtime_observation_space(env: LlvmEnv):
     env.reset("cbench-v1/crc32")
     key = "Runtime"
@@ -1188,6 +1190,7 @@ def test_runtime_observation_space(env: LlvmEnv):
     assert len(set(value)) > 1
 
 
+@flaky  # Runtimes can time out.
 def test_runtime_observation_space_different_observation_count(env: LlvmEnv):
     """Test setting a custom observation count for LLVM runtimes."""
     env.reset("cbench-v1/crc32")
@@ -1208,6 +1211,7 @@ def test_runtime_observation_space_different_observation_count(env: LlvmEnv):
     assert value.shape == (5,)
 
 
+@flaky  # Runtimes can time out.
 def test_runtime_observation_space_invalid_observation_count(env: LlvmEnv):
     """Test setting an invalid custom observation count for LLVM runtimes."""
     env.reset("cbench-v1/crc32")
@@ -1233,6 +1237,7 @@ def test_runtime_observation_space_not_runnable(env: LlvmEnv):
     assert space.space.contains(value)
 
 
+@flaky  # Build can time out.
 def test_buildtime_observation_space(env: LlvmEnv):
     env.reset("cbench-v1/crc32")
     key = "Buildtime"

From cc53d5cecf889edc88ea1af0cc14df62eba459df Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 11:42:30 +0100
Subject: [PATCH 6/8] [Makefile] Don't print coverage report by default.

---
 Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index d5e1341ae..3c4558933 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,8 @@ Post-installation Tests
         The same as `make install-test`, but with python test coverage
         reporting. A summary of test coverage is printed at the end of execution
         and the full details are recorded in a coverage.xml file in the project
-        root directory.
+        root directory. To print a report of file coverage to stdout at the end
+        of testing, use argument `PYTEST_ARGS="--cov-report=term"`.
 
     make install-fuzz
         Run the fuzz testing suite against an installed CompilerGym package.
@@ -306,7 +307,7 @@ install-test: install-test-setup
 # environement. This is to ensure that the reported coverage matches that of
 # the value on: https://codecov.io/gh/facebookresearch/CompilerGym
 install-test-cov: install-test-setup
-	export CI=1; $(call pytest,--no-success-flaky-report --benchmark-disable -n auto -k "not fuzz" --durations=5 --cov=compiler_gym --cov-report=xml:$(COV_REPORT) --cov-report=term)
+	export CI=1; $(call pytest,--no-success-flaky-report --benchmark-disable -n auto -k "not fuzz" --durations=5 --cov=compiler_gym --cov-report=xml:$(COV_REPORT))
 
 # The minimum number of seconds to run the fuzz tests in a loop for. Override
 # this at the commandline, e.g. `FUZZ_SECONDS=1800 make fuzz`.

From 296b007bcff22d3807daafa6e00554f7e055fb8a Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 11:48:16 +0100
Subject: [PATCH 7/8] [tests] Permit ServiceError when 'done' is true.

---
 tests/llvm/all_actions_single_step_test.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/llvm/all_actions_single_step_test.py b/tests/llvm/all_actions_single_step_test.py
index 5e64a34f6..33bfe3ca5 100644
--- a/tests/llvm/all_actions_single_step_test.py
+++ b/tests/llvm/all_actions_single_step_test.py
@@ -7,6 +7,7 @@
 import numpy as np
 
 from compiler_gym.envs import CompilerEnv
+from compiler_gym.service.connection import ServiceError
 from compiler_gym.third_party.autophase import AUTOPHASE_FEATURE_DIM
 from tests.test_main import main
 
@@ -25,6 +26,13 @@ def test_step(env: CompilerEnv, action_name: str):
     assert isinstance(reward, float)
     assert isinstance(done, bool)
 
+    try:
+        env.close()
+    except ServiceError as e:
+        # env.close() will raise an error if the service terminated
+        # ungracefully. In that case, the "done" flag should have been set.
+        assert done, f"Service error was raised when 'done' flag not set: {e}"
+
 
 if __name__ == "__main__":
     main()

From 348f6ff762c8a111751dd8dfd8d8711b7514f96a Mon Sep 17 00:00:00 2001
From: Chris Cummins <chrisc.101@gmail.com>
Date: Wed, 22 Sep 2021 11:52:09 +0100
Subject: [PATCH 8/8] [env] Fix comment string wording.

---
 compiler_gym/envs/compiler_env.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler_gym/envs/compiler_env.py b/compiler_gym/envs/compiler_env.py
index db3e712c6..764ddc47e 100644
--- a/compiler_gym/envs/compiler_env.py
+++ b/compiler_gym/envs/compiler_env.py
@@ -894,7 +894,8 @@ def raw_step(
                 self.close()
             except ServiceError as e:
                 # close() can raise ServiceError if the service exists with a
-                # non-zero return code. If so,
+                # non-zero return code. We swallow the error here but propagate
+                # the diagnostic message.
                 info[
                     "error_details"
                 ] += f". Additional error during environment closing: {e}"