fix finding of failed tests in output of PyTorch test step #2859

Open · wants to merge 9 commits into develop
165 changes: 100 additions & 65 deletions easybuild/easyblocks/p/pytorch.py
@@ -32,6 +32,7 @@
import re
import tempfile
import easybuild.tools.environment as env
from collections import namedtuple
from distutils.version import LooseVersion
from easybuild.easyblocks.generic.pythonpackage import PythonPackage
from easybuild.framework.easyconfig import CUSTOM
@@ -42,6 +43,94 @@
from easybuild.tools.systemtools import POWER, get_cpu_architecture


def get_count_for_pattern(regex, text):
    """Match the regexp containing a single group and return the integer value of the matched group.
    Return zero if there is no match or more than one match, and print a warning in the latter case.
    """
    match = re.findall(regex, text)
    if len(match) == 1:
        return int(match[0])
    elif len(match) > 1:
        # Shouldn't happen, but means something went wrong with the regular expressions.
        # Print a warning, as the build might be fine; no need to error on this.
        warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
        warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
        print_warning(warn_msg)
    return 0


def extract_failed_tests_info(tests_out):
    """
    Extract information about failed tests from output produced by PyTorch tests
    """

    # Create clear summary report
    failure_report = ""
    failure_cnt = 0
    error_cnt = 0
    failed_test_suites = []

    # Look for patterns like:
    # Ran 219 tests in 67.325s
    #
    # FAILED (errors=10, skipped=190, expected failures=6)
    # test_fx failed!
    regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
             r"FAILED \((?P<failure_summary>.*)\)$\n"
             r"(?:^(?:(?!failed!).)*$\n)*"
             r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")

    for match in re.finditer(regex, tests_out, re.M):
        # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
        failure_summary, test_cnt, test_suite = match.group('failure_summary', 'test_cnt', 'failed_test_suite_name')
        failure_report += test_suite + ' (' + test_cnt + " total tests, " + failure_summary + ')\n'
        failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
        error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
        failed_test_suites.append(test_suite)

    # Look for patterns like:
    # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
    regex = r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* =+$\n(?P<failed_test_suite_name>.*) failed!$"

    for match in re.finditer(regex, tests_out, re.M):
        # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
        failure_summary, test_suite = match.group('failure_summary', 'failed_test_suite_name')
        failure_report += test_suite + ' ' + failure_summary + '\n'
        failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
        error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
        failed_test_suites.append(test_suite)

    # Look for patterns like:
    # AssertionError: 4 unit test(s) failed:
    # ...
    # distributed/test_c10d_gloo failed!
    regex = (r"^AssertionError: (?P<failed_test_cnt>[0-9]+) unit test\(s\) failed:$\n"
             r"(?:^(?:(?!failed!).)*$\n)*"
             r"(?P<failed_test_group_name>.*) failed!$")
    for match in re.finditer(regex, tests_out, re.M):
        test_group, failed_test_cnt = match.group('failed_test_group_name', 'failed_test_cnt')
        failure_report += test_group + ' (' + failed_test_cnt + " failed tests)\n"
        failure_cnt += int(failed_test_cnt)
        failed_test_suites.append(test_group)

    # Look for patterns like:
    # Running test_jit_cuda_fuser ... [2023-01-12 04:04:08.949222]
    # ...
    # AttributeError: 'NoneType' object has no attribute 'split'
    # test_jit_cuda_fuser failed!
    regex = (r"^Running (?P<test_name>test_[^\s]+) .*\n"
             r"(?:^(?:(?!(Ran [0-9]+|failed!)).)*$\n)*"
             r"\1 failed!$")
    for match in re.finditer(regex, tests_out, re.M):
        test_name = match.group('test_name')
        failure_report += test_name + " (unknown failed test count)\n"
        failure_cnt += 1
        failed_test_suites.append(test_name)

    TestsInfo = namedtuple('TestsInfo', ('failure_report', 'failure_cnt', 'error_cnt', 'failed_test_suites'))
    return TestsInfo(failure_report, failure_cnt, error_cnt, failed_test_suites)


class EB_PyTorch(PythonPackage):
"""Support for building/installing PyTorch."""

@@ -268,65 +357,11 @@ def test_step(self):

        tests_out, tests_ec = super(EB_PyTorch, self).test_step(return_output_ec=True)

        def get_count_for_pattern(regex, text):
            """Match the regexp containing a single group and return the integer value of the matched group.
            Return zero if no or more than 1 match was found and warn for the latter case
            """
            match = re.findall(regex, text)
            if len(match) == 1:
                return int(match[0])
            elif len(match) > 1:
                # Shouldn't happen, but means something went wrong with the regular expressions.
                # Throw warning, as the build might be fine, no need to error on this.
                warn_msg = "Error in counting the number of test failures in the output of the PyTorch test suite.\n"
                warn_msg += "Please check the EasyBuild log to verify the number of failures (if any) was acceptable."
                print_warning(warn_msg)
            return 0

        # Create clear summary report
        failure_report = ""
        failure_cnt = 0
        error_cnt = 0
        failed_test_suites = []

        # Grep for patterns like:
        # Ran 219 tests in 67.325s
        #
        # FAILED (errors=10, skipped=190, expected failures=6)
        # test_fx failed!
        regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
                 r"FAILED \((?P<failure_summary>.*)\)$\n"
                 r"(?:^(?:(?!failed!).)*$\n)*"
                 r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")

        for m in re.finditer(regex, tests_out, re.M):
            # E.g. 'failures=3, errors=10, skipped=190, expected failures=6'
            failure_summary = m.group('failure_summary')
            total, test_suite = m.group('test_cnt', 'failed_test_suite_name')
            failure_report += "{test_suite} ({total} total tests, {failure_summary})\n".format(
                test_suite=test_suite, total=total, failure_summary=failure_summary
            )
            failure_cnt += get_count_for_pattern(r"(?<!expected )failures=([0-9]+)", failure_summary)
            error_cnt += get_count_for_pattern(r"errors=([0-9]+)", failure_summary)
            failed_test_suites.append(test_suite)

        # Grep for patterns like:
        # ===================== 2 failed, 128 passed, 2 skipped, 2 warnings in 3.43s =====================
        regex = r"^=+ (?P<failure_summary>.*) in [0-9]+\.*[0-9]*[a-zA-Z]* =+$\n(?P<failed_test_suite_name>.*) failed!$"

        for m in re.finditer(regex, tests_out, re.M):
            # E.g. '2 failed, 128 passed, 2 skipped, 2 warnings'
            failure_summary = m.group('failure_summary')
            test_suite = m.group('failed_test_suite_name')
            failure_report += "{test_suite} ({failure_summary})\n".format(
                test_suite=test_suite, failure_summary=failure_summary
            )
            failure_cnt += get_count_for_pattern(r"([0-9]+) failed", failure_summary)
            error_cnt += get_count_for_pattern(r"([0-9]+) error", failure_summary)
            failed_test_suites.append(test_suite)
        failed_tests_info = extract_failed_tests_info(tests_out)
        failure_report = failed_tests_info.failure_report
Contributor comment:

Nit: To avoid long, distracting lines like sorted(set(failed_tests_info.failed_test_suites)) ("failed" appears twice), maybe either unpack the named tuple here, e.g. failure_report, failed_test_suites = failed_tests_info.failure_report, failed_tests_info.failed_test_suites, or rename the variable to test_info, since test_info.failed_test_suites is easier to read.
And maybe the test_cnt extraction can be done in there too, which would fit the name even better (then drop the "failed" from the function name).
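
A minimal sketch of the rename variant (hypothetical, not code from this diff):

# renaming the result keeps the later attribute accesses short
test_info = extract_failed_tests_info(tests_out)
failure_report = test_info.failure_report
failed_test_suites = sorted(set(test_info.failed_test_suites))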


        # Make the names unique and sorted
        failed_test_suites = sorted(set(failed_test_suites))
        failed_test_suites = sorted(set(failed_tests_info.failed_test_suites))
        # Gather all failed test suites in case we missed any (e.g. when it exited due to syntax errors)
        # Also unique and sorted to be able to compare the lists below
        all_failed_test_suites = sorted(set(
@@ -341,17 +376,17 @@
            failure_report += '\n' + failure_report_save

        # Calculate the number of unsuccessful tests and the total number of tests
        failed_test_cnt = failure_cnt + error_cnt
        failed_test_cnt = failed_tests_info.failure_cnt + failed_tests_info.error_cnt
        test_cnt = sum(int(hit) for hit in re.findall(r"^Ran (?P<test_cnt>[0-9]+) tests in", tests_out, re.M))
        max_failed_tests = self.cfg['max_failed_tests']
        self.log.info("%d unsuccessful tests (out of %d) - max. failed tests set to %d",
                      failed_test_cnt, test_cnt, max_failed_tests)

        if failed_test_cnt > 0:
            max_failed_tests = self.cfg['max_failed_tests']

            failure_or_failures = 'failure' if failure_cnt == 1 else 'failures'
            error_or_errors = 'error' if error_cnt == 1 else 'errors'
            msg = "%d test %s, %d test %s (out of %d):\n" % (
                failure_cnt, failure_or_failures, error_cnt, error_or_errors, test_cnt
            )
            failure_or_failures = 'failure' if failed_tests_info.failure_cnt == 1 else 'failures'
            msg = "%d test %s, " % (failed_tests_info.failure_cnt, failure_or_failures)
            error_or_errors = 'error' if failed_tests_info.error_cnt == 1 else 'errors'
            msg += "%d test %s (out of %d):\n" % (failed_tests_info.error_cnt, error_or_errors, test_cnt)
            msg += failure_report

            # If no tests are supposed to fail, or some tests failed for which we could not count the failures, fail now
125 changes: 125 additions & 0 deletions test/easyblocks/easyblock_specific.py
@@ -40,6 +40,7 @@
from easybuild.base.testing import TestCase
from easybuild.easyblocks.generic.cmakemake import det_cmake_version
from easybuild.easyblocks.generic.toolchain import Toolchain
from easybuild.easyblocks import pytorch
from easybuild.framework.easyblock import EasyBlock, get_easyblock_instance
from easybuild.framework.easyconfig.easyconfig import process_easyconfig
from easybuild.tools import config
@@ -51,6 +52,97 @@
from easybuild.tools.options import set_tmpdir
from easybuild.tools.py2vs3 import StringIO

PYTORCH_TESTS_OUTPUT = """
...
AssertionError: Expected zero exit code but got -6 for pid: 2006681

----------------------------------------------------------------------
Ran 2 tests in 6.576s

FAILED (failures=2)
distributed/fsdp/test_fsdp_input failed!
Running distributed/fsdp/test_fsdp_multiple_forward ... [2023-01-12 05:46:45.746098]

RuntimeError: Process 0 terminated or timed out after 610.0615825653076 seconds

----------------------------------------------------------------------
Ran 1 test in 610.744s

FAILED (errors=1)
Test exited with non-zero exitcode 1. Command to reproduce: /software/Python/3.9.6-GCCcore-11.2.0/bin/python distributed/test_c10d_gloo.py -v DistributedDataParallelTest.test_ddp_comm_hook_register_just_once

RuntimeError: Process 0 terminated or timed out after 610.0726096630096 seconds

----------------------------------------------------------------------
Ran 1 test in 610.729s

FAILED (errors=1)
Test exited with non-zero exitcode 1. Command to reproduce: /software/Python/3.9.6-GCCcore-11.2.0/bin/python distributed/test_c10d_gloo.py -v DistributedDataParallelTest.test_ddp_invalid_comm_hook_init
test_ddp_invalid_comm_hook_return_type (__main__.DistributedDataParallelTest)

AssertionError: 4 unit test(s) failed:
DistributedDataParallelTest.test_ddp_comm_hook_register_just_once
DistributedDataParallelTest.test_ddp_invalid_comm_hook_init
ProcessGroupGlooTest.test_round_robin
ProcessGroupGlooTest.test_round_robin_create_destroy
distributed/test_c10d_gloo failed!
Running distributed/test_c10d_nccl ... [2023-01-12 07:43:41.085197]

ValueError: For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:
4.535600093557479e-05

----------------------------------------------------------------------
Ran 216 tests in 22.396s

FAILED (errors=4)
distributions/test_distributions failed!

Running test_autograd ... [2023-01-13 04:19:25.587981]
Executing ['/software/Python/3.9.6-GCCcore-11.2.0/bin/python', 'test_autograd.py', '-v'] ... [2023-01-13 04:19:25.588074]
...
test_autograd_views_codegen (__main__.TestAutograd) ... ok
...
======================================================================
FAIL: test_thread_shutdown (__main__.TestAutograd)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/tmp/vsc40023/easybuild_build/PyTorch/1.11.0/foss-2021b/pytorch-v1.11.0/test/test_autograd.py", line 4220, in test_thread_shutdown
self.assertRegex(s, "PYTORCH_API_USAGE torch.autograd.thread_shutdown")
AssertionError: Regex didn't match: 'PYTORCH_API_USAGE torch.autograd.thread_shutdown' not found in 'PYTORCH_API_USAGE torch.python.import\nPYTORCH_API_USAGE c10d.python.import\nPYTORCH_API_USAGE tensor.create\n'
----------------------------------------------------------------------
Ran 464 tests in 18.443s

FAILED (failures=1, skipped=52, expected failures=1)
test_autograd failed!
Running test_binary_ufuncs ... [2023-01-12 09:02:45.049490]
...

Running test_jit_cuda_fuser ... [2023-01-12 04:04:08.949222]
Executing ['/software/Python/3.9.6-GCCcore-11.2.0/bin/python', 'test_jit_cuda_fuser.py', '-v'] ... [2023-01-12 04:04:08.949319]
CUDA not available, skipping tests
monkeytype is not installed. Skipping tests for Profile-Directed Typing
Traceback (most recent call last):
File "/tmp/easybuild_build/PyTorch/1.11.0/foss-2021b/pytorch-v1.11.0/test/test_jit_cuda_fuser.py", line 25, in <module>
CUDA_MAJOR, CUDA_MINOR = (int(x) for x in torch.version.cuda.split('.'))
AttributeError: 'NoneType' object has no attribute 'split'
test_jit_cuda_fuser failed!
...
Running distributions/test_constraints ... [2023-01-12 09:05:15.013470]
SKIPPED [2] distributions/test_constraints.py:83: `biject_to` not implemented.
FAILED distributions/test_constraints.py::test_constraint[True-constraint_fn5-False-value5]
FAILED distributions/test_constraints.py::test_constraint[True-constraint_fn7-True-value7]
============= 2 failed, 128 passed, 2 skipped, 2 warnings in 8.66s =============
distributions/test_constraints failed!

Running distributions/rpc/test_tensorpipe_agent ... [2023-01-12 09:06:37.093571]
...
Ran 123 tests in 7.549s

FAILED (errors=2, skipped=2)
...
test_fx failed! Received signal: SIGSEGV
""" # noqa


class EasyBlockSpecificTest(TestCase):
""" Baseclass for easyblock testcases """
@@ -265,6 +357,39 @@ def test_det_cmake_version(self):
"""))
self.assertEqual(det_cmake_version(), '1.2.3-rc4')

    def test_pytorch_extract_failed_tests_info(self):
        """
        Test extract_failed_tests_info function from PyTorch easyblock.
        """
        res = pytorch.extract_failed_tests_info(PYTORCH_TESTS_OUTPUT)
        self.assertEqual(len(res), 4)

        expected_failure_report = '\n'.join([
            "distributed/fsdp/test_fsdp_input (2 total tests, failures=2)",
            "distributions/test_distributions (216 total tests, errors=4)",
            "test_autograd (464 total tests, failures=1, skipped=52, expected failures=1)",
            "test_fx (123 total tests, errors=2, skipped=2)",
Contributor comment:

This or the SIGSEGV test needs to be a different one, or failed_test_suites could pass even when the signal was not detected: in the case of a forced termination due to a signal there won't be a test summary, will there?

Contributor comment (@Flamefire, Jan 16, 2023):

Looks like we have a bug here. For the output of

Running distributions/rpc/test_tensorpipe_agent ... [2023-01-12 09:06:37.093571]
...
Ran 123 tests in 7.549s
FAILED (errors=2, skipped=2)
...
test_fx failed! Received signal: SIGSEGV

it must NOT match the RegExp. The original, correct output is:

FAILED (errors=2, skipped=2)
distributed/rpc/test_tensorpipe_agent failed!

So the regexp must be changed to only count the "FAILED" summary if the next line is the failed test; see https://gist.github.com/Flamefire/dc1403ccefdebfc3412c6fbb2d5cbabd#file-pytorch-1-9-0-foss-2020b_partial-log-L480
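
A minimal sketch of that tightening (hypothetical, not the pattern in this diff): drop the group that allows arbitrary lines between the FAILED summary and the "failed!" marker, so the counts are only attributed to the suite named on the immediately following line:

# "<suite> failed!" must directly follow the "FAILED (...)" summary line
regex = (r"^Ran (?P<test_cnt>[0-9]+) tests.*$\n\n"
         r"FAILED \((?P<failure_summary>.*)\)$\n"
         r"(?P<failed_test_suite_name>.*) failed!(?: Received signal: \w+)?\s*$")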

I would actually add this wrong output as a test against regressions. With that snippet it should only find test_fx as failed (with an unknown number of tests).
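
A sketch of such a regression test, asserting the behavior this comment calls for (WRONG_OUTPUT is a hypothetical fixture holding the misleading snippet quoted above):

def test_pytorch_sigsegv_output(self):
    """A FAILED summary of another suite must not be attributed to a suite killed by a signal."""
    res = pytorch.extract_failed_tests_info(WRONG_OUTPUT)
    # the 'FAILED (errors=2, skipped=2)' summary belongs to test_tensorpipe_agent,
    # so no errors may be counted from this snippet
    self.assertEqual(res.error_cnt, 0)
    # only test_fx should be reported as failed, with an unknown number of tests
    self.assertEqual(res.failed_test_suites, ['test_fx'])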

"distributions/test_constraints 2 failed, 128 passed, 2 skipped, 2 warnings",
"distributed/test_c10d_gloo (4 failed tests)",
"test_jit_cuda_fuser (unknown failed test count)",
])
        self.assertEqual(res.failure_report.strip(), expected_failure_report)
        # test failures
        self.assertEqual(res.failure_cnt, 10)
        # test errors
        self.assertEqual(res.error_cnt, 6)

        expected_failed_test_suites = [
            'distributed/fsdp/test_fsdp_input',
            'distributions/test_distributions',
            'test_autograd',
            'test_fx',
            'distributions/test_constraints',
            'distributed/test_c10d_gloo',
            'test_jit_cuda_fuser',
        ]
        self.assertEqual(res.failed_test_suites, expected_failed_test_suites)


def suite():
"""Return all easyblock-specific tests."""