#!/usr/bin/env python3
"""
Analyze downloaded GH logs and print a report. Use gh_report.py instead of this script directly.
"""

import sys
import json
import argparse
import re
from collections import Counter
from pathlib import Path


# \u200c is appended after each icon so that len() of the label matches its
# rendered width: ❌, βœ…, πŸ”„, πŸ™ˆ each occupy two character cells on screen.
FLAKY = "πŸ”„\u200cflaky"
FAIL = "❌\u200cFAIL"
PASS = "βœ…\u200cpass"
SKIP = "πŸ™ˆ\u200cskip"

# Map raw `go test -json` action strings onto the decorated labels above.
ACTION_MESSAGES = {
    "fail": FAIL,
    "pass": PASS,
    "skip": SKIP,
}


def cleanup_env(name):
    """Shorten a GH artifact directory name to a compact env label.

    Returns "" for directory names that are not test-output artifacts.

    >>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest")
    'aws/lin'

    >>> cleanup_env("test-output-gcp-prod-is-windows-server-latest")
    'gcp/win'

    >>> cleanup_env("test-output-azure-prod-ucws-is-linux-ubuntu-latest")
    'az-ucws/lin'
    """
    if not name.startswith("test-output-"):
        return ""
    label = name.removeprefix("test-output-")
    # Order matters: the ucws variant must be rewritten before the plain one.
    for old, new in (
        ("-prod-ucws-is-", "-ucws-"),
        ("-prod-is-", "-"),
        ("-linux-ubuntu-latest", "/lin"),
        ("-windows-server-latest", "/win"),
        ("azure", "az"),
    ):
        label = label.replace(old, new)
    return label


def iter_path(filename):
    """Yield `filename` itself if it is a file, otherwise every file below it.

    NOTE: the recursive branch uses Path.walk(), which requires Python 3.12+.
    """
    root = Path(filename)
    if root.is_file():
        yield filename
        return
    for dirpath, _dirnames, filenames in root.walk():
        for entry in filenames:
            yield dirpath / entry


def iter_paths(paths):
    """Flatten iter_path() over a sequence of files and/or directories."""
    for item in paths:
        yield from iter_path(item)


def parse_file(path, filter):
    """Parse one `go test -json` log file.

    Returns (results, outputs):
      results: testname -> FAIL / PASS / SKIP / FLAKY
               (a FAIL later followed by a PASS for the same test is FLAKY)
      outputs: testname -> list of output lines (trailing whitespace stripped)

    Only records carrying a "Test" field are considered. When `filter` is
    truthy, only test names containing it as a substring are kept.
    """
    results = {}
    outputs = {}
    for line in path.open():
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except Exception as ex:
            # Stop at the first corrupted line; the rest of the file is suspect.
            print(f"(unknown): {ex}\n{line!r}\n")
            break
        testname = data.get("Test")
        if not testname:
            continue
        if filter and filter not in testname:
            continue

        action = ACTION_MESSAGES.get(data.get("Action"), data.get("Action"))
        if action in (FAIL, PASS, SKIP):
            # A test that failed and then passed on a re-run is flaky.
            if results.get(testname) == FAIL and action == PASS:
                results[testname] = FLAKY
            else:
                results[testname] = action

        out = data.get("Output")
        if out:
            outputs.setdefault(testname, []).append(out.rstrip())

    return results, outputs
def print_report(filenames, filter, filter_env, show_output):
    """Aggregate parsed test logs and print two summary tables.

    filenames: files or directories holding `go test -json` logs; the parent
        directory name of each file encodes the environment (see cleanup_env).
    filter: substring filter on test names (forwarded to parse_file).
    filter_env: substring filter on environment labels.
    show_output: when True, also dump captured output of failing/flaky tests.
    """
    outputs = {}  # testname -> env -> [output]
    per_env_stats = {}  # env -> action -> count
    per_test_per_env_stats = {}  # testname -> env -> action -> count
    all_testnames = set()
    for filename in iter_paths(filenames):
        p = Path(filename)
        env = cleanup_env(p.parent.name)
        if not env:
            # Name the offending file so the warning is actionable
            # (the message previously carried no context at all).
            print(f"Ignoring {p}: cannot extract env")
            continue
        if filter_env and filter_env not in env:
            continue
        test_results, test_outputs = parse_file(p, filter)
        for testname, action in test_results.items():
            per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1
            per_env_stats.setdefault(env, Counter())[action] += 1
        for testname, output in test_outputs.items():
            outputs.setdefault(testname, {}).setdefault(env, []).extend(output)
        all_testnames.update(test_results)

    # Per-environment summary, prefixed with the "worst" status icon.
    table = []
    for env, stats in sorted(per_env_stats.items()):
        if FAIL in stats:
            status = FAIL[:2]
        elif FLAKY in stats:
            status = FLAKY[:2]
        elif PASS in stats:
            status = PASS[:2]
        else:
            status = "??"
        table.append(
            {
                " ": status,
                "env": env,
                **stats,
            }
        )
    print_table(table)

    # Envs that saw at least one failure or flake; rows below are only
    # completed with pass/skip entries for these envs.
    interesting_envs = {env for env, stats in per_env_stats.items() if FAIL in stats or FLAKY in stats}

    simplified_results = {}  # testname -> env -> action
    for testname, items in sorted(per_test_per_env_stats.items()):
        per_testname_result = simplified_results.setdefault(testname, {})
        # First record FAIL or FLAKY entries.
        for env, counts in items.items():
            for action in (FAIL, FLAKY):
                if action in counts:
                    per_testname_result.setdefault(env, action)
                    break

        # A test is only interesting if it had FAIL or FLAKY entries above.
        # In that case complete the row:
        if per_testname_result:
            for env, counts in items.items():
                if env not in interesting_envs:
                    continue
                for action in (PASS, SKIP):
                    if action in counts:
                        per_testname_result.setdefault(env, action)
                        break

        if not per_testname_result:
            # Nothing interesting for this test: drop the empty row again.
            simplified_results.pop(testname)

    table = [{"test": testname, **items} for testname, items in simplified_results.items()]
    print_table(table)

    if show_output:
        for testname, stats in simplified_results.items():
            for env, action in stats.items():
                if action not in (FAIL, FLAKY):
                    continue
                out = "\n".join(outputs.get(testname, {}).get(env, []))
                print(f"### {env} {testname} {action}\n{out}")
                if out:
                    print()


def print_table(table, columns=None):
    """
    Pretty-print a list-of-dicts as an aligned text table.

    Args:
        table (list[dict]): the data rows; missing cells render as ""
        columns (list[str]): header names & column order; derived from the
            rows in first-seen order when omitted
    """
    if not table:
        return

    if columns is None:
        columns = []
        seen = set()
        for row in table:
            for key in row:
                if key in seen:
                    continue
                seen.add(key)
                columns.append(key)

    widths = [len(col) for col in columns]
    for row in table:
        for i, col in enumerate(columns):
            widths[i] = max(widths[i], len(str(row.get(col, ""))))

    def fmt(cells):
        # def instead of a name-bound lambda (PEP 8 E731).
        return " ".join(str(cell).ljust(w) for cell, w in zip(cells, widths))

    print(fmt(columns))
    for row in table:  # previously enumerate() with an unused index
        print(fmt([row.get(col, "") for col in columns]))

    print()


def main():
    """CLI entry point; see print_report for the semantics of each flag."""
    parser = argparse.ArgumentParser()
    parser.add_argument("filenames", nargs="+", help="Filenames or directories to parse")
    parser.add_argument("--filter", help="Filter results by test name (substring match)")
    parser.add_argument("--filter-env", help="Filter results by env name (substring match)")
    parser.add_argument("--output", help="Show output for failed tests", action="store_true")
    args = parser.parse_args()
    print_report(args.filenames, filter=args.filter, filter_env=args.filter_env, show_output=args.output)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Download integration logs artifacts for a given run id (--run RUNID) or commit (--commit) and call gh_parse.py on those to print the report.

If neither --commit nor --run are passed, will use either current PR or HEAD.
"""

import sys
import os
import subprocess
import argparse
import json
import pprint


CLI_REPO = "databricks/cli"
# Fall back to GITHUB_REPOSITORY so the script also works in CI or without
# DECO_REPO exported; os.environ["DECO_REPO"] would raise KeyError at import
# time. May still end up None; callers must check before use.
DECO_REPO = os.environ.get("DECO_REPO") or os.environ.get("GITHUB_REPOSITORY")
DECO_TESTS_PREFIX = "https://go/deco-tests/"
CLI_TESTS_PREFIX = "https://github.com/databricks/cli/actions/runs/"


def run(cmd, shell=False):
    """Echo `cmd` to stderr, then run it, raising CalledProcessError on failure.

    A string command runs through the shell; a list runs directly (the `shell`
    parameter itself is ignored — the choice is made from the type of `cmd`).
    NOTE(review): the shell=True path on an interpolated string is
    injection-prone; prefer passing an argv list.
    """
    if isinstance(cmd, str):
        sys.stderr.write(f"+ {cmd}\n")
        return subprocess.run(cmd, check=True, shell=True)
    else:
        sys.stderr.write("+ " + " ".join(cmd) + "\n")
        return subprocess.run(cmd, check=True, shell=False)


def run_text(cmd, print_command=False):
    """Run `cmd` (argv list) and return its stripped stdout as text."""
    if print_command:
        sys.stderr.write("+ " + " ".join(cmd) + "\n")
    result = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8", check=True)
    return result.stdout.strip()


def run_json(cmd):
    """Run `cmd` (argv list) and return its stdout parsed as JSON.

    On parse failure the raw stdout is written to stderr before re-raising.
    """
    sys.stderr.write("+ " + " ".join(cmd) + "\n")
    result = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8", check=True)

    try:
        return json.loads(result.stdout)
    except Exception:
        sys.stderr.write(f"Failed to parse JSON:\n{result.stdout}\n")
        raise


def current_branch():
    """Name of the currently checked-out git branch."""
    return run_text("git branch --show-current".split())


def get_run_id_from_items(items, field, prefix, data):
    """Extract the newest numeric run id from `items[*][field]` URLs.

    Only URLs starting with `prefix` are considered; the run id is the first
    path component after the prefix. Exits the process when none is found;
    warns (but proceeds with the largest, i.e. most recent) when several are
    found. `data` is only used for diagnostics on failure.
    """
    found = set()

    for item in items or []:
        url = item.get(field, "")
        if url.startswith(prefix):
            run_id = url.removeprefix(prefix).split("/")[0]
            assert run_id.isdigit(), url
            found.add(int(run_id))

    found = sorted(found)

    if not found:
        print(pprint.pformat(data), flush=True, file=sys.stderr)
        sys.exit(f"run_id not found (search: {field=} {prefix=})")
    elif len(found) > 1:
        print(f"many run_ids (search: {field=} {prefix=}): {found}", file=sys.stderr, flush=True)

    return found[-1]


def get_pr_run_id_integration():
    """Integration-test run id from the current PR's status checks."""
    data = run_json("gh pr status --json statusCheckRollup".split())
    items = data.get("currentBranch", {}).get("statusCheckRollup")
    return get_run_id_from_items(items, "targetUrl", DECO_TESTS_PREFIX, data)


def get_commit_run_id_integration(commit):
    """Integration-test run id from a commit's GitHub statuses."""
    data = run_json(["gh", "api", f"repos/databricks/cli/commits/{commit}/status"])
    items = data.get("statuses")
    return get_run_id_from_items(items, "target_url", DECO_TESTS_PREFIX, data)


def get_pr_run_id_unit():
    """Unit-test ("build" workflow) run id from the current PR's status checks."""
    data = run_json("gh pr status --json statusCheckRollup".split())
    items = data.get("currentBranch", {}).get("statusCheckRollup")
    items = [x for x in items if x.get("workflowName") == "build"]
    return get_run_id_from_items(items, "detailsUrl", CLI_TESTS_PREFIX, data)


def get_commit_run_id_unit(commit):
    """Unit-test ("build" workflow) run id for a commit via `gh run list`."""
    data = run_json(["gh", "run", "list", "-c", commit, "--json", "databaseId,workflowName"])
    results = []
    try:
        for item in data:
            if item["workflowName"] == "build":
                results.append(int(item["databaseId"]))
        results.sort()
        assert len(results) == 1, results
    except Exception:
        # Dump the raw payload for debugging; only re-raise when we found
        # nothing usable at all (a duplicate is survivable, a miss is not).
        print(pprint.pformat(data), flush=True, file=sys.stderr)
        if not results:
            raise

    return results[-1]
def download_run_id(run_id, repo, rm):
    """Download the artifacts of `run_id` from `repo` into .gh-logs/<run_id>.

    A previously downloaded directory is reused as-is unless `rm` is set, in
    which case it is wiped first and fetched again. Returns the target dir.
    """
    target_dir = f".gh-logs/{run_id}"
    if os.path.exists(target_dir):
        if not rm:
            print(f"Already exists: {target_dir}. If that directory contains partial results, delete it to re-download: rm -fr .gh-logs/{run_id}")
            return target_dir
        run(f"rm -fr {target_dir}")
    run(["gh", "run", "-R", repo, "download", str(run_id), "-D", target_dir])
    return target_dir


def main():
    """Resolve a run id (flag, commit status, or current PR), download its
    artifacts and delegate the report to tools/gh_parse.py."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--run", type=int, help="Github run_id to load")
    parser.add_argument("--commit", help="Commit to get run_id from. If not set, getting either PR status or most recent commit")
    parser.add_argument("--rm", help="Remove previously downloaded files first", action="store_true")
    parser.add_argument("--filter", help="Filter results by test name (substring match)")
    parser.add_argument("--filter-env", help="Filter results by env name (substring match)")
    parser.add_argument("--output", help="Show output for failing tests", action="store_true")

    # Unit-test artifacts are not stored, so this mode cannot work yet; logs
    # could be downloaded instead but would need a different parser:
    # ~/work/cli % gh api -H "Accept: application/vnd.github+json" /repos/databricks/cli/actions/runs/15827411452/logs > logs.zip
    parser.add_argument("--unit", action="store_true", help="Extract run_id for unit tests rather than integration tests (not working)")
    args = parser.parse_args()

    repo = CLI_REPO if args.unit else DECO_REPO

    if not args.run and not args.commit:
        if current_branch() == "main":
            args.commit = run_text("git rev-parse --short HEAD".split())
        else:
            args.run = get_pr_run_id_unit() if args.unit else get_pr_run_id_integration()

    if args.commit:
        assert not args.run
        args.run = get_commit_run_id_unit(args.commit) if args.unit else get_commit_run_id_integration(args.commit)

    target_dir = download_run_id(args.run, repo, rm=args.rm)
    print(flush=True)

    # Build the gh_parse.py invocation as one shell string; globbing of the
    # artifact files is intentionally left to the shell.
    parse_cmd = ["./tools/gh_parse.py"]
    if args.filter:
        parse_cmd.append(f"--filter {args.filter}")
    if args.filter_env:
        parse_cmd.append(f"--filter-env {args.filter_env}")
    if args.output:
        parse_cmd.append("--output")
    parse_cmd.append(f"{target_dir}/*/*")
    run(" ".join(parse_cmd), shell=True)


if __name__ == "__main__":
    try:
        main()
    except subprocess.CalledProcessError as ex:
        sys.exit(ex)
# \u200c is appended after each icon so that len() of the label matches its
# rendered width: each emoji occupies two character cells on screen.
FLAKY = "πŸ”„\u200cflaky"
FAIL = "❌\u200cFAIL"
PASS = "βœ…\u200cpass"
SKIP = "πŸ™ˆ\u200cskip"

# This happens when Eventually is used - there is output for the test but no result.
MISSING = "🀯\u200cMISS"
PANIC = "πŸ’₯\u200cPANIC"

INTERESTING_ACTIONS = (FAIL, FLAKY, PANIC, MISSING)
ACTIONS_WITH_ICON = INTERESTING_ACTIONS + (PASS, SKIP)

# Map raw `go test -json` action strings onto the decorated labels above.
ACTION_MESSAGES = {
    "fail": FAIL,
    "pass": PASS,
    "skip": SKIP,
}


def parse_file(path, filter):
    """Parse one `go test -json` log file.

    Returns (results, outputs):
      results: testname -> FAIL / PASS / SKIP / FLAKY / PANIC / MISSING
               (a FAIL later followed by a PASS for the same test is FLAKY)
      outputs: testname -> list of output lines (trailing whitespace stripped)

    Only records carrying a "Test" field are considered. When `filter` is
    truthy, only test names containing it as a substring are kept.
    """
    results = {}
    outputs = {}
    for line in path.open():
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except Exception as ex:
            # Stop at the first corrupted line; the rest of the file is suspect.
            print(f"(unknown): {ex}\n{line!r}\n")
            break
        testname = data.get("Test")
        if not testname:
            continue
        if filter and filter not in testname:
            continue
        action = data.get("Action")

        action = ACTION_MESSAGES.get(action, action)

        if action in (FAIL, PASS, SKIP):
            prev = results.get(testname)
            if prev == FAIL and action == PASS:
                results[testname] = FLAKY
            else:
                results[testname] = action

        out = data.get("Output")
        if out:
            outputs.setdefault(testname, []).append(out.rstrip())

    # Classify tests that produced output but never reported a result:
    # a panic in the output means PANIC, otherwise the result is MISSING
    # (e.g. Eventually() ran out of time before the test could report).
    for testname, lines in outputs.items():
        if testname in results:
            continue
        if "panic: " in str(lines):
            results.setdefault(testname, PANIC)
        else:
            # BUG FIX: this previously referenced the undefined name `MISS`
            # (the constant is MISSING), raising NameError at runtime.
            results.setdefault(testname, MISSING)

    return results, outputs
- # In that case complete the row: + # Once we know test is interesting, complete the row if per_testname_result: for env, counts in items.items(): if env not in interesting_envs: @@ -180,7 +191,7 @@ def print_report(filenames, filter, filter_env, show_output): if show_output: for testname, stats in simplified_results.items(): for env, action in stats.items(): - if action not in (FAIL, FLAKY): + if action not in INTERESTING_ACTIONS: continue out = "\n".join(outputs.get(testname, {}).get(env, [])) print(f"### {env} {testname} {action}\n{out}") From 55246d576ca37baaabe9ddf3017c3296603b2fc3 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 10:16:11 +0200 Subject: [PATCH 04/20] make sure to find gh_parse.py no matter how we run --- tools/gh_report.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/gh_report.py b/tools/gh_report.py index 1718e8e47c..b3c2513ee4 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -11,12 +11,20 @@ import argparse import json import pprint +from pathlib import Path CLI_REPO = "databricks/cli" DECO_REPO = os.environ["DECO_REPO"] DECO_TESTS_PREFIX = "https://go/deco-tests/" CLI_TESTS_PREFIX = "https://github.com/databricks/cli/actions/runs/" +DIRECTORY = Path(__file__).parent +PARSE_SCRIPT = DIRECTORY / "gh_parse.py" + +try: + PARSE_SCRIPT = PARSE_SCRIPT.relative_to(os.getcwd()) +except Exception: + pass # keep absolute def run(cmd, shell=False): @@ -154,7 +162,7 @@ def main(): target_dir = download_run_id(args.run, repo, rm=args.rm) print(flush=True) - cmd = ["./tools/gh_parse.py"] + cmd = [str(PARSE_SCRIPT)] if args.filter: cmd.append(f"--filter {args.filter}") if args.filter_env: From 14cb5703b226fdc71bb61abfaf8f2a1db8fe72c9 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 11:06:38 +0200 Subject: [PATCH 05/20] fix bug when calculating "interesting envs" --- tools/gh_parse.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/tools/gh_parse.py b/tools/gh_parse.py index 697410ffde..de52c99229 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -152,8 +152,10 @@ def print_report(filenames, filter, filter_env, show_output): interesting_envs = set() for env, stats in per_env_stats.items(): - if FAIL in stats or FLAKY in stats: - interesting_envs.add(env) + for act in INTERESTING_ACTIONS: + if act in stats: + interesting_envs.add(env) + break simplified_results = {} # testname -> env -> action for testname, items in sorted(per_test_per_env_stats.items()): From b5f06e546d1f05dda4c075927b4d82da7ae66d8e Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 12:39:43 +0200 Subject: [PATCH 06/20] Also read GITHUB_REPOSITORY --- tools/gh_report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/gh_report.py b/tools/gh_report.py index b3c2513ee4..9be43987a7 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -15,7 +15,7 @@ CLI_REPO = "databricks/cli" -DECO_REPO = os.environ["DECO_REPO"] +DECO_REPO = os.environ.get("DECO_REPO") or os.environ.get("GITHUB_REPOSITORY") DECO_TESTS_PREFIX = "https://go/deco-tests/" CLI_TESTS_PREFIX = "https://github.com/databricks/cli/actions/runs/" DIRECTORY = Path(__file__).parent @@ -143,6 +143,7 @@ def main(): args = parser.parse_args() repo = CLI_REPO if args.unit else DECO_REPO + assert repo if not args.run and not args.commit: if current_branch() == "main": From 01c4e5f937fa07a3aca7fac1f794936b15eab9dc Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 16:42:01 +0200 Subject: [PATCH 07/20] missing tests support --- tools/gh_parse.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index de52c99229..67f67299c9 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -114,9 +114,9 @@ def parse_file(path, filter): def print_report(filenames, filter, filter_env, show_output): outputs = {} # testname -> env -> [output] 
- per_env_stats = {} # env -> action -> count per_test_per_env_stats = {} # testname -> env -> action -> count all_testnames = set() + all_envs = set() for filename in iter_paths(filenames): p = Path(filename) env = cleanup_env(p.parent.name) @@ -125,14 +125,33 @@ def print_report(filenames, filter, filter_env, show_output): continue if filter_env and filter_env not in env: continue + all_envs.add(env) test_results, test_outputs = parse_file(p, filter) for testname, action in test_results.items(): per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1 - per_env_stats.setdefault(env, Counter())[action] += 1 for testname, output in test_outputs.items(): outputs.setdefault(testname, {}).setdefault(env, []).extend(output) all_testnames.update(test_results) + # Check for missing tests + for testname in all_testnames: + # It is possible for test to be missing if it's parent is skipped, ignore test cases with a parent. + # For acceptance tests, ignore tests with subtests produced via EnvMatrix + if testname.startswith("TestAccept/") and "=" in testname: + continue + # For non-acceptance tests ignore all subtests. + if not testname.startswith("TestAccept/") and "/" in testname: + continue + test_results = per_test_per_env_stats.get(testname, {}) + for e in all_envs: + if e not in test_results: + test_results.setdefault(e, Counter())[MISSING] += 1 + + per_env_stats = {} # env -> action -> count + for testname, items in per_test_per_env_stats.items(): + for env, stats in items.items(): + per_env_stats.setdefault(env, Counter()).update(stats) + table = [] for env, stats in sorted(per_env_stats.items()): status = "??" 
From ff3d4c9d2c8c44d948b703e0b94fb4a99f921fa9 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 17:48:39 +0200 Subject: [PATCH 08/20] try stdout/stderr reconfigure https://stackoverflow.com/a/73486671 --- tools/gh_parse.py | 4 ++++ tools/gh_report.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 67f67299c9..28e8b6bf7e 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -4,6 +4,10 @@ """ import sys + +sys.stdout.reconfigure(encoding="utf-8") +sys.stderr.reconfigure(encoding="utf-8") + import json import argparse import re diff --git a/tools/gh_report.py b/tools/gh_report.py index 9be43987a7..9b23ca2434 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -6,6 +6,10 @@ """ import sys + +sys.stdout.reconfigure(encoding="utf-8") +sys.stderr.reconfigure(encoding="utf-8") + import os import subprocess import argparse From b1865a3a91bbbb791c80a9152c68ffd3c00bcd55 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 17:57:04 +0200 Subject: [PATCH 09/20] add --markdown --- tools/gh_parse.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 28e8b6bf7e..0ee2704d2d 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -116,7 +116,7 @@ def parse_file(path, filter): return results, outputs -def print_report(filenames, filter, filter_env, show_output): +def print_report(filenames, filter, filter_env, show_output, markdown=False): outputs = {} # testname -> env -> [output] per_test_per_env_stats = {} # testname -> env -> action -> count all_testnames = set() @@ -171,7 +171,7 @@ def print_report(filenames, filter, filter_env, show_output): **stats, } ) - print_table(table) + print_table(table, markdown=markdown) interesting_envs = set() for env, stats in per_env_stats.items(): @@ -211,7 +211,7 @@ def print_report(filenames, filter, filter_env, show_output): **items, } ) - 
print_table(table) + print_table(table, markdown=markdown) if show_output: for testname, stats in simplified_results.items(): @@ -219,18 +219,22 @@ def print_report(filenames, filter, filter_env, show_output): if action not in INTERESTING_ACTIONS: continue out = "\n".join(outputs.get(testname, {}).get(env, [])) - print(f"### {env} {testname} {action}\n{out}") + if markdown: + print(f"### {env} {testname} {action}\n```\n{out}\n```") + else: + print(f"### {env} {testname} {action}\n{out}") if out: print() -def print_table(table, columns=None): +def print_table(table, columns=None, markdown=False): """ Pretty-print a list-of-dicts as an aligned text table. Args: table (list[dict]): the data rows columns (list[str]): header names & column order + markdown (bool): whether to output in markdown format """ if not table: return @@ -250,11 +254,19 @@ def print_table(table, columns=None): for i, col in enumerate(columns): widths[i] = max(widths[i], len(str(row.get(col, "")))) - fmt = lambda cells: " ".join(str(cell).ljust(w) for cell, w in zip(cells, widths)) - - print(fmt(columns)) - for ind, row in enumerate(table): - print(fmt([row.get(col, "") for col in columns])) + if markdown: + # Header + print("| " + " | ".join(str(col).ljust(w) for col, w in zip(columns, widths)) + " |") + # Separator + print("| " + " | ".join("-" * w for w in widths) + " |") + # Data rows + for row in table: + print("| " + " | ".join(str(row.get(col, "")).ljust(w) for col, w in zip(columns, widths)) + " |") + else: + fmt = lambda cells: " ".join(str(cell).ljust(w) for cell, w in zip(cells, widths)) + print(fmt(columns)) + for ind, row in enumerate(table): + print(fmt([row.get(col, "") for col in columns])) print() @@ -265,8 +277,9 @@ def main(): parser.add_argument("--filter", help="Filter results by test name (substring match)") parser.add_argument("--filter-env", help="Filter results by env name (substring match)") parser.add_argument("--output", help="Show output for failed tests", 
action="store_true") + parser.add_argument("--markdown", help="Output in GitHub-flavored markdown format", action="store_true") args = parser.parse_args() - print_report(args.filenames, filter=args.filter, filter_env=args.filter_env, show_output=args.output) + print_report(args.filenames, filter=args.filter, filter_env=args.filter_env, show_output=args.output, markdown=args.markdown) if __name__ == "__main__": From f3c9a4d7d628c200b1c61937f317532ec785baf0 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 18:01:41 +0200 Subject: [PATCH 10/20] table formatting --- tools/gh_parse.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 0ee2704d2d..ae8c601b07 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -39,21 +39,21 @@ def cleanup_env(name): """ >>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest") - 'aws/lin' + 'aws lin' >>> cleanup_env("test-output-gcp-prod-is-windows-server-latest") - 'gcp/win' + 'gcp win' >>> cleanup_env("test-output-azure-prod-ucws-is-linux-ubuntu-latest") - 'az-ucws/lin' + 'az-ucws lin' """ if not name.startswith("test-output-"): return "" name = name.removeprefix("test-output-") name = name.replace("-prod-ucws-is-", "-ucws-") name = name.replace("-prod-is-", "-") - name = name.replace("-linux-ubuntu-latest", "/lin") - name = name.replace("-windows-server-latest", "/win") + name = name.replace("-linux-ubuntu-latest", " lin") + name = name.replace("-windows-server-latest", " win") name = name.replace("azure", "az") return name @@ -207,7 +207,7 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False): for testname, items in simplified_results.items(): table.append( { - "test": testname, + "Test Name": testname, **items, } ) From 9e99ae4f5ea6adea9aa9250e156644c2805cc0f0 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 18:02:46 +0200 Subject: [PATCH 11/20] propagate --markdown --- tools/gh_report.py | 
3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/gh_report.py b/tools/gh_report.py index 9b23ca2434..a02c481fea 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -140,6 +140,7 @@ def main(): parser.add_argument("--filter", help="Filter results by test name (substring match)") parser.add_argument("--filter-env", help="Filter results by env name (substring match)") parser.add_argument("--output", help="Show output for failing tests", action="store_true") + parser.add_argument("--markdown", help="Output in GitHub-flavored markdown format", action="store_true") # This does not work because we don't store artifacts for unit tests. We could download logs instead but that requires different parsing method: # ~/work/cli % gh api -H "Accept: application/vnd.github+json" /repos/databricks/cli/actions/runs/15827411452/logs > logs.zip @@ -174,6 +175,8 @@ def main(): cmd.append(f"--filter-env {args.filter_env}") if args.output: cmd.append(f"--output") + if args.markdown: + cmd.append(f"--markdown") cmd.append(f"{target_dir}/*/*") run(" ".join(cmd), shell=True) From 2d41df882d472834a2b41c370e384dd05254c057 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 18:10:50 +0200 Subject: [PATCH 12/20] table formatting --- tools/gh_parse.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index ae8c601b07..d69a46787d 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -39,22 +39,21 @@ def cleanup_env(name): """ >>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest") - 'aws lin' + 'aws linux' >>> cleanup_env("test-output-gcp-prod-is-windows-server-latest") - 'gcp win' + 'gcp windows' >>> cleanup_env("test-output-azure-prod-ucws-is-linux-ubuntu-latest") - 'az-ucws lin' + 'azure ucws linux' """ if not name.startswith("test-output-"): return "" name = name.removeprefix("test-output-") - name = name.replace("-prod-ucws-is-", "-ucws-") + name = name.replace("-prod-ucws-is-", " 
ucws-") name = name.replace("-prod-is-", "-") - name = name.replace("-linux-ubuntu-latest", " lin") - name = name.replace("-windows-server-latest", " win") - name = name.replace("azure", "az") + name = name.replace("-linux-ubuntu-latest", " linux") + name = name.replace("-windows-server-latest", " windows") return name From 821446840658ce3dbf457a9cf47f4fd37a2f1002 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 18:11:41 +0200 Subject: [PATCH 13/20] sort columns --- tools/gh_parse.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index d69a46787d..ade0cd18e3 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -166,7 +166,7 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False): table.append( { " ": status, - "env": env, + "Env": env, **stats, } ) @@ -247,6 +247,7 @@ def print_table(table, columns=None, markdown=False): continue seen.add(key) columns.append(key) + columns.sort() widths = [len(col) for col in columns] for row in table: From cdb67208e7f928221a3f4cde7cbcae02b9a02f5a Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 21:43:26 +0200 Subject: [PATCH 14/20] move comment to the right place --- tools/gh_parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index ade0cd18e3..7bbb14f606 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -15,6 +15,8 @@ from pathlib import Path +# \u200c is zero-width space. It is added so that len of the string corresponds to real width. +# ❌, βœ…, πŸ”„ each take space of 2 characters. FLAKY = "πŸ”„\u200cflaky" FAIL = "❌\u200cFAIL" PASS = "βœ…\u200cpass" @@ -28,8 +30,6 @@ ACTIONS_WITH_ICON = INTERESTING_ACTIONS + (PASS, SKIP) ACTION_MESSAGES = { - # \u200c is zero-width space. It is added so that len of the string corresponds to real width. - # ❌, βœ…, πŸ”„ each take space of 2 characters. 
"fail": FAIL, "pass": PASS, "skip": SKIP, From f7b4ab2c454896cccb00831559fb85cb7927df29 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 21:59:56 +0200 Subject: [PATCH 15/20] clean up reconfigure(), did not help --- tools/gh_parse.py | 4 ---- tools/gh_report.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 7bbb14f606..962f71dd3f 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -4,10 +4,6 @@ """ import sys - -sys.stdout.reconfigure(encoding="utf-8") -sys.stderr.reconfigure(encoding="utf-8") - import json import argparse import re diff --git a/tools/gh_report.py b/tools/gh_report.py index a02c481fea..6015d8c967 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -6,10 +6,6 @@ """ import sys - -sys.stdout.reconfigure(encoding="utf-8") -sys.stderr.reconfigure(encoding="utf-8") - import os import subprocess import argparse From f9e71ec146599f29aa2b385f61b3e61608b3d00c Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 22:24:59 +0200 Subject: [PATCH 16/20] log info about parsed files --- tools/gh_parse.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 962f71dd3f..d05bd76f84 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -116,6 +116,8 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False): per_test_per_env_stats = {} # testname -> env -> action -> count all_testnames = set() all_envs = set() + count_files = 0 + count_results = 0 for filename in iter_paths(filenames): p = Path(filename) env = cleanup_env(p.parent.name) @@ -126,12 +128,16 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False): continue all_envs.add(env) test_results, test_outputs = parse_file(p, filter) + count_files += 1 + count_results += len(test_results) for testname, action in test_results.items(): per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1 
for testname, output in test_outputs.items(): outputs.setdefault(testname, {}).setdefault(env, []).extend(output) all_testnames.update(test_results) + print("Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True) + # Check for missing tests for testname in all_testnames: # It is possible for test to be missing if it's parent is skipped, ignore test cases with a parent. From c0ddd528d079352a4c027c55de4cbfdff8369a5f Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 22:32:20 +0200 Subject: [PATCH 17/20] fix f-string --- tools/gh_parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index d05bd76f84..6bc8cb8ac3 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -136,7 +136,7 @@ def print_report(filenames, filter, filter_env, show_output, markdown=False): outputs.setdefault(testname, {}).setdefault(env, []).extend(output) all_testnames.update(test_results) - print("Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True) + print(f"Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True) # Check for missing tests for testname in all_testnames: From c3bbde38d494c0ba7f300d1697d0ebe3dc325eea Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 22:36:43 +0200 Subject: [PATCH 18/20] simplify, do not run through shell --- tools/gh_report.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/gh_report.py b/tools/gh_report.py index 6015d8c967..8bd57ece2d 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -28,12 +28,8 @@ def run(cmd, shell=False): - if isinstance(cmd, str): - sys.stderr.write(f"+ {cmd}\n") - return subprocess.run(cmd, check=True, shell=True) - else: - sys.stderr.write("+ " + " ".join(cmd) + "\n") - return subprocess.run(cmd, check=True, shell=False) + sys.stderr.write("+ " + " ".join(cmd) + "\n") + return subprocess.run(cmd, check=True, 
shell=False) def run_text(cmd, print_command=False): @@ -119,7 +115,7 @@ def download_run_id(run_id, repo, rm): target_dir = f".gh-logs/{run_id}" if os.path.exists(target_dir): if rm: - run(f"rm -fr {target_dir}") + run(["rm", "-fr", target_dir]) else: print(f"Already exists: {target_dir}. If that directory contains partial results, delete it to re-download: rm -fr .gh-logs/{run_id}") return target_dir @@ -173,8 +169,8 @@ def main(): cmd.append(f"--output") if args.markdown: cmd.append(f"--markdown") - cmd.append(f"{target_dir}/*/*") - run(" ".join(cmd), shell=True) + cmd.append(f"{target_dir}") + run(cmd, shell=True) if __name__ == "__main__": From 1eae7a32faf6ad843f8fdc61a824bdb610a0c94c Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 22:42:11 +0200 Subject: [PATCH 19/20] call via python executable --- tools/gh_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gh_report.py b/tools/gh_report.py index 8bd57ece2d..b84c25e53e 100755 --- a/tools/gh_report.py +++ b/tools/gh_report.py @@ -160,7 +160,7 @@ def main(): target_dir = download_run_id(args.run, repo, rm=args.rm) print(flush=True) - cmd = [str(PARSE_SCRIPT)] + cmd = [sys.executable, str(PARSE_SCRIPT)] if args.filter: cmd.append(f"--filter {args.filter}") if args.filter_env: From d834569c536a129370c4d9e8271ca55c42174b6a Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 26 Jun 2025 23:01:05 +0200 Subject: [PATCH 20/20] group ucws and non-ucws together --- tools/gh_parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gh_parse.py b/tools/gh_parse.py index 6bc8cb8ac3..265698b0a0 100755 --- a/tools/gh_parse.py +++ b/tools/gh_parse.py @@ -41,12 +41,12 @@ def cleanup_env(name): 'gcp windows' >>> cleanup_env("test-output-azure-prod-ucws-is-linux-ubuntu-latest") - 'azure ucws linux' + 'azure-ucws linux' """ if not name.startswith("test-output-"): return "" name = name.removeprefix("test-output-") - name = 
name.replace("-prod-ucws-is-", " ucws-") + name = name.replace("-prod-ucws-is-", "-ucws-") name = name.replace("-prod-is-", "-") name = name.replace("-linux-ubuntu-latest", " linux") name = name.replace("-windows-server-latest", " windows")