#!/usr/bin/env python3
"""
Analyze downloaded GH logs and print a report. Use gh_report.py instead of this script directly.

Input files are "go test -json" event streams (one JSON object per line).
"""

import sys
import json
import argparse
import re
from collections import Counter
from pathlib import Path


# \u200c is zero-width space. It is added so that len of the string corresponds to real width.
# ❌, ✅, 🔄 each take space of 2 characters.
FLAKY = "🔄\u200cflaky"
FAIL = "❌\u200cFAIL"
PASS = "✅\u200cpass"
SKIP = "🙈\u200cskip"

# This happens when Eventually is used - there is output for the test but no result.
MISSING = "🤯\u200cMISS"
PANIC = "💥\u200cPANIC"

# Anything that is not a plain pass/skip deserves attention in the report.
INTERESTING_ACTIONS = (FAIL, FLAKY, PANIC, MISSING)
ACTIONS_WITH_ICON = INTERESTING_ACTIONS + (PASS, SKIP)

# Map raw "go test -json" Action values to icon-prefixed labels.
ACTION_MESSAGES = {
    "fail": FAIL,
    "pass": PASS,
    "skip": SKIP,
}


def cleanup_env(name):
    """Shorten an artifact directory name to "<cloud> <os>"; "" if not a test-output dir.

    >>> cleanup_env("test-output-aws-prod-is-linux-ubuntu-latest")
    'aws linux'

    >>> cleanup_env("test-output-gcp-prod-is-windows-server-latest")
    'gcp windows'

    >>> cleanup_env("test-output-azure-prod-ucws-is-linux-ubuntu-latest")
    'azure-ucws linux'
    """
    if not name.startswith("test-output-"):
        return ""
    name = name.removeprefix("test-output-")
    name = name.replace("-prod-ucws-is-", "-ucws-")
    name = name.replace("-prod-is-", "-")
    name = name.replace("-linux-ubuntu-latest", " linux")
    name = name.replace("-windows-server-latest", " windows")
    return name


def iter_path(filename):
    """Yield filename itself if it is a file, otherwise every file beneath it.

    NOTE: Path.walk() requires Python 3.12+.
    """
    p = Path(filename)
    if p.is_file():
        yield filename
        return
    for dirpath, dirnames, filenames in p.walk():
        for f in filenames:
            yield dirpath / f


def iter_paths(paths):
    """Flatten a mixed list of files and directories into file paths."""
    for path in paths:
        for filename in iter_path(path):
            yield filename


def parse_file(path, filter):
    """Parse one "go test -json" file.

    Args:
        path: Path to the log file (one JSON event per line).
        filter: optional substring; tests whose name does not contain it are
            skipped. (NOTE: shadows the builtin name; kept for compatibility.)

    Returns:
        (results, outputs): results maps testname -> action label
        (FAIL/PASS/SKIP/FLAKY/PANIC/MISSING); outputs maps
        testname -> list of rstripped output lines.
    """
    results = {}
    outputs = {}
    for line in path.open():
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except Exception as ex:
            # BUG FIX: message previously did not say which file failed to parse.
            print(f"{path}: {ex}\n{line!r}\n")
            break
        testname = data.get("Test")
        if not testname:
            continue
        if filter and filter not in testname:
            continue
        action = data.get("Action")

        action = ACTION_MESSAGES.get(action, action)

        if action in (FAIL, PASS, SKIP):
            prev = results.get(testname)
            # A pass that follows a fail means the test was retried: flaky.
            if prev == FAIL and action == PASS:
                results[testname] = FLAKY
            else:
                results[testname] = action

        out = data.get("Output")
        if out:
            outputs.setdefault(testname, []).append(out.rstrip())

    # Tests with output but no terminal result either panicked or went missing.
    for testname, lines in outputs.items():
        if testname in results:
            continue
        if "panic: " in str(lines):
            results.setdefault(testname, PANIC)
        else:
            # BUG FIX: was `MISS`, an undefined name (NameError); the constant is MISSING.
            results.setdefault(testname, MISSING)

    return results, outputs


def print_report(filenames, filter, filter_env, show_output, markdown=False):
    """Aggregate all log files and print per-env and per-test summary tables.

    Args:
        filenames: files or directories containing "go test -json" logs.
        filter: optional substring filter on test names.
        filter_env: optional substring filter on env names.
        show_output: also print captured output for interesting tests.
        markdown: emit GitHub-flavored markdown instead of aligned text.
    """
    outputs = {}  # testname -> env -> [output]
    per_test_per_env_stats = {}  # testname -> env -> action -> count
    all_testnames = set()
    all_envs = set()
    count_files = 0
    count_results = 0
    for filename in iter_paths(filenames):
        p = Path(filename)
        # The env is encoded in the parent directory name of each artifact.
        env = cleanup_env(p.parent.name)
        if not env:
            # BUG FIX: the f-string had no placeholder, so the message never
            # named the file being ignored.
            print(f"Ignoring {filename}: cannot extract env")
            continue
        if filter_env and filter_env not in env:
            continue
        all_envs.add(env)
        test_results, test_outputs = parse_file(p, filter)
        count_files += 1
        count_results += len(test_results)
        for testname, action in test_results.items():
            per_test_per_env_stats.setdefault(testname, {}).setdefault(env, Counter())[action] += 1
        for testname, output in test_outputs.items():
            outputs.setdefault(testname, {}).setdefault(env, []).extend(output)
        all_testnames.update(test_results)

    print(f"Parsed {count_files} files: {count_results} results", file=sys.stderr, flush=True)

    # Check for missing tests: a top-level test seen in some envs but absent
    # in others is counted as MISSING where it never ran.
    for testname in all_testnames:
        # It is possible for test to be missing if its parent is skipped, ignore test cases with a parent.
        # For acceptance tests, ignore tests with subtests produced via EnvMatrix
        if testname.startswith("TestAccept/") and "=" in testname:
            continue
        # For non-acceptance tests ignore all subtests.
        if not testname.startswith("TestAccept/") and "/" in testname:
            continue
        test_results = per_test_per_env_stats.get(testname, {})
        for e in all_envs:
            if e not in test_results:
                test_results.setdefault(e, Counter())[MISSING] += 1

    per_env_stats = {}  # env -> action -> count
    for testname, items in per_test_per_env_stats.items():
        for env, stats in items.items():
            per_env_stats.setdefault(env, Counter()).update(stats)

    # First table: one row per env with an overall status icon.
    table = []
    for env, stats in sorted(per_env_stats.items()):
        status = "??"
        for action in ACTIONS_WITH_ICON:
            if action in stats:
                status = action[:2]  # icon + zero-width char only
                break

        table.append(
            {
                " ": status,
                "Env": env,
                **stats,
            }
        )
    print_table(table, markdown=markdown)

    # Envs that have at least one non-pass/skip result.
    interesting_envs = set()
    for env, stats in per_env_stats.items():
        for act in INTERESTING_ACTIONS:
            if act in stats:
                interesting_envs.add(env)
                break

    simplified_results = {}  # testname -> env -> action
    for testname, items in sorted(per_test_per_env_stats.items()):
        per_testname_result = simplified_results.setdefault(testname, {})
        # first select tests with interesting actions (anything but pass or skip)
        for env, counts in items.items():
            for action in INTERESTING_ACTIONS:
                if action in counts:
                    per_testname_result.setdefault(env, action)
                    break

        # Once we know test is interesting, complete the row
        if per_testname_result:
            for env, counts in items.items():
                if env not in interesting_envs:
                    continue
                for action in (PASS, SKIP):
                    if action in counts:
                        per_testname_result.setdefault(env, action)
                        break

        # Drop rows that turned out to be entirely uninteresting.
        if not per_testname_result:
            per_testname_result = simplified_results.pop(testname)

    # Second table: one row per interesting test, one column per env.
    table = []
    for testname, items in simplified_results.items():
        table.append(
            {
                "Test Name": testname,
                **items,
            }
        )
    print_table(table, markdown=markdown)

    if show_output:
        for testname, stats in simplified_results.items():
            for env, action in stats.items():
                if action not in INTERESTING_ACTIONS:
                    continue
                out = "\n".join(outputs.get(testname, {}).get(env, []))
                if markdown:
                    print(f"### {env} {testname} {action}\n```\n{out}\n```")
                else:
                    print(f"### {env} {testname} {action}\n{out}")
                if out:
                    print()


def print_table(table, columns=None, markdown=False):
    """
    Pretty-print a list-of-dicts as an aligned text table.

    Args:
        table (list[dict]): the data rows
        columns (list[str]): header names & column order
        markdown (bool): whether to output in markdown format
    """
    if not table:
        return

    if columns is None:
        # Collect every key in first-seen order, then sort for stable output.
        columns = []
        seen = set()
        for row in table:
            for key in row:
                if key in seen:
                    continue
                seen.add(key)
                columns.append(key)
        columns.sort()

    # Column width = max of header width and widest cell.
    widths = [len(col) for col in columns]
    for row in table:
        for i, col in enumerate(columns):
            widths[i] = max(widths[i], len(str(row.get(col, ""))))

    if markdown:
        # Header
        print("| " + " | ".join(str(col).ljust(w) for col, w in zip(columns, widths)) + " |")
        # Separator
        print("| " + " | ".join("-" * w for w in widths) + " |")
        # Data rows
        for row in table:
            print("| " + " | ".join(str(row.get(col, "")).ljust(w) for col, w in zip(columns, widths)) + " |")
    else:

        def fmt(cells):
            return " ".join(str(cell).ljust(w) for cell, w in zip(cells, widths))

        print(fmt(columns))
        # FIX: dropped unused enumerate() index.
        for row in table:
            print(fmt([row.get(col, "") for col in columns]))

    print()


def main():
    """CLI entry point: parse arguments and print the report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("filenames", nargs="+", help="Filenames or directories to parse")
    parser.add_argument("--filter", help="Filter results by test name (substring match)")
    parser.add_argument("--filter-env", help="Filter results by env name (substring match)")
    parser.add_argument("--output", help="Show output for failed tests", action="store_true")
    parser.add_argument("--markdown", help="Output in GitHub-flavored markdown format", action="store_true")
    args = parser.parse_args()
    print_report(args.filenames, filter=args.filter, filter_env=args.filter_env, show_output=args.output, markdown=args.markdown)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Download integration logs artifacts for a given run id (--run RUNID) or commit (--commit) and call gh_parse.py on those to print the report.

If neither --commit nor --run are passed, will use either current PR or HEAD.
"""

import sys
import os
import subprocess
import argparse
import json
import pprint
from pathlib import Path


CLI_REPO = "databricks/cli"
DECO_REPO = os.environ.get("DECO_REPO") or os.environ.get("GITHUB_REPOSITORY")
DECO_TESTS_PREFIX = "https://go/deco-tests/"
CLI_TESTS_PREFIX = "https://github.com/databricks/cli/actions/runs/"
DIRECTORY = Path(__file__).parent
PARSE_SCRIPT = DIRECTORY / "gh_parse.py"

# Prefer a CWD-relative path for shorter log lines.
try:
    PARSE_SCRIPT = PARSE_SCRIPT.relative_to(os.getcwd())
except Exception:
    pass  # keep absolute


def run(cmd, shell=False):
    """Echo cmd to stderr and run it, raising CalledProcessError on failure.

    BUG FIX: the shell parameter was accepted but ignored (shell=False was
    hard-coded in the subprocess.run call).
    """
    sys.stderr.write("+ " + " ".join(cmd) + "\n")
    return subprocess.run(cmd, check=True, shell=shell)


def run_text(cmd, print_command=False):
    """Run cmd and return its stdout as stripped text."""
    if print_command:
        sys.stderr.write("+ " + " ".join(cmd) + "\n")
    result = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8", check=True)
    return result.stdout.strip()


def run_json(cmd):
    """Run cmd (echoed to stderr) and parse its stdout as JSON."""
    sys.stderr.write("+ " + " ".join(cmd) + "\n")
    result = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8", check=True)

    try:
        return json.loads(result.stdout)
    except Exception:
        sys.stderr.write(f"Failed to parse JSON:\n{result.stdout}\n")
        raise


def current_branch():
    """Return the name of the currently checked-out git branch."""
    return run_text("git branch --show-current".split())


def get_run_id_from_items(items, field, prefix, data):
    """Extract the newest run id from a list of status/check items.

    Args:
        items: list of dicts (statuses or check runs) to scan; may be None.
        field: dict key that holds the URL.
        prefix: URL prefix identifying the relevant workflow's runs.
        data: full API response, dumped to stderr for diagnostics on failure.

    Returns the largest matching run id (int); exits the process if none found.
    """
    found = set()

    for item in items or []:
        url = item.get(field, "")
        if url.startswith(prefix):
            run_id = url.removeprefix(prefix).split("/")[0]
            assert run_id.isdigit(), url
            found.add(int(run_id))

    found = sorted(found)

    if not found:
        print(pprint.pformat(data), flush=True, file=sys.stderr)
        sys.exit(f"run_id not found (search: {field=} {prefix=})")
    elif len(found) > 1:
        # Several matching runs: warn, then fall through to the newest one.
        print(f"many run_ids (search: {field=} {prefix=}): {found}", file=sys.stderr, flush=True)

    return found[-1]


def get_pr_run_id_integration():
    """Run id of the integration-tests run for the current PR."""
    data = run_json("gh pr status --json statusCheckRollup".split())
    items = data.get("currentBranch", {}).get("statusCheckRollup")
    return get_run_id_from_items(items, "targetUrl", DECO_TESTS_PREFIX, data)


def get_commit_run_id_integration(commit):
    """Run id of the integration-tests run for a given commit."""
    data = run_json(["gh", "api", f"repos/databricks/cli/commits/{commit}/status"])
    items = data.get("statuses")
    return get_run_id_from_items(items, "target_url", DECO_TESTS_PREFIX, data)


def get_pr_run_id_unit():
    """Run id of the unit-tests ("build") run for the current PR."""
    data = run_json("gh pr status --json statusCheckRollup".split())
    items = data.get("currentBranch", {}).get("statusCheckRollup")
    items = [x for x in items if x.get("workflowName") == "build"]
    return get_run_id_from_items(items, "detailsUrl", CLI_TESTS_PREFIX, data)


def get_commit_run_id_unit(commit):
    """Run id of the unit-tests ("build") run for a given commit."""
    data = run_json(["gh", "run", "list", "-c", commit, "--json", "databaseId,workflowName"])
    results = []
    try:
        for item in data:
            if item["workflowName"] == "build":
                results.append(int(item["databaseId"]))
        results.sort()
        assert len(results) == 1, results
    except Exception:
        # Dump the raw response for debugging; re-raise only if we found nothing usable.
        print(pprint.pformat(data), flush=True, file=sys.stderr)
        if not results:
            raise

    return results[-1]


def download_run_id(run_id, repo, rm):
    """Download all artifacts of run_id into .gh-logs/<run_id>; return that dir.

    Reuses a previously downloaded directory unless rm is true.
    """
    target_dir = f".gh-logs/{run_id}"
    if os.path.exists(target_dir):
        if rm:
            run(["rm", "-fr", target_dir])
        else:
            print(f"Already exists: {target_dir}. If that directory contains partial results, delete it to re-download: rm -fr .gh-logs/{run_id}")
            return target_dir
    cmd = ["gh", "run", "-R", repo, "download", str(run_id), "-D", target_dir]
    run(cmd)
    return target_dir


def main():
    """Resolve a run id, download its artifacts, and delegate to gh_parse.py."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--run", type=int, help="Github run_id to load")
    parser.add_argument("--commit", help="Commit to get run_id from. If not set, getting either PR status or most recent commit")
    parser.add_argument("--rm", help="Remove previously downloaded files first", action="store_true")
    parser.add_argument("--filter", help="Filter results by test name (substring match)")
    parser.add_argument("--filter-env", help="Filter results by env name (substring match)")
    parser.add_argument("--output", help="Show output for failing tests", action="store_true")
    parser.add_argument("--markdown", help="Output in GitHub-flavored markdown format", action="store_true")

    # This does not work because we don't store artifacts for unit tests. We could download logs instead but that requires different parsing method:
    # ~/work/cli % gh api -H "Accept: application/vnd.github+json" /repos/databricks/cli/actions/runs/15827411452/logs > logs.zip
    parser.add_argument("--unit", action="store_true", help="Extract run_id for unit tests rather than integration tests (not working)")
    args = parser.parse_args()

    repo = CLI_REPO if args.unit else DECO_REPO
    # Explicit check instead of `assert repo`: asserts vanish under `python -O`.
    if not repo:
        sys.exit("DECO_REPO or GITHUB_REPOSITORY environment variable must be set")

    if not args.run and not args.commit:
        if current_branch() == "main":
            args.commit = run_text("git rev-parse --short HEAD".split())
        else:
            if args.unit:
                args.run = get_pr_run_id_unit()
            else:
                args.run = get_pr_run_id_integration()

    if args.commit:
        assert not args.run
        if args.unit:
            args.run = get_commit_run_id_unit(args.commit)
        else:
            args.run = get_commit_run_id_integration(args.commit)

    target_dir = download_run_id(args.run, repo, rm=args.rm)
    print(flush=True)
    cmd = [sys.executable, str(PARSE_SCRIPT)]
    # BUG FIX: each option and its value must be separate argv entries. The
    # old code appended "--filter <value>" as one string and then invoked
    # run(cmd, shell=True) on a *list*, which on POSIX executes only cmd[0].
    if args.filter:
        cmd += ["--filter", args.filter]
    if args.filter_env:
        cmd += ["--filter-env", args.filter_env]
    if args.output:
        cmd.append("--output")
    if args.markdown:
        cmd.append("--markdown")
    cmd.append(target_dir)
    run(cmd)


if __name__ == "__main__":
    try:
        main()
    except subprocess.CalledProcessError as ex:
        sys.exit(ex)