From ed49e804c5baf989e88567dcc795a2a20a5a558a Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Wed, 20 May 2026 04:19:27 +0000
Subject: [PATCH 1/3] [INFRA] Auto-close non-default-branch PRs in merge_spark_pr.py

### What changes were proposed in this pull request?

When `dev/merge_spark_pr.py` merges a PR whose target branch is not the repository's default (e.g. backport PRs against `branch-X.Y`), explicitly close the PR through the GitHub REST API after the push succeeds.

Specifically:
- Add a small `close_pr(pr_num)` helper that issues an authenticated `PATCH /pulls/{n}` with `{"state": "closed"}`.
- Plumb a `default_branch` parameter into `merge_pr()`. `main()` fetches it via `GET /repos/apache/spark` once and passes it in.
- After a successful `git push` in `merge_pr()`, if `target_ref != default_branch`, call `close_pr(pr_num)`.

### Why are the changes needed?

The squash-merge commit message already contains `Closes #N from ...`, which GitHub treats as a closing keyword. However, GitHub honors closing keywords **only when the commit lands on the repository's default branch**. Backport PRs that target `branch-X.Y` therefore got merged successfully but stayed open on GitHub, and the committer had to close them manually. PR #56004 was a recent example.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Compile-checked with `python3 -m py_compile dev/merge_spark_pr.py` and the existing doctests still pass via `python3 -m doctest dev/merge_spark_pr.py`. End-to-end merge behavior will be validated the next time a committer runs the script on a backport PR.

### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Claude Opus 4.7) --- dev/merge_spark_pr.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 795fde6d176e2..a0ee6337ad572 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -345,6 +345,21 @@ def get_json(url): sys.exit(-1) +def close_pr(pr_num): + url = "%s/pulls/%s" % (GITHUB_API_BASE, pr_num) + data = json.dumps({"state": "closed"}).encode("utf-8") + request = Request(url, data=data, method="PATCH") + request.add_header("Content-Type", "application/json") + request.add_header("Accept", "application/vnd.github+json") + if GITHUB_OAUTH_KEY: + request.add_header("Authorization", "token %s" % GITHUB_OAUTH_KEY) + try: + return json.load(urlopen(request)) + except HTTPError as e: + print_error("Failed to close PR #%s: HTTP %s %s" % (pr_num, e.code, e.reason)) + return None + + def fail(msg): print_error(msg) clean_up() @@ -383,7 +398,7 @@ def clean_up(): # merge the requested PR and return the merge hash -def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors): +def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors, default_branch): pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper()) run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) @@ -452,6 +467,14 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_author clean_up() print("Pull request #%s merged!" % pr_num) print("Merge hash: %s" % merge_hash) + + # The "Closes #N" keyword in the commit message only auto-closes the PR when the commit + # lands on the default branch. For merges into other branches (e.g. branch-X.Y backport + # PRs), close the PR explicitly through the API. 
+ if target_ref != default_branch: + print("Target branch %s is not the default branch; closing PR #%s." % (target_ref, pr_num)) + close_pr(pr_num) + return merge_hash @@ -818,6 +841,7 @@ def main(): branch_names = list(filter(lambda x: x.startswith("branch-"), [x["name"] for x in branches])) branch_names = sorted(branch_names, key=semver_branch_rank, reverse=True) branch_iter = iter(branch_names) + default_branch = get_json(GITHUB_API_BASE)["default_branch"] if len(sys.argv) == 1: pr_num = bold_input("Which pull request would you like to merge? (e.g. 34): ") @@ -951,7 +975,9 @@ def main(): merged_refs = [target_ref] - merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors) + merge_hash = merge_pr( + pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors, default_branch + ) pick_prompt = "Would you like to pick %s into another branch?" % merge_hash while bold_input("\n%s (y/N): " % pick_prompt).lower() == "y": From bb68bec16ae3022e379ff30829e831f47cd94e3b Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 20 May 2026 08:55:39 +0000 Subject: [PATCH 2/3] Check PR state after push instead of comparing target_ref to default_branch Replace the `target_ref != default_branch` guard with a direct `get_json("/pulls/N")["state"] != "closed"` check after the push. This drops the extra `default_branch` parameter on `merge_pr()` and the `GET /repos/apache/spark` fetch in `main()`, and is self-correcting for any edge case where GitHub's auto-close does not fire. 
Generated-by: Claude Code (Claude Opus 4.7) --- dev/merge_spark_pr.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index a0ee6337ad572..cd80169496c6a 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -398,7 +398,7 @@ def clean_up(): # merge the requested PR and return the merge hash -def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors, default_branch): +def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors): pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper()) run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) @@ -470,9 +470,10 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_author # The "Closes #N" keyword in the commit message only auto-closes the PR when the commit # lands on the default branch. For merges into other branches (e.g. branch-X.Y backport - # PRs), close the PR explicitly through the API. - if target_ref != default_branch: - print("Target branch %s is not the default branch; closing PR #%s." % (target_ref, pr_num)) + # PRs), GitHub leaves the PR open, so close it explicitly through the API. + pr_state = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)).get("state") + if pr_state != "closed": + print("PR #%s is still open after push; closing it explicitly." % pr_num) close_pr(pr_num) return merge_hash @@ -841,7 +842,6 @@ def main(): branch_names = list(filter(lambda x: x.startswith("branch-"), [x["name"] for x in branches])) branch_names = sorted(branch_names, key=semver_branch_rank, reverse=True) branch_iter = iter(branch_names) - default_branch = get_json(GITHUB_API_BASE)["default_branch"] if len(sys.argv) == 1: pr_num = bold_input("Which pull request would you like to merge? (e.g. 
34): ") @@ -975,9 +975,7 @@ def main(): merged_refs = [target_ref] - merge_hash = merge_pr( - pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors, default_branch - ) + merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors) pick_prompt = "Would you like to pick %s into another branch?" % merge_hash while bold_input("\n%s (y/N): " % pick_prompt).lower() == "y": From 3d8961c6b3a790921ff23aa2d3dc7d92bae4743a Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 20 May 2026 08:59:34 +0000 Subject: [PATCH 3/3] Move the explicit close-PR step out of merge_pr() into main() merge_pr() now stops at "Pull request merged!" again. The state-check + close_pr() call moves to main(), right after the merge_pr() call returns, which keeps merge_pr() focused on the local-merge+push mechanics and surfaces the close behavior at the top level where the JIRA / cherry-pick prompts also live. Generated-by: Claude Code (Claude Opus 4.7) --- dev/merge_spark_pr.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index cd80169496c6a..b630e13b968c7 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -467,15 +467,6 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_author clean_up() print("Pull request #%s merged!" % pr_num) print("Merge hash: %s" % merge_hash) - - # The "Closes #N" keyword in the commit message only auto-closes the PR when the commit - # lands on the default branch. For merges into other branches (e.g. branch-X.Y backport - # PRs), GitHub leaves the PR open, so close it explicitly through the API. - pr_state = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)).get("state") - if pr_state != "closed": - print("PR #%s is still open after push; closing it explicitly." 
% pr_num) - close_pr(pr_num) - return merge_hash @@ -977,6 +968,14 @@ def main(): merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc, pr_author, co_authors) + # The "Closes #N" keyword in the commit message only auto-closes the PR when the commit + # lands on the default branch. For merges into other branches (e.g. branch-X.Y backport + # PRs), GitHub leaves the PR open, so close it explicitly through the API. + pr_state = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)).get("state") + if pr_state != "closed": + print("PR #%s is still open after push; closing it explicitly." % pr_num) + close_pr(pr_num) + pick_prompt = "Would you like to pick %s into another branch?" % merge_hash while bold_input("\n%s (y/N): " % pick_prompt).lower() == "y": merged_refs = merged_refs + [