Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 95 additions & 68 deletions dev/merge_spark_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@
# absorb work for the next minors on major M. Maintenance branches branch-M.N are cut from
# branch-M.x. Cherry-pick prompts sort by semver via semver_branch_rank (branch-M.x before branch-M.N).
#
# Fix Version defaults when resolving JIRA: master contributes the largest unreleased N.0.0.
# Master + branch-M.x also defaults per listed integration branch to that major's unreleased Spark
# minor line versions (multiple branch-M.x entries each add one; e.g. 6.0.0 + 5.2.0 + 4.3.0).
# Fix Version defaults when resolving JIRA aim for the minimal set of versions that tells users
# which Spark releases will contain the commit, leveraging the Upstream-First backporting policy
# (cherry-picks flow master -> branch-M.x -> branch-M.N). master contributes the greatest
# unreleased N.0.0, but is omitted when any branch-M.x is in the merge set (a cherry-pick to
# branch-M.x has already landed on master). Each branch-M.x contributes that major's greatest
# unreleased minor.0, but is omitted when a sibling branch-M.N contributes M.N.0 (M.N.0 still
# unreleased means branch-M.N is still a subset of branch-M.x). Patch merges on branch-M.N use
# unreleased versions starting with M.N.
# This utility assumes you already have a local Spark git folder and that you
# have added remotes corresponding to both (i) the github apache Spark
# mirror and (ii) the apache git repo.
Expand Down Expand Up @@ -151,36 +156,6 @@ def _sort_version_names_lex_desc(names):
return sorted(names, reverse=True)


def _dedupe_fix_versions_adjacent_major_zero(default_fix_versions):
"""Preserve order; drop duplicate names, then drop V when V is M.minor.0 and
predecessor (minor-1).0.0 exists in the list.

>>> _dedupe_fix_versions_adjacent_major_zero(["6.0.0", "4.3.0", "4.3.0"])
['6.0.0', '4.3.0']
>>> _dedupe_fix_versions_adjacent_major_zero(["2.0.0", "1.1.0", "1.0.0"])
['2.0.0', '1.0.0']
>>> _dedupe_fix_versions_adjacent_major_zero(["1.0.0", "2.0.0"])
['1.0.0', '2.0.0']
"""
out = list(dict.fromkeys(default_fix_versions))
for v in list(out):
major, minor, patch = v.split(".")
if patch == "0":
previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
if previous in out:
out = list(filter(lambda x: x != v, out))
return out


def _append_max_fix_or_warn(defaults, warnings, candidates, warn_if_none):
"""Append _semver_max_version(candidates) when defined; else optionally append warn_if_none."""
chosen = _semver_max_version(candidates)
if chosen:
defaults.append(chosen)
elif warn_if_none is not None:
warnings.append(warn_if_none)


def compute_merge_default_fix_versions(merge_branches, unreleased_version_names):
"""
Build suggested SPARK Fix Version names from merged git refs (no JIRA I/O).
Expand All @@ -189,22 +164,29 @@ def compute_merge_default_fix_versions(merge_branches, unreleased_version_names)

Returns (default_version_names, warning_messages).

Master always suggests the greatest unreleased N.0.0. Each branch-M.x with master also adds the
greatest unreleased Spark M.*.* Fix Version for that major (multiple branch-M.x entries each
contribute). Patch merges on branch-M.N use unreleased versions starting with M.N.
The output is the minimal set of Fix Versions that tells users which Spark releases will
contain the commit, leveraging the Upstream-First backporting policy (cherry-picks flow
master -> branch-M.x -> branch-M.N):
- master contributes the greatest unreleased N.0.0;
- branch-M.x with master contributes that major's greatest unreleased minor.0;
- branch-M.N contributes its greatest unreleased M.N.patch.
Redundant entries are then suppressed: master's N.0.0 is dropped when any branch-M.x is in
the merge set (a cherry-pick to branch-M.x has already landed on master); branch-M.x's
minor.0 is dropped when a branch-M.N for the same major contributes M.N.0 (M.N.0 still
unreleased means branch-M.N is still a subset of branch-M.x).

>>> compute_merge_default_fix_versions(["master"], ["4.3.0", "5.0.0", "6.0.0"])[0]
['6.0.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.x"], ["6.0.0", "5.0.0", "4.3.0", "4.2.1"])[0]
['6.0.0', '4.3.0']
['4.3.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-5.x"],
... ["7.0.0", "6.0.0", "5.2.0", "5.1.2", "5.1.1", "5.0.1"],
... )[0]
['7.0.0', '5.2.0']
['5.2.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-3.5"],
Expand Down Expand Up @@ -232,58 +214,103 @@ def compute_merge_default_fix_versions(merge_branches, unreleased_version_names)
>>> compute_merge_default_fix_versions(
... ["master", "branch-5.x", "branch-4.x"], ["5.2.0", "4.3.0", "6.0.0"]
... )[0]
['6.0.0', '5.2.0', '4.3.0']
['5.2.0', '4.3.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.x", "branch-4.x"], ["6.0.0", "4.3.0"]
... )[0]
['6.0.0', '4.3.0']
['4.3.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.x", "branch-4.2"], ["5.0.0", "4.3.0", "4.2.0"]
... )[0]
['4.2.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.x", "branch-4.2"], ["5.0.0", "4.3.0", "4.2.1"]
... )[0]
['4.3.0', '4.2.1']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.2"], ["5.0.0", "4.2.0"]
... )[0]
['5.0.0', '4.2.0']

>>> compute_merge_default_fix_versions(
... ["master", "branch-4.2"], ["5.0.0", "4.2.1"]
... )[0]
['5.0.0', '4.2.1']
"""
names = _sort_version_names_lex_desc(list(unreleased_version_names))
merged_set = set(merge_branches)
has_integration_branch = any(
_integration_major_from_branch(b) is not None for b in merge_branches
)

defaults = []
# Collect each merge branch's candidate Fix Version. We track (branch, version) so we can
# later suppress redundant entries (master, branch-M.x) based on which branches contributed.
contributions = []
warnings = []

for b in merge_branches:
if b == "master":
majors = [n for n in names if re.match(r"^\d+\.0\.0$", n)]
_append_max_fix_or_warn(
defaults,
warnings,
majors,
"No unreleased N.0.0 Fix Version found in JIRA for master; "
"enter comma-separated Fix Version(s) manually when prompted.",
)
chosen = _semver_max_version(majors)
if chosen:
contributions.append((b, chosen))
else:
warnings.append(
"No unreleased N.0.0 Fix Version found in JIRA for master; "
"enter comma-separated Fix Version(s) manually when prompted."
)
continue
line_major = _integration_major_from_branch(b)
if line_major is not None:
if "master" in merged_set:
line_versions = [n for n in names if re.match(r"^%s\.\d+\.\d+$" % line_major, n)]
_append_max_fix_or_warn(
defaults,
warnings,
line_versions,
(
"Could not infer an unreleased Spark %s (minor.maintenance) fix version "
"for branch-%s.x + master merge; enter version(s) manually when prompted."
% (line_major, line_major)
),
if "master" not in merge_branches:
continue
line_versions = [n for n in names if re.match(r"^%s\.\d+\.\d+$" % line_major, n)]
chosen = _semver_max_version(line_versions)
if chosen:
contributions.append((b, chosen))
else:
warnings.append(
"Could not infer an unreleased Spark %s (minor.maintenance) fix version "
"for branch-%s.x + master merge; enter version(s) manually when prompted."
% (line_major, line_major)
)
continue
prefix = b.replace("branch-", "")
found_versions = [n for n in names if n.startswith(prefix)]
_append_max_fix_or_warn(
defaults,
warnings,
found_versions,
(
chosen = _semver_max_version(found_versions)
if chosen:
contributions.append((b, chosen))
else:
warnings.append(
"Target version for %s is not found on JIRA, it may be archived or "
"not created. Skipping it." % b
),
)
)

return _dedupe_fix_versions_adjacent_major_zero(defaults), warnings
# Majors where a release branch's M.N.0 contribution makes the sibling branch-M.x's
# minor.0 redundant: an unreleased M.N.0 means branch-M.N has not diverged from branch-M.x
# yet, so anything that's on branch-M.N is also on branch-M.x.
suppress_integration_majors = set()
for b, v in contributions:
if b == "master" or _integration_major_from_branch(b) is not None:
continue
m = re.match(r"^(\d+)\.\d+\.0$", v)
if m:
suppress_integration_majors.add(int(m.group(1)))

def keep(item):
b, _ = item
if b == "master":
return not has_integration_branch
major = _integration_major_from_branch(b)
if major is not None:
return major not in suppress_integration_majors
return True

filtered = [item for item in contributions if keep(item)]
return list(dict.fromkeys(v for _, v in filtered)), warnings


def print_error(msg):
Expand Down