Skip to content

Commit

Permalink
[SPARK-1684] [PROJECT INFRA] Merge script should standardize SPARK-XX…
Browse files Browse the repository at this point in the history
…X prefix

Cleans up the pull request title in the merge script to follow conventions outlined in the wiki under Contributing Code.
https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingCode

[MODULE] SPARK-XXXX: Description

Author: texasmichelle <texasmichelle@gmail.com>

Closes apache#5149 from texasmichelle/master and squashes the following commits:

9b6b0a7 [texasmichelle] resolved variable scope issue
7d5fa20 [texasmichelle] only prompt if title has been modified
8c195bb [texasmichelle] removed erroneous line
4f1ed46 [texasmichelle] Deque removal, logic simplifications, & prompt user to pick a title (orig or modified)
df73f6a [texasmichelle] reworked regex's to enforce brackets around JIRA ref
43b5aed [texasmichelle] Merge remote-tracking branch 'apache/master'
25229c6 [texasmichelle] Merge remote-tracking branch 'apache/master'
aa20a6e [texasmichelle] Move code into main() and add doctest for new text parsing method
48520ba [texasmichelle] SPARK-1684: Corrected import statement
042099d [texasmichelle] SPARK-1684 Merge script should standardize SPARK-XXX prefix
8f4a7d1 [texasmichelle] SPARK-1684 Merge script should standardize SPARK-XXX prefix
  • Loading branch information
texasmichelle authored and pwendell committed Apr 22, 2015
1 parent 41ef78a commit a0761ec
Showing 1 changed file with 140 additions and 59 deletions.
199 changes: 140 additions & 59 deletions dev/merge_spark_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@
# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"

os.chdir(SPARK_HOME)


def get_json(url):
try:
Expand Down Expand Up @@ -85,10 +83,6 @@ def continue_maybe(prompt):
if result.lower() != "y":
fail("Okay, exiting")


original_head = run_cmd("git rev-parse HEAD")[:8]


def clean_up():
print "Restoring head pointer to %s" % original_head
run_cmd("git checkout %s" % original_head)
Expand All @@ -101,7 +95,7 @@ def clean_up():


# merge the requested PR and return the merge hash
def merge_pr(pr_num, target_ref):
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper())
run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
Expand Down Expand Up @@ -274,7 +268,7 @@ def get_version_json(version_str):
asf_jira.transition_issue(
jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment)

print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)


def resolve_jira_issues(title, merge_branches, comment):
Expand All @@ -286,68 +280,155 @@ def resolve_jira_issues(title, merge_branches, comment):
resolve_jira_issue(merge_branches, comment, jira_id)


branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
# Assumes branch names can be sorted lexicographically
latest_branch = sorted(branch_names, reverse=True)[0]

pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
def standardize_jira_ref(text):
    """
    Standardize the [SPARK-XXXXX] [MODULE] prefix of a pull request title.

    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"

    The JIRA reference(s) are moved to the front, followed by the bracketed
    component tag(s) in upper case, followed by the remaining description with
    any leading punctuation removed. A title that already matches the standard
    format, or that contains neither a JIRA reference nor a component tag, is
    returned unchanged (apart from whitespace normalization in the latter case).

    :param text: the original pull request title
    :return: the title rewritten in the standard format

    >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
    '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful'
    >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
    '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in pull requests'
    >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
    '[SPARK-5954] [MLLIB] Top by key'
    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
    >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
    '[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
    >>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark")
    '[SPARK-1146] [WIP] Vagrant support for Spark'
    >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
    '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
    >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
    '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved words in DDL parser.'
    >>> standardize_jira_ref("Additional information for users building from source code")
    'Additional information for users building from source code'
    >>> standardize_jira_ref("SPARK5954 Top by key")
    '[SPARK-5954] Top by key'
    >>> standardize_jira_ref("[MLlib] SPARK - 5954: Top by key")
    '[SPARK-5954] [MLLIB] Top by key'
    >>> standardize_jira_ref("SPARK-1234 spark-1234: Fix bug")
    '[SPARK-1234] Fix bug'
    """
    # If the string is compliant, no need to process any further
    if re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text):
        return text

    jira_refs = []
    components = []

    # Extract JIRA ref(s). Only the issue number is captured so that the tag
    # can be rebuilt as exactly "[SPARK-<number>]" no matter how the author
    # separated the two parts ("SPARK 123", "SPARK123", "spark - 123", ...).
    jira_pattern = re.compile(r'SPARK[-\s]*([0-9]{3,6})', re.IGNORECASE)
    for issue_number in jira_pattern.findall(text):
        jira_ref = '[SPARK-%s]' % issue_number
        # A title may mention the same issue more than once; list it once.
        if jira_ref not in jira_refs:
            jira_refs.append(jira_ref)
    text = jira_pattern.sub('', text)

    # Extract spark component(s):
    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas.
    # The dash is placed last in the class so it is a literal, not a range.
    component_pattern = re.compile(r'\[[\w\s,.-]+\]')
    for component in component_pattern.findall(text):
        components.append(component.upper())
    text = component_pattern.sub('', text)

    # Cleanup any remaining leading symbols (empty brackets, colons, dots, ...)
    leading_symbols = re.match(r'\W+(.*)', text)
    if leading_symbols is not None:
        text = leading_symbols.group(1)

    # Assemble full text (JIRA ref(s), module(s), remaining text)
    clean_text = ' '.join(jira_refs + components + [text.strip()])

    # Replace multiple spaces with a single space, e.g. if no jira refs and/or
    # components were included
    return re.sub(r'\s+', ' ', clean_text.strip())

def main():
global original_head

os.chdir(SPARK_HOME)
original_head = run_cmd("git rev-parse HEAD")[:8]

branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
# Assumes branch names can be sorted lexicographically
latest_branch = sorted(branch_names, reverse=True)[0]

pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))

url = pr["url"]

# Decide whether to use the modified title or not
modified_title = standardize_jira_ref(pr["title"])
if modified_title != pr["title"]:
print "I've re-written the title as follows to match the standard format:"
print "Original: %s" % pr["title"]
print "Modified: %s" % modified_title
result = raw_input("Would you like to use the modified title? (y/n): ")
if result.lower() == "y":
title = modified_title
print "Using modified title:"
else:
title = pr["title"]
print "Using original title:"
print title
else:
title = pr["title"]

url = pr["url"]
title = pr["title"]
body = pr["body"]
target_ref = pr["base"]["ref"]
user_login = pr["user"]["login"]
base_ref = pr["head"]["ref"]
pr_repo_desc = "%s/%s" % (user_login, base_ref)
body = pr["body"]
target_ref = pr["base"]["ref"]
user_login = pr["user"]["login"]
base_ref = pr["head"]["ref"]
pr_repo_desc = "%s/%s" % (user_login, base_ref)

# Merged pull requests don't appear as merged in the GitHub API;
# Instead, they're closed by asfgit.
merge_commits = \
[e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]
# Merged pull requests don't appear as merged in the GitHub API;
# Instead, they're closed by asfgit.
merge_commits = \
[e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]

if merge_commits:
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
if merge_commits:
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]

print "Pull request %s has already been merged, assuming you want to backport" % pr_num
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
print "Pull request %s has already been merged, assuming you want to backport" % pr_num
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
"%s^{commit}" % merge_hash]).strip() != ""
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)

print "Found commit %s:\n%s" % (merge_hash, message)
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)
print "Found commit %s:\n%s" % (merge_hash, message)
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)

if not bool(pr["mergeable"]):
msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
"Continue? (experts only!)"
continue_maybe(msg)
if not bool(pr["mergeable"]):
msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
"Continue? (experts only!)"
continue_maybe(msg)

print ("\n=== Pull Request #%s ===" % pr_num)
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)
print ("\n=== Pull Request #%s ===" % pr_num)
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)

merged_refs = [target_ref]
merged_refs = [target_ref]

merge_hash = merge_pr(pr_num, target_ref)
merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc)

pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]
pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]

if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
else:
print "JIRA_USERNAME and JIRA_PASSWORD not set"
print "Exiting without trying to close the associated JIRA."
else:
print "JIRA_USERNAME and JIRA_PASSWORD not set"
print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
print "Exiting without trying to close the associated JIRA."
else:
print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
print "Exiting without trying to close the associated JIRA."

if __name__ == "__main__":
    import doctest
    import sys

    # Run the embedded doctests before touching any repository state. If the
    # title-parsing examples no longer hold, refuse to merge rather than
    # continue with a helper known to be broken.
    failure_count, _test_count = doctest.testmod()
    if failure_count:
        sys.exit(-1)

    main()

0 comments on commit a0761ec

Please sign in to comment.