diff --git a/scripts/release-notes.py b/scripts/release-notes.py
index 7cce70ef..b10184ff 100755
--- a/scripts/release-notes.py
+++ b/scripts/release-notes.py
@@ -1,18 +1,94 @@
 #!/usr/bin/env python3
 import argparse
-import json
+import fnmatch
 import os
 import pathlib
 import re
 import subprocess
-import tempfile
+import sys
+import time
 import webbrowser
+
+REPLICA_TEAMS = set(
+    [
+        "consensus-owners",
+        "crypto-owners",
+        "Orchestrator",
+        "message-routing-owners",
+        "networking-team",
+        "execution-owners",
+        "node-team",
+        "runtime-owners",
+    ]
+)
+
+TYPE_PRETTY_MAP = {
+    "feat": ("Features", 0),
+    "fix": ("Bugfixes", 1),
+    "docs": ("Documentation", 6),
+    "refactor": ("Refactoring", 4),
+    "perf": ("Performance improvements", 2),
+    "test": ("Tests", 5),
+    "chore": ("Chores", 3),
+    "other": ("Other changes", 7),
+    "excluded": ("Excluded changes", 8),
+}
+
+TEAM_PRETTY_MAP = {
+    "DRE": "DRE",
+    "boundarynode-team": "Boundary Nodes",
+    "chross-chain-team": "Cross Chain",
+    "consensus-owners": "Consensus",
+    "cross-chain-team": "Cross Chain",
+    "crypto-owners": "Crypto",
+    "docs-owners": "Docs",
+    "execution-owners": "Execution",
+    "financial-integrations": "Financial Integrations",
+    "ghost": "Ghost",
+    "ic-support-eu": "SupportEU",
+    "ic-support-na": "SupportNA",
+    "ic-testing-verification": "T&V",
+    "idx": "IDX",
+    "interface-owners": "Interface",
+    "message-routing-owners": "Message Routing",
+    "networking-team": "Networking",
+    "nns-team": "NNS",
+    "node-team": "Node",
+    "owners-owners": "Owners",
+    "platform-operations": "PfOps",
+    "prodsec": "Prodsec",
+    "runtime-owners": "Runtime",
+    "trust-team": "Trust",
+}
+
+
+EXCLUDE_PACKAGES_FILTERS = [
+    r".+\/sns\/.+",
+    r".+\/ckbtc\/.+",
+    r".+\/cketh\/.+",
+    r".+canister.+",
+    r"rs\/nns.+",
+    r".+test.+",
+    r"^bazel$",
+]
+
+EXCLUDED_TEAMS = set(TEAM_PRETTY_MAP.keys()) - REPLICA_TEAMS
+
+# Ownership threshold for analyzing which teams were
+# involved in the commit
+MAX_OWNERSHIP_AREA = 0.5
+
 parser = argparse.ArgumentParser(description="Generate release notes")
 parser.add_argument("first_commit", type=str, help="first commit")
 parser.add_argument("last_commit", type=str, help="last commit")
-parser.add_argument("--max-commits", dest="max_commits", default=1000, help="maximum number of commits to fetch")
-parser.add_argument("--branch", dest="branch", help="branch to fetch commits from")
+parser.add_argument(
+    "--max-commits",
+    dest="max_commits",
+    default=1000,
+    help="maximum number of commits to fetch",
+)
+parser.add_argument("--branch", dest="branch", default="master", help="branch to fetch commits from")
 parser.add_argument(
     "--html",
     type=str,
@@ -24,22 +100,32 @@
 args = parser.parse_args()
 
 max_commits = os.environ.get("MAX_COMMITS", args.max_commits)
-branch = os.environ.get("BRANCH") or args.branch or args.rc_name
+branch = os.environ.get("BRANCH", args.branch)
+
+
+# https://stackoverflow.com/a/34482761
+def progressbar(it, prefix="", size=60, out=sys.stdout):  # Python3.6+
+    count = len(it)
+    start = time.time()
 
-def get_merge_commit(repo_dir, commit_hash, branch):
-    relevant_commits = list(enumerate(get_ancestry_path(repo_dir, commit_hash, branch)))
-    relevant_commits += list(enumerate(get_first_parent(repo_dir, commit_hash, branch)))
-    relevant_commits = sorted(relevant_commits, key=lambda index_commit: index_commit[1])
-    checked_commits = set()
-    commits = []
-    for index, commit in relevant_commits:
-        if commit not in checked_commits:
-            checked_commits.add(commit)
-            commits.append((index, commit))
+    def show(j, item):
+        x = int(size * j / count)
+        remaining = ((time.time() - start) / j) * (count - j)
 
-    relevant_commits = sorted(commits, key=lambda index_commit: index_commit[0])
+        mins, sec = divmod(remaining, 60)
+        time_str = f"{int(mins):02}:{sec:05.2f}"
 
-    return relevant_commits[-1][1]
+        print(
+            f"{prefix}{item} [{'█'*x}{('.'*(size-x))}] {j}/{count} Est wait {time_str}",
+            end="\r",
+            file=out,
+            flush=True,
+        )
+
+    for i, item in enumerate(it):
+        yield i, item
+        show(i + 1, item)
+    print("\n", flush=True, file=out)
 
 
 def get_ancestry_path(repo_dir, commit_hash, branch):
@@ -89,14 +175,93 @@ def get_commits_info(git_commit_format):
                 "--format={}".format(git_commit_format),
                 "--no-merges",
                 "{}..{}".format(first_commit, last_commit),
-            ]
+            ],
+            stderr=subprocess.DEVNULL,
         )
         .decode("utf-8")
         .strip()
        .split("\n")
     )
 
-    return list(zip(get_commits_info("%h"), get_commits_info("%cD"), get_commits_info("%an"), get_commits_info("%s")))
+    commit_hashes = get_commits_info("%h")
+    commit_messages = get_commits_info("%s")
+    commiters = get_commits_info("%an")
+
+    return list(zip(commit_hashes, commit_messages, commiters))
+
+
+def file_changes_for_commit(commit_hash, repo_dir):
+    cmd = [
+        "git",
+        "diff",
+        "--numstat",
+        f"{commit_hash}^..{commit_hash}",
+    ]
+    diffstat_output = (
+        subprocess.check_output(
+            cmd,
+            cwd=repo_dir,
+            stderr=subprocess.DEVNULL,
+        )
+        .decode()
+        .strip()
+    )
+
+    parts = diffstat_output.splitlines()
+    changes = []
+    for line in parts:
+        file_path = line.split()[2].strip()
+        additions = line.split()[0].strip()
+        deletions = line.split()[1].strip()
+        additions = additions if additions != "-" else "0"
+        deletions = deletions if deletions != "-" else "0"
+
+        changes.append(
+            {
+                "file_path": "/" + file_path,
+                "num_changes": int(additions) + int(deletions),
+            }
+        )
+
+    return changes
+
+
+def parse_codeowners(codeowners_path):
+    with open(codeowners_path) as f:
+        codeowners = f.readlines()
+    filtered = [line.strip() for line in codeowners]
+    filtered = [line for line in filtered if line and not line.startswith("#")]
+    parsed = {}
+    for line in filtered:
+        result = line.split()
+        teams = [team.split("@dfinity-lab/teams/")[1] for team in result[1:]]
+        pattern = result[0]
+        pattern = pattern if pattern.startswith("/") else "/" + pattern
+        pattern = pattern if not pattern.endswith("/") else pattern + "*"
+
+        parsed[pattern] = teams
+
+    return parsed
+
+
+def parse_conventional_commit(message, pattern):
+    match = pattern.match(message)
+
+    if match:
+        commit_type = match.group(1)
+        commit_scope = match.group(2)[1:-1] if match.group(2) else None
+        commit_message = match.group(3)
+        return {"type": commit_type, "scope": commit_scope, "message": commit_message}
+    return {"type": "other", "scope": None, "message": message}
+
+
+def best_matching_regex(file_path, regex_list):
+    matches = [(regex, fnmatch.fnmatch(file_path, regex)) for regex in regex_list]
+    matches = [match for match in matches if match[1]]
+    if len(matches) == 0:
+        return None
+    matches = list(reversed([match[0] for match in matches]))
+    return matches[0]
 
 
 def main():
@@ -104,82 +269,145 @@ def main():
     last_commit = args.last_commit
     html_path = os.path.expandvars(args.html_path)
     rc_name = args.rc_name
+    conv_commit_pattern = re.compile(r"^(\w+)(\([^\)]*\))?: (.+)$")
+    jira_ticket_regex = r" *\b[A-Z]{2,}\d?-\d+\b:?"  #
+    empty_brackets_regex = r" *\[ *\]:?"  # Sometimes Jira tickets are in square brackets
+
+    change_infos = {}
+
+    ci_patterns = ["/**/*.lock", "/**/*.bzl"]
 
-    with tempfile.TemporaryDirectory() as temp_ic_repo:
+    ic_repo_path = pathlib.Path.home() / ".cache/git/ic"
+
+    if ic_repo_path.exists():
+        print("Fetching new commits in {}".format(ic_repo_path))
+        subprocess.check_call(
+            ["git", "fetch"],
+            cwd=ic_repo_path,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+    else:
+        print("Cloning IC repo to {}".format(ic_repo_path))
         subprocess.check_call(
             [
                 "git",
                 "clone",
-                "--depth={}".format(max_commits),
-                "--filter=blob:none",
-                "--no-checkout",
-                "--single-branch",
-                "--branch={}".format(branch),
                 "https://github.com/dfinity/ic.git",
-                temp_ic_repo,
+                ic_repo_path,
             ],
             stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
         )
-        commits = get_commits(temp_ic_repo, first_commit, last_commit)
-        for i in range(len(commits)):
-            commits[i] = commits[i] + (str(get_merge_commit(temp_ic_repo, commits[i][0], branch)),)
+
+    codeowners = parse_codeowners(ic_repo_path / ".gitlab" / "CODEOWNERS")
+
+    commits = get_commits(ic_repo_path, first_commit, last_commit)
+    for i in range(len(commits)):
+        commits[i] = commits[i] + (str(commits[i][0]),)
 
     if len(commits) == max_commits:
         print("WARNING: max commits limit reached, increase depth")
         exit(1)
 
-    # Current team membership can be found at https://www.notion.so/Teams-864f8176074b4bc7896147f4d1246b54
-    teams = json.load(open(pathlib.Path(__file__).parent / "teams.json", encoding="utf8"))
+    replica_packages = (
+        subprocess.check_output(
+            [
+                "gitlab-ci/container/container-run.sh",
+                "bazel",
+                "query",
+                "--universe_scope=//...",
+                "deps(//ic-os/guestos/envs/prod:update-img.tar.gz) union deps(//ic-os/setupos/envs/prod:disk-img.tar.gz)",
+                "--output=package",
+            ],
+            cwd=ic_repo_path,
+            text=True,
+        )
+        .strip()
+        .splitlines()
+    )
 
-    release_notes = []
-    excluded_changes = []
-    errors = set()
-    replica_teams = [
-        "Consensus",
-        "Crypto",
-        "Orchestrator",
-        "Message Routing",
-        "Networking",
-        "Execution",
-        "Node",
-        "Runtime",
+    replica_packages_filtered = [
+        p for p in replica_packages if not any(re.match(f, p) for f in EXCLUDE_PACKAGES_FILTERS)
     ]
 
-    jira_ticket_regex = r" *\b[A-Z]{2,}\d?-\d+\b:?"  #
-    empty_brackets_regex = r" *\[ *\]:?"  # Sometimes Jira tickets are in square brackets
+    for i, _ in progressbar([i[0] for i in commits], "Processing commit: ", 80):
+        commit_info = commits[i]
+        commit_hash, commit_message, commiter, merge_commit = commit_info
+
+        file_changes = file_changes_for_commit(commit_hash, ic_repo_path)
+        replica_change = any(any(c["file_path"][1:].startswith(p) for c in file_changes) for p in replica_packages)
+        if not replica_change:
+            continue
 
-    has_crossed_out_changes = False
-    for (_abbrv_commit_hash, _date, author, message, merge_commit) in commits:
-        authors_teams = [team for team, members in teams.items() if author in members]
-        if len(authors_teams) == 0:
-            errors.add("ERROR: author '{}' does not belong in any team".format(author))
+        included = any(any(c["file_path"][1:].startswith(p) for c in file_changes) for p in replica_packages_filtered)
 
-        stripped_message = re.sub(jira_ticket_regex, "", message)
+        ownership = {}
+        stripped_message = re.sub(jira_ticket_regex, "", commit_message)
         stripped_message = re.sub(empty_brackets_regex, "", stripped_message)
         stripped_message = stripped_message.strip()
 
-        change = '* [{0}] {1}: {2}<br>'.format(
-            merge_commit[0:9], "/".join(authors_teams), stripped_message
+        conventional = parse_conventional_commit(stripped_message, conv_commit_pattern)
+
+        for change in file_changes:
+            if any([fnmatch.fnmatch(change["file_path"], pattern) for pattern in ci_patterns]):
+                continue
+
+            key = best_matching_regex(change["file_path"], codeowners.keys())
+            teams = ["unknown"] if key is None else codeowners[key]
+
+            for team in teams:
+                if team not in ownership:
+                    ownership[team] = change["num_changes"]
+                    continue
+                ownership[team] += change["num_changes"]
+
+        # Non reviewed files
+        if "ghost" in ownership:
+            ownership.pop("ghost")
+        if "owners-owners" in ownership:
+            ownership.pop("owners-owners")
+
+        teams = []
+        if ownership:
+            max_ownership = ownership[max(ownership, key=lambda x: ownership[x])]
+            # Since multiple teams can own a path in CODEOWNERS we have to handle what happens if two teams have max changes
+            for key, value in ownership.items():
+                if value >= max_ownership * MAX_OWNERSHIP_AREA:
+                    teams.append(key)
+
+        if "test" in conventional["message"]:
+            conventional["type"] = "test"
+
+        commit_type = conventional["type"].lower()
+        commit_type = commit_type if commit_type in TYPE_PRETTY_MAP else "other"
+        if len(teams) >= 3:
+            # The change seems to be touching many teams, let's mark it as "other" (generic)
+            commit_type = "other"
+
+        if ["ic-testing-verification"] == teams or all([team in EXCLUDED_TEAMS for team in teams]):
+            included = False
+
+        if commit_type not in change_infos:
+            change_infos[commit_type] = []
+
+        commiter_parts = commiter.split()
+        commiter = "{:<4} {:<4}".format(
+            commiter_parts[0][:4],
+            commiter_parts[1][:4] if len(commiter_parts) >= 2 else "",
         )
-        if any([authors_team in replica_teams for authors_team in authors_teams]):
-            if any([term in change.lower() for term in ["test", "refactor"]]):
-                release_notes.append("<del>{}</del>".format(change))
-                has_crossed_out_changes = True
-            else:
-                release_notes.append(change)
-        else:
-            excluded_changes.append("<del>{}</del>".format(change))
-
-    if len(errors) > 0:
-        print("\n".join(errors))
-        exit(1)
 
-    release_notes = sorted(
-        release_notes, key=lambda a: a[a.index("]") + 2 :]
-    )  # Sort without including abbrv_commit_hash
-    if has_crossed_out_changes:
-        release_notes.append("* Various tech-debt management: code refactoring, docs, bug fixes, test updates")
+        change_infos[commit_type].append(
+            {
+                "commit": merge_commit,
+                "team": teams,
+                "type": commit_type,
+                "scope": conventional["scope"] if conventional["scope"] else "",
+                "message": conventional["message"],
+                "commiter": commiter,
+                "included": included,
+            }
+        )
 
     with open(html_path, "w", encoding="utf-8") as output:
         output.write(
@@ -195,19 +423,36 @@ def main():
                 rc_name, last_commit
            )
        )
-        output.write(
-            "\n".join(["<p><li>{}</li></p>".format(n) for n in release_notes])
-        )
-        output.write("<p><h2>Excluded changes:</h2></p>\n")
-        output.write(
-            "\n".join(
-                [
-                    "<p><li>{}</li></p>".format(c)
-                    for c in sorted(excluded_changes, key=lambda a: a[a.index("]") + 2 :])
-                ]
+        for current_type in sorted(TYPE_PRETTY_MAP, key=lambda x: TYPE_PRETTY_MAP[x][1]):
+            if current_type not in change_infos:
+                continue
+            output.write(
+                '<p><h2>## {0}:</h2></p>\n'.format(
+                    TYPE_PRETTY_MAP[current_type][0]
+                )
             )
-        )
+
+            for change in sorted(change_infos[current_type], key=lambda x: ",".join(x["team"])):
+                commit_part = '[{0}]'.format(
+                    change["commit"][:9]
+                )
+                team_part = ",".join([TEAM_PRETTY_MAP[team] for team in change["team"]])
+                team_part = team_part if team_part else "General"
+                scope_part = (
+                    ":"
+                    if change["scope"] == "" or change["scope"].lower() == team_part.lower()
+                    else "({0}):".format(change["scope"])
+                )
+                message_part = change["message"]
+                commiter_part = f"<!-- {change['commiter']} -->"
+
".format( + commit_part, team_part, scope_part, message_part, commiter_part + ) + if not change["included"]: + text = "{}".format(text) + output.write("

{}

".format(text)) output.write("")