diff --git a/.gitignore b/.gitignore index ccced1f..b449602 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ tags # macOS-related .DS_Store +# Python-based virtual environment I use for publishing +.venv-publish + # Shamelessly taken from here because I'm lazy: # https://github.com/github/gitignore/blob/main/Python.gitignore diff --git a/MANIFEST.in b/MANIFEST.in index cce1886..30a3cc6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,9 @@ include LICENSE include README.md -include TODO.md +include CODE_OF_CONDUCT.md +include CONTRIBUTING.md recursive-include git_py_stats *.py -recursive-include man * -global-exclude *.pyc __pycache__/ +recursive-include man *.1 +global-exclude *.pyc __pycache__/ *.py[cod] prune git_py_stats/tests diff --git a/README.md b/README.md index 74f8506..db3dbd1 100644 --- a/README.md +++ b/README.md @@ -307,12 +307,22 @@ export _GIT_MERGE_VIEW="exclusive" ### Git Branch You can set the variable `_GIT_BRANCH` to set the branch of the stats. -Works with commands `--git-stats-by-branch` and `--csv-output-by-branch`. +Currently works only with the `--csv-output-by-branch` command. ```bash export _GIT_BRANCH="master" ``` +### Ignore Authors + +You can set the variable `_GIT_IGNORE_AUTHORS` to filter out specific +authors. It will currently work with the "Code reviewers", "New contributors", +"All branches", and "Output daily stats by branch in CSV format" options. 
+ ```bash +export _GIT_IGNORE_AUTHORS="(author@example.com|username)" +``` + ### Sorting Contribution Stats You can sort contribution stats by field `name`, `commits`, `insertions`, diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 4f6bff5..0000000 --- a/TODO.md +++ /dev/null @@ -1,26 +0,0 @@ -# TODO - -* Create a full unit test suite - - Only some generic tests are currently done -* Create pipelines for when you commit - - Test suite should run when someone submits a PR in GitHub -* Add configuration similar to how `git-quick-stats` does it - - Maybe play around with a config file as an option that the user can save - - Probably choose INI since Python can handle this via configparser -* Structure could probably use a slight adjustment - - Right now, it's fairly small so we can deal with all of the source files - being co-located. As it grows, it might be better to create a logical - folder structure for everything. Be mindful of how imports will work -* Review imports - - I hate dealing with imports. There's always the battle between absolute - and implicit with many people have their own opinions on how it should be - handled. This will go hand-in-hand with the folder structure task -* Handle file generation better - - Right now, we just blast a file on the filesystem. Should we warn the user - if one exists already? Should we generate another and subtly rename it? -* Triple-check all functions perform exactly as they do in `git-quick-stats` - - Some of the file generation isn't quite 1:1. -* Run a pep8 linter on this - - flake8 should work. 
- `flake8 git_py_stats --max-line-length=120 --statistics --count` - or something diff --git a/git_py_stats/config.py b/git_py_stats/config.py index 8ec94af..313b45d 100644 --- a/git_py_stats/config.py +++ b/git_py_stats/config.py @@ -3,11 +3,34 @@ """ import os +import re from datetime import datetime -from typing import Dict, Union, Optional +from typing import Dict, Union, Optional, Callable from git_py_stats.git_operations import run_git_command +def _build_author_exclusion_filter(pattern: str) -> Callable[[str], bool]: + """ + Compile a string of authors that tells you whether an author + should be ignored based on a user-configured environment + variable. + + Args: + pattern (str): A regex (Example: "(user@example.com|Some User)"). + No flags are injected automatically, but users can + include them for case-insensitive matches. + + Returns: + Callable[[str], bool]: Input string 's' that matches the pattern to be + ignored. False otherwise. + """ + pattern = (pattern or "").strip() + if not pattern: + return lambda _s: False + rx = re.compile(pattern) + return lambda s: bool(rx.search(s or "")) + + def _parse_git_sort_by(raw: str) -> tuple[str, str]: """ Helper function for handling sorting features for contribution stats. @@ -81,11 +104,14 @@ def get_config() -> Dict[str, Union[str, int]]: - 'enable' to use the user's default merge view from the conf. Default is usually to show both regular and merge commits. - Any other value defaults to '--no-merges' currently. + _GIT_BRANCH (str): Sets branch you want to target for some stats. + Default is empty which falls back to the current branch you're on. _GIT_LIMIT (int): Limits the git log output. Defaults to 10. _GIT_LOG_OPTIONS (str): Additional git log options. Default is empty. _GIT_DAYS (int): Defines number of days for the heatmap. Default is empty. _GIT_SORT_BY (str): Defines sort metric and direction for contribution stats. Default is name-asc. + _GIT_IGNORE_AUTHORS (str): Defines authors to ignore. 
Default is empty. _MENU_THEME (str): Toggles between the default theme and legacy theme. - 'legacy' to set the legacy theme - 'none' to disable the menu theme @@ -100,8 +126,12 @@ def get_config() -> Dict[str, Union[str, int]]: - 'until' (str): Git command option for the end date. - 'pathspec' (str): Git command option for pathspec. - 'merges' (str): Git command option for merge commit view strategy. + - 'branch' (str): Git branch name. - 'limit' (int): Git log output limit. - 'log_options' (str): Additional git log options. + - 'days' (str): Number of days for the heatmap. + - 'sort_by' (str): Sort by field and sort direction (asc/desc). + - 'ignore_authors' (Callable[[str], bool]): Returns True for author(s) to ignore. - 'menu_theme' (str): Menu theme color. """ config: Dict[str, Union[str, int]] = {} @@ -146,6 +176,13 @@ def get_config() -> Dict[str, Union[str, int]]: else: config["merges"] = "--no-merges" + # _GIT_BRANCH + git_branch: Optional[str] = os.environ.get("_GIT_BRANCH") + if git_branch: + config["branch"] = git_branch + else: + config["branch"] = "" + # _GIT_LIMIT git_limit: Optional[str] = os.environ.get("_GIT_LIMIT") if git_limit: @@ -184,6 +221,10 @@ def get_config() -> Dict[str, Union[str, int]]: config["sort_by"] = sort_by config["sort_dir"] = sort_dir + # _GIT_IGNORE_AUTHORS + ignore_authors_pattern: Optional[str] = os.environ.get("_GIT_IGNORE_AUTHORS") + config["ignore_authors"] = _build_author_exclusion_filter(ignore_authors_pattern) + # _MENU_THEME menu_theme: Optional[str] = os.environ.get("_MENU_THEME") if menu_theme == "legacy": diff --git a/git_py_stats/generate_cmds.py b/git_py_stats/generate_cmds.py index 6813277..1c89187 100644 --- a/git_py_stats/generate_cmds.py +++ b/git_py_stats/generate_cmds.py @@ -451,8 +451,11 @@ def output_daily_stats_csv(config: Dict[str, Union[str, int]]) -> None: until = config.get("until", "") log_options = config.get("log_options", "") pathspec = config.get("pathspec", "") + branch = config.get("branch", "") + ignore_authors = 
config.get("ignore_authors", lambda _s: False) - branch = input("Enter branch name (leave empty for current branch): ") + if not branch: + branch = input("Enter branch name (leave empty for current branch): ") # Original command: # git -c log.showSignature=false log ${_branch} --use-mailmap $_merges --numstat \ @@ -478,22 +481,70 @@ def output_daily_stats_csv(config: Dict[str, Union[str, int]]) -> None: cmd = [arg for arg in cmd if arg] output = run_git_command(cmd) - if output: - dates = output.split("\n") - counter = collections.Counter(dates) - filename = "daily_stats.csv" - try: - with open(filename, "w", newline="") as csvfile: - fieldnames = ["Date", "Commits"] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - for date, count in sorted(counter.items()): - writer.writerow({"Date": date, "Commits": count}) - print(f"Daily stats saved to {filename}") - except IOError as e: - print(f"Failed to write to {filename}: {e}") - else: + + # Exit early if no output valid + if not output: print("No data available.") + return + + # NOTE: This has to be expanded to handle the new ability to ignore + # authors, but there might be a better way to handle this... 
+ kept_lines = [] + current_block = [] + current_ignored = False + have_seen_author = False + + for line in output.splitlines(): + # New commit starts + if line.startswith("commit "): + # Flush the previous block + if current_block and not current_ignored: + kept_lines.extend(current_block) + # Reset for the next block + current_block = [line] + current_ignored = False + have_seen_author = False + continue + + # Only check author once per block + if not have_seen_author and line.startswith("Author: "): + author_line = line[len("Author: ") :].strip() + name = author_line + email = "" + if "<" in author_line and ">" in author_line: + name = author_line.split("<", 1)[0].strip() + email = author_line.split("<", 1)[1].split(">", 1)[0].strip() + + # If any form matches (name or email), drop the whole block + if ( + ignore_authors(author_line) + or ignore_authors(name) + or (email and ignore_authors(email)) + ): + current_ignored = True + have_seen_author = True + current_block.append(line) + + # Flush the last block + if current_block and not current_ignored: + kept_lines.extend(current_block) + + # Found nothing worth keeping? 
Just exit then + if not kept_lines: + print("No data available.") + return + + counter = collections.Counter(kept_lines) + filename = "git_daily_stats.csv" + try: + with open(filename, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=["Date", "Commits"]) + writer.writeheader() + for text, count in sorted(counter.items()): + writer.writerow({"Date": text, "Commits": count}) + print(f"Daily stats saved to {filename}") + except IOError as e: + print(f"Failed to write to {filename}: {e}") # TODO: This doesn't match the original functionality as it uses some pretty diff --git a/git_py_stats/interactive_mode.py b/git_py_stats/interactive_mode.py index ba008c2..0095510 100644 --- a/git_py_stats/interactive_mode.py +++ b/git_py_stats/interactive_mode.py @@ -30,7 +30,7 @@ def handle_interactive_mode(config: Dict[str, Union[str, int]]) -> None: "6": lambda: generate_cmds.output_daily_stats_csv(config), "7": lambda: generate_cmds.save_git_log_output_json(config), "8": lambda: list_cmds.branch_tree(config), - "9": list_cmds.branches_by_date, + "9": lambda: list_cmds.branches_by_date(config), "10": lambda: list_cmds.contributors(config), "11": lambda: list_cmds.new_contributors(config, input("Enter cutoff date (YYYY-MM-DD): ")), "12": lambda: list_cmds.git_commits_per_author(config), diff --git a/git_py_stats/list_cmds.py b/git_py_stats/list_cmds.py index ff1eaa5..e772b04 100644 --- a/git_py_stats/list_cmds.py +++ b/git_py_stats/list_cmds.py @@ -79,24 +79,32 @@ def branch_tree(config: Dict[str, Union[str, int]]) -> None: print("No data available.") -def branches_by_date() -> None: +def branches_by_date(config: Dict[str, Union[str, int]]) -> None: """ Lists branches sorted by the latest commit date. Args: - None + config: Dict[str, Union[str, int]]: Config dictionary holding env vars. Returns: None """ + # Grab the config options from our config.py. 
+ ignore_authors = config.get("ignore_authors", lambda _s: False) + # Original command: # git for-each-ref --sort=committerdate refs/heads/ \ # --format='[%(authordate:relative)] %(authorname) %(refname:short)' | cat -n # TODO: Wouldn't git log --pretty=format:'%ad' --date=short be better here? # Then we could pipe it through sort, uniq -c, sort -nr, etc. # Possibly feed back into the parent project - format_str = "[%(authordate:relative)] %(authorname) %(refname:short)" + + # Include the email so we can filter based off it, but keep the visible + # part the same as before. + visible_fmt = "[%(authordate:relative)] %(authorname) %(refname:short)" + format_str = f"{visible_fmt}|%(authoremail)" + cmd = [ "git", "for-each-ref", @@ -106,19 +114,35 @@ def branches_by_date() -> None: ] output = run_git_command(cmd) - if output: - # Split the output into lines - lines = output.split("\n") + if not output: + print("No commits found.") + return - # Number the lines similar to 'cat -n' - numbered_lines = [f"{idx + 1} {line}" for idx, line in enumerate(lines)] + # Split lines and filter by author (both name and email), but keep + # visible text only. + visible_lines = [] + for raw in output.split("\n"): + if not raw.strip(): + continue + if "|" in raw: + visible, email = raw.split("|", 1) + else: + visible, email = raw, "" - # Output numbered lines - print("All branches (sorted by most recent commit):\n") - for line in numbered_lines: - print(f"\t{line}") - else: + # Filter by either email or the visible chunk. 
+ if ignore_authors(email) or ignore_authors(visible): + continue + + visible_lines.append(visible) + + if not visible_lines: print("No commits found.") + return + + # Number like `cat -n` + print("All branches (sorted by most recent commit):\n") + for idx, line in enumerate(visible_lines, 1): + print(f"\t{idx} {line}") def contributors(config: Dict[str, Union[str, int]]) -> None: @@ -213,6 +237,7 @@ def new_contributors(config: Dict[str, Union[str, int]], new_date: str) -> None: until = config.get("until", "") log_options = config.get("log_options", "") pathspec = config.get("pathspec", "") + ignore_authors = config.get("ignore_authors", lambda _s: False) # Original command: # git -c log.showSignature=false log --use-mailmap $_merges \ @@ -245,6 +270,9 @@ def new_contributors(config: Dict[str, Union[str, int]], new_date: str) -> None: try: email, timestamp = line.split("|") timestamp = int(timestamp) + # Skip ignored by email + if ignore_authors(email): + continue # If the contributor is not in the dictionary or the current timestamp is earlier if email not in contributors_dict or timestamp < contributors_dict[email]: contributors_dict[email] = timestamp @@ -283,12 +311,14 @@ def new_contributors(config: Dict[str, Union[str, int]], new_date: str) -> None: name_cmd = [arg for arg in name_cmd if arg] # Grab name + email if we can. 
Otherwise, just grab email - name = run_git_command(name_cmd) - if name: - new_contributors_list.append((name, email)) - else: - new_contributors_list.append(("", email)) - + # while also making sure to ignore any authors that may be + # in our ignore_author env var + name = (run_git_command(name_cmd) or "").strip() + combo = f"{name} <{email}>" if name else f"<{email}>" + if ignore_authors(email) or ignore_authors(name) or ignore_authors(combo): + continue + + new_contributors_list.append((name, email)) # Sort the list alphabetically by name to match the original # and print all of this out if new_contributors_list: diff --git a/git_py_stats/non_interactive_mode.py b/git_py_stats/non_interactive_mode.py index cc271c5..d8166f9 100644 --- a/git_py_stats/non_interactive_mode.py +++ b/git_py_stats/non_interactive_mode.py @@ -30,7 +30,7 @@ def handle_non_interactive_mode(args: Namespace, config: Dict[str, Union[str, in "csv_output_by_branch": lambda: generate_cmds.output_daily_stats_csv(config), "json_output": lambda: generate_cmds.save_git_log_output_json(config), "branch_tree": lambda: list_cmds.branch_tree(config), - "branches_by_date": list_cmds.branches_by_date, + "branches_by_date": lambda: list_cmds.branches_by_date(config), "contributors": lambda: list_cmds.contributors(config), "new_contributors": lambda: list_cmds.new_contributors(config, args.new_contributors), "commits_per_author": lambda: list_cmds.git_commits_per_author(config), diff --git a/git_py_stats/suggest_cmds.py b/git_py_stats/suggest_cmds.py index bb54f57..9122b9d 100644 --- a/git_py_stats/suggest_cmds.py +++ b/git_py_stats/suggest_cmds.py @@ -34,6 +34,7 @@ def suggest_reviewers(config: Dict[str, Union[str, int]]) -> None: until = config.get("until", "") log_options = config.get("log_options", "") pathspec = config.get("pathspec", "") + ignore_authors = config.get("ignore_authors", lambda _s: False) cmd = [ "git", @@ -64,6 +65,9 @@ def suggest_reviewers(config: Dict[str, Union[str, int]]) -> 
None: lines = [line.strip() for line in output.splitlines()] lines = [line for line in lines if line] + # Drop ignored authors (name-or-email patterns both supported) + lines = [a for a in lines if not ignore_authors(a)] + # Return early if nothing found if not lines: print("No potential reviewers found.") diff --git a/git_py_stats/tests/test_generate_cmds.py b/git_py_stats/tests/test_generate_cmds.py index 9ae3a6d..d8a1843 100644 --- a/git_py_stats/tests/test_generate_cmds.py +++ b/git_py_stats/tests/test_generate_cmds.py @@ -268,11 +268,11 @@ def test_output_daily_stats_csv(self, mock_print, mock_input, mock_run_git_comma generate_cmds.output_daily_stats_csv(self.mock_config) # Check that file was written - mocked_file.assert_called_with("daily_stats.csv", "w", newline="") + mocked_file.assert_called_with("git_daily_stats.csv", "w", newline="") # Check that print was called self.assertTrue(mock_print.called) - mock_print.assert_any_call("Daily stats saved to daily_stats.csv") + mock_print.assert_any_call("Daily stats saved to git_daily_stats.csv") @patch("git_py_stats.generate_cmds.run_git_command") @patch("builtins.input", return_value="") @@ -378,7 +378,7 @@ def test_output_daily_stats_csv_io_error(self, mock_print, mock_input, mock_run_ with patch("builtins.open", side_effect=IOError("Disk full")): generate_cmds.output_daily_stats_csv(self.mock_config) - mock_print.assert_any_call("Failed to write to daily_stats.csv: Disk full") + mock_print.assert_any_call("Failed to write to git_daily_stats.csv: Disk full") @patch("git_py_stats.generate_cmds.run_git_command") @patch("builtins.print") diff --git a/git_py_stats/tests/test_interactive_mode.py b/git_py_stats/tests/test_interactive_mode.py index d606030..6f3edba 100644 --- a/git_py_stats/tests/test_interactive_mode.py +++ b/git_py_stats/tests/test_interactive_mode.py @@ -89,7 +89,7 @@ def test_option_8(self, mock_branch_tree, mock_interactive_menu): def test_option_9(self, mock_branches_by_date, 
mock_interactive_menu): mock_interactive_menu.side_effect = ["9", ""] interactive_mode.handle_interactive_mode(self.mock_config) - mock_branches_by_date.assert_called_once_with() + mock_branches_by_date.assert_called_once_with(self.mock_config) @patch("git_py_stats.interactive_mode.interactive_menu") @patch("git_py_stats.list_cmds.contributors") diff --git a/git_py_stats/tests/test_list_cmds.py b/git_py_stats/tests/test_list_cmds.py index 4e354c4..f45a0b9 100644 --- a/git_py_stats/tests/test_list_cmds.py +++ b/git_py_stats/tests/test_list_cmds.py @@ -57,7 +57,7 @@ def test_branches_by_date(self, mock_print, mock_run_git_command) -> None: mock_run_git_command.return_value = ( "[2021-01-01] Author1 branch1\n" "[2021-01-02] Author2 branch2\n" ) - list_cmds.branches_by_date() + list_cmds.branches_by_date(self.mock_config) mock_print.assert_called() mock_run_git_command.assert_called_once() @@ -69,7 +69,7 @@ def test_branches_by_date_no_data(self, mock_print, mock_run_git_command) -> Non Test case for branches_by_date with no data. 
""" mock_run_git_command.return_value = "" - list_cmds.branches_by_date() + list_cmds.branches_by_date(self.mock_config) mock_print.assert_called_with("No commits found.") diff --git a/git_py_stats/tests/test_non_interactive_mode.py b/git_py_stats/tests/test_non_interactive_mode.py index 6f1cdc8..dbf2de9 100644 --- a/git_py_stats/tests/test_non_interactive_mode.py +++ b/git_py_stats/tests/test_non_interactive_mode.py @@ -136,7 +136,7 @@ def test_branches_by_date(self, mock_branches_by_date): args_dict["branches_by_date"] = True args = Namespace(**args_dict) non_interactive_mode.handle_non_interactive_mode(args, self.mock_config) - mock_branches_by_date.assert_called_once_with() + mock_branches_by_date.assert_called_once_with(self.mock_config) @patch("git_py_stats.non_interactive_mode.list_cmds.contributors") def test_contributors(self, mock_contributors): diff --git a/man/git-py-stats.1 b/man/git-py-stats.1 index 41e4ad9..1cc7d22 100644 --- a/man/git-py-stats.1 +++ b/man/git-py-stats.1 @@ -1,4 +1,4 @@ -.TH GIT-PY-STATS "1" "September 2024" "git-py-stats 0.1.0" "User Commands" +.TH GIT-PY-STATS "1" "September 2024" "git-py-stats 0.2.0" "User Commands" .SH NAME git-py-stats \- A Python implementation of git-quick-stats. diff --git a/setup.py b/setup.py index 215377b..4dacde2 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ setup( name="git-py-stats", - version="0.1.0", - packages=find_packages(), + version="0.2.0", + packages=find_packages(exclude=("git_py_stats.tests", "git_py_stats.tests.*")), entry_points={ "console_scripts": [ "git-py-stats=git_py_stats.main:main", @@ -22,10 +22,9 @@ author="Tom Ice", author_email="contact@thomasice.com", license="MIT", - url="https://github.com/tomice/git-py-stats", + url="https://github.com/git-quick-stats/git-py-stats", classifiers=[ "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], python_requires=">=3.8",