From b992ab2f7e2932cfc4120937715ecc5e144d3e4d Mon Sep 17 00:00:00 2001 From: Pradeep Tammali Date: Thu, 28 Mar 2024 20:03:41 +0100 Subject: [PATCH] feature: use apis to fetch diff instead of subprocess --- codecov/coverage.py | 17 +++------ codecov/github.py | 15 ++++++++ codecov/github_client.py | 6 +++- codecov/main.py | 26 ++++---------- codecov/subprocess.py | 76 ---------------------------------------- 5 files changed, 30 insertions(+), 110 deletions(-) delete mode 100644 codecov/subprocess.py diff --git a/codecov/coverage.py b/codecov/coverage.py index 973002f..cc0ff52 100644 --- a/codecov/coverage.py +++ b/codecov/coverage.py @@ -8,7 +8,7 @@ import pathlib from collections.abc import Sequence -from codecov import log, subprocess +from codecov import log # The dataclasses in this module are accessible in the template, which is overridable by the user. @@ -238,17 +238,6 @@ def get_diff_coverage_info(added_lines: dict[pathlib.Path, list[int]], coverage: ) -def get_added_lines(git: subprocess.Git, base_ref: str) -> dict[pathlib.Path, list[int]]: - # --unified=0 means we don't get any context lines for chunk, and we - # don't merge chunks. This means the headers that describe line number - # are always enough to derive what line numbers were added. - # TODO: check if it is possible to pull the diff changes from github API - # TODO: Since we don't want to checkout the code locally, it would be better to pull the diff from remote - git.fetch('origin', base_ref, '--depth=1000') - diff = git.diff('--unified=0', 'FETCH_HEAD', '--', '.') - return parse_diff_output(diff) - - def parse_diff_output(diff: str) -> dict[pathlib.Path, list[int]]: current_file: pathlib.Path | None = None added_filename_prefix = '+++ b/' @@ -272,6 +261,8 @@ def parse_line_number_diff_line(line: str) -> Sequence[int]: Parse the "added" part of the line number diff text: @@ -60,0 +61 @@ def compute_files( -> [61] @@ -60,0 +61,3 @@ def compute_files( -> [61, 62, 63] + + Github API returns default context lines 3 at start and end, so we can ignore them. """ start, length = (int(i) for i in (line.split()[2][1:] + ',1').split(',')[:2]) - return range(start, start + length) + return range(start + 3, start + length - 3) diff --git a/codecov/github.py b/codecov/github.py index 840587b..51cd1f7 100644 --- a/codecov/github.py +++ b/codecov/github.py @@ -87,6 +87,21 @@ def get_pr_number(github: github_client.GitHub, config: settings.Config) -> int: ) +def get_pr_diff(github: github_client.GitHub, repository: str, pr_number: int) -> str: + try: + pull_request_diff = ( + github.repos(repository) + .pulls(pr_number) + .get(use_text=True, headers={'Accept': 'application/vnd.github.v3.diff'}) + ) + except github_client.Forbidden as exc: + raise CannotGetPullRequest from exc + except github_client.NotFound as exc: + raise CannotGetPullRequest from exc + + return pull_request_diff + + def post_comment( # pylint: disable=too-many-arguments github: github_client.GitHub, me: str, diff --git a/codecov/github_client.py b/codecov/github_client.py index 2a2403c..e52dea9 100644 --- a/codecov/github_client.py +++ b/codecov/github_client.py @@ -48,9 +48,10 @@ def __init__(self, session: httpx.Client): def __getattr__(self, attr): return _Callable(self, f'/{attr}') - def _http(self, method, path, *, use_bytes=False, **kw): + def _http(self, method, path, *, use_bytes=False, use_text=False, **kw): _method = method.lower() requests_kwargs = {} + headers = kw.pop('headers', {}) if _method == 'get' and kw: requests_kwargs = {'params': kw} @@ -61,10 +62,13 @@ def _http(self, method, path, *, use_bytes=False, **kw): _method.upper(), path, timeout=TIMEOUT, + headers=headers, **requests_kwargs, ) if use_bytes: contents = response.content + elif use_text: + contents = response.text else: contents = response_contents(response) diff --git a/codecov/main.py b/codecov/main.py index 5926bed..135b552 100644 --- a/codecov/main.py +++ b/codecov/main.py @@ -5,17 +5,7 @@ import httpx -from codecov import ( - coverage as coverage_module, - diff_grouper, - github, - github_client, - log, - log_utils, - settings, - subprocess, - template, -) +from codecov import coverage as coverage_module, diff_grouper, github, github_client, log, log_utils, settings, template def main(): @@ -33,9 +23,8 @@ def main(): follow_redirects=True, headers={'Authorization': f'token {config.GITHUB_TOKEN}'}, ) - git = subprocess.Git() - exit_code = action(config=config, github_session=github_session, git=git) + exit_code = action(config=config, github_session=github_session) log.info('Ending action') sys.exit(exit_code) @@ -46,7 +35,7 @@ def main(): sys.exit(1) -def action(config: settings.Config, github_session: httpx.Client, git: subprocess.Git) -> int: +def action(config: settings.Config, github_session: httpx.Client) -> int: log.debug('Fetching Pull Request') gh = github_client.GitHub(session=github_session) try: @@ -67,7 +56,6 @@ def action(config: settings.Config, github_session: httpx.Client, git: subproces config=config, gh=gh, repo_info=repo_info, - git=git, pr_number=pr_number, ) @@ -76,15 +64,13 @@ def process_pr( # pylint: disable=too-many-locals config: settings.Config, gh: github_client.GitHub, repo_info: github.RepositoryInfo, - git: subprocess.Git, pr_number: int, ) -> int: log.info('Generating comment for PR') - _, coverage = coverage_module.get_coverage_info( - coverage_path=config.COVERAGE_PATH, - ) + _, coverage = coverage_module.get_coverage_info(coverage_path=config.COVERAGE_PATH) base_ref = config.GITHUB_BASE_REF or repo_info.default_branch - added_lines = coverage_module.get_added_lines(git=git, base_ref=base_ref) + pr_diff = github.get_pr_diff(github=gh, repository=config.GITHUB_REPOSITORY, pr_number=pr_number) + added_lines = coverage_module.parse_diff_output(diff=pr_diff) diff_coverage = coverage_module.get_diff_coverage_info(coverage=coverage, added_lines=added_lines) marker = template.get_marker(marker_id=config.SUBPROJECT_ID) diff --git a/codecov/subprocess.py b/codecov/subprocess.py deleted file mode 100644 index 538b5e8..0000000 --- a/codecov/subprocess.py +++ /dev/null @@ -1,76 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import annotations - -import functools -import os -import pathlib -import shlex -import subprocess # nosec: B404:blacklist -from typing import Any - -from codecov import log - - -class SubProcessError(Exception): - pass - - -class GitError(SubProcessError): - pass - - -def run(*args, path: pathlib.Path, **kwargs) -> str: - try: - return subprocess.run( - [shlex.quote(arg) for arg in args], # nosec: B603:subprocess_without_shell_equals_true # noqa: S603 - cwd=path, - text=True, - # Only relates to DecodeErrors while decoding the output - errors='replace', - check=True, - capture_output=True, - **kwargs, - ).stdout - except subprocess.CalledProcessError as exc: - log.debug(f'Command failed: {args=} {path=} {kwargs=} {exc.stderr=} {exc.returncode=}') - raise SubProcessError('\n'.join([exc.stderr, exc.stdout])) from exc - - -class Git: - """ - Wrapper around calling git subprocesses in a way that reads a tiny bit like - Python code. - Call a method on git to call the corresponding subcommand (use `_` for `-`). - Add string parameters for the rest of the command line. - - Returns stdout or raise GitError - - >>> git = Git() - >>> git.clone(url) - >>> git.commit("-m", message) - >>> git.rev_parse("--short", "HEAD") - """ - - cwd = pathlib.Path('.') - - def _git(self, *args: str, env: dict[str, str] | None = None, **kwargs) -> str: - # When setting the `env` argument to run, instead of inheriting env - # vars from the current process, the whole environment of the - # subprocess is whatever we pass. In other words, we can either - # conditionally pass an `env` parameter, but it's less readable, - # or we can always pass an `env` parameter, but in this case, we - # need to always merge `os.environ` to it (and ensure our variables - # have precedence) - try: - return run( - 'git', - *args, - path=self.cwd, - env=os.environ | (env or {}), - **kwargs, - ) - except SubProcessError as exc: - raise GitError from exc - - def __getattr__(self, name: str) -> Any: - return functools.partial(self._git, name.replace('_', '-'))