From fe647069157562501010cff582f62604c93d7a39 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 20:48:30 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`g?= =?UTF-8?q?et=5Fdiff=5Flines=5Fcount`=20by=2015%=20in=20PR=20#274=20(`skip?= =?UTF-8?q?-formatting-for-large-diffs`)=20Here=20is=20an=20optimized=20ve?= =?UTF-8?q?rsion=20of=20your=20program.=20Key=20Improvements.=20-=20Avoids?= =?UTF-8?q?=20splitting=20all=20lines=20and=20list=20allocation;=20instead?= =?UTF-8?q?,=20iterates=20only=20as=20needed=20and=20sums=20matches=20(sav?= =?UTF-8?q?es=20both=20memory=20and=20runtime).=20-=20Eliminates=20the=20i?= =?UTF-8?q?nner=20function=20and=20replaces=20it=20with=20a=20fast=20inlin?= =?UTF-8?q?e=20check.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Why this is faster:** - Uses a simple for-loop instead of building a list. - Checks first character directly—less overhead than calling `startswith` multiple times. - Skips the closure. - No intermediate list storage. The function result and behavior are identical. --- codeflash/code_utils/formatter.py | 37 ++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 3d5b587c6..f76f6c0ad 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -4,6 +4,7 @@ import shlex import subprocess from typing import TYPE_CHECKING, Optional + import isort from codeflash.cli_cmds.console import console, logger @@ -11,12 +12,14 @@ if TYPE_CHECKING: from pathlib import Path + def get_nth_line(text: str, n: int) -> str | None: for i, line in enumerate(text.splitlines(), start=1): if i == n: return line return None + def get_diff_output(cmd: list[str]) -> Optional[str]: try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) @@ -27,7 +30,7 @@ def get_diff_output(cmd: list[str]) -> Optional[str]: is_ruff = cmd[0] == "ruff" if e.returncode == 0 and is_ruff: return "" - elif e.returncode == 1 and is_ruff: + if e.returncode == 1 and is_ruff: return e.stdout.strip() or None return None @@ -35,25 +38,30 @@ def get_diff_output(cmd: list[str]) -> Optional[str]: def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: try: import black # type: ignore - return get_diff_output(['black', '--diff', filepath]) + + return get_diff_output(["black", "--diff", filepath]) except ImportError: return None + def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]: try: import ruff # type: ignore - return get_diff_output(['ruff', 'format', '--diff', filepath]) + + return get_diff_output(["ruff", "format", "--diff", filepath]) except ImportError: print("can't import ruff") return None def get_diff_lines_count(diff_output: str) -> int: - lines = diff_output.split('\n') - def is_diff_line(line: str) -> bool: - return line.startswith(('+', '-')) and not line.startswith(('+++', '---')) - diff_lines = [line for line in lines if is_diff_line(line)] - return len(diff_lines) + # Count lines that are diff changes (start with '+' or '-', but not '+++' or '---') + count = 0 + for line in diff_output.split("\n"): + if line and (line[0] in ("+", "-")) and not (line[:3] == "+++" or line[:3] == "---"): + count += 1 + return count + def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: diff_changes_stdout = None @@ -61,20 +69,19 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: diff_changes_stdout = get_diff_lines_output_by_black(filepath) if diff_changes_stdout is None: - logger.warning(f"black formatter not found, trying ruff instead...") + logger.warning("black formatter not found, trying ruff instead...") diff_changes_stdout = get_diff_lines_output_by_ruff(filepath) if diff_changes_stdout is None: - logger.warning(f"Both ruff, black formatters not found, skipping formatting diff check.") + logger.warning("Both ruff, black formatters not found, skipping formatting diff check.") return False - + diff_lines_count = get_diff_lines_count(diff_changes_stdout) - + if diff_lines_count > max_diff_lines: logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") return False - else: - return True - + return True + def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution