From c75bbf65e2f9785c7aaed11ee2540805e1adc6f5 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 11:56:09 +0300 Subject: [PATCH 01/26] check large diffs with black, and skipp formatting in such case (after optimizing) --- code_to_optimize/few_formatting_errors.py | 47 +++++++ code_to_optimize/many_formatting_errors.py | 147 +++++++++++++++++++++ codeflash/code_utils/formatter.py | 39 +++++- tests/test_formatter.py | 69 ++++++++++ 4 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 code_to_optimize/few_formatting_errors.py create mode 100644 code_to_optimize/many_formatting_errors.py diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py new file mode 100644 index 000000000..905be2b39 --- /dev/null +++ b/code_to_optimize/few_formatting_errors.py @@ -0,0 +1,47 @@ +import os + +class BadlyFormattedClass(object): + def __init__( + self, + name, + age= None, + email= None, + phone=None, + address=None, + city=None, + state=None, + zip_code=None, + ): + self.name = name + self.age = age + self.email = email + self.phone = phone + self. address = address + self.city = city + self.state = state + self.zip_code = zip_code + self.data = {"name": name, "age": age, "email": email} + + def get_info(self): + return f"Name: {self.name}, Age: {self.age}" + + def update_data(self, **kwargs): + for key, value in kwargs.items(): + if hasattr(self, key): + setattr(self, key, value) + self.data.update(kwargs) + + +def process_data( + data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False +): + if not data_list: + return [] + if filter_func: + data_list = [ item for item in data_list if filter_func(item)] + if transform_func: + data_list = [transform_func(item) for item in data_list] + if sort_key: + data_list = sorted(data_list, key=sort_key, reverse=reverse) + return data_list + diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py new file mode 100644 index 000000000..bd792e3d3 --- /dev/null +++ b/code_to_optimize/many_formatting_errors.py @@ -0,0 +1,147 @@ +import os,sys,json,datetime,math,random;import requests;from collections import defaultdict,OrderedDict +from typing import List,Dict,Optional,Union,Tuple,Any;import numpy as np;import pandas as pd + +# This is a poorly formatted Python file with many style violations + +class BadlyFormattedClass( object ): + def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None): + self.name=name;self.age=age;self.email=email;self.phone=phone + self.address=address;self.city=city;self.state=state;self.zip_code=zip_code + self.data={"name":name,"age":age,"email":email} + + def get_info(self ): + return f"Name: {self.name}, Age: {self.age}" + + def update_data(self,**kwargs): + for key,value in kwargs.items(): + if hasattr(self,key):setattr(self,key,value) + self.data.update(kwargs) + +def process_data(data_list,filter_func=None,transform_func=None,sort_key=None,reverse=False): + if not data_list:return[] + if filter_func:data_list=[item for item in data_list if filter_func(item)] + if transform_func:data_list=[transform_func(item)for item in data_list] + if sort_key:data_list=sorted(data_list,key=sort_key,reverse=reverse) + return data_list + +def calculate_statistics(numbers): + if not numbers:return None + mean=sum(numbers)/len(numbers); median=sorted(numbers)[len(numbers)//2] + variance=sum((x-mean)**2 for x in numbers)/len(numbers);std_dev=math.sqrt(variance) + return {"mean":mean,"median":median,"variance":variance,"std_dev":std_dev,"min":min(numbers),"max":max(numbers)} + +def complex_nested_function(x,y,z): + def inner_function_1(a,b): + def deeply_nested(c,d): + return c*d+a*b + return deeply_nested(a+1,b-1)+deeply_nested(a-1,b+1) + def inner_function_2 (a,b,c): + result=[] + for i in range(a): + for j in range(b): + for k in range(c): + if i*j*k>0:result.append(i*j*k) + elif i+j+k==0:result.append(-1) + else :result.append(0) + return result + return inner_function_1(x,y)+sum(inner_function_2(x,y,z)) + +# Long lines and poor dictionary formatting +user_data={"users":[{"id":1,"name":"John Doe","email":"john@example.com","preferences":{"theme":"dark","notifications":True,"language":"en"},"metadata":{"created_at":"2023-01-01","last_login":"2024-01-01","login_count":150}},{"id":2,"name":"Jane Smith","email":"jane@example.com","preferences":{"theme":"light","notifications":False,"language":"es"},"metadata":{"created_at":"2023-02-15","last_login":"2024-01-15","login_count":89}}]} + +# Poor list formatting and string concatenation +long_list_of_items=['item_1','item_2','item_3','item_4','item_5','item_6','item_7','item_8','item_9','item_10','item_11','item_12','item_13','item_14','item_15','item_16','item_17','item_18','item_19','item_20'] + +def generate_report(data,include_stats=True,include_charts=False,format_type='json',output_file=None): + if not data:raise ValueError("Data cannot be empty") + report={'timestamp':datetime.datetime.now().isoformat(),'data_count':len(data),'summary':{}} + + # Bad formatting in loops and conditionals + for i,item in enumerate(data): + if isinstance(item,dict): + for key,value in item.items(): + if key not in report['summary']:report['summary'][key]=[] + report['summary'][key].append(value) + elif isinstance(item,(int,float)): + if 'numbers' not in report['summary']:report['summary']['numbers']=[] + report['summary']['numbers'].append(item) + else: + if 'other' not in report['summary']:report['summary']['other']=[] + report['summary']['other'].append(str(item)) + + if include_stats and 'numbers' in report['summary']: + numbers=report['summary']['numbers'] + report['statistics']=calculate_statistics(numbers) + + # Long conditional chain with poor formatting + if format_type=='json':result=json.dumps(report,indent=None,separators=(',',':')) + elif format_type=='pretty_json':result=json.dumps(report,indent=2) + elif format_type=='string':result=str(report) + else:result=report + + if output_file: + with open(output_file,'w')as f:f.write(result if isinstance(result,str)else json.dumps(result)) + + return result + +class DataProcessor ( BadlyFormattedClass ) : + def __init__(self,data_source,config=None,debug=False): + super().__init__("DataProcessor") + self.data_source=data_source;self.config=config or{};self.debug=debug + self.processed_data=[];self.errors=[];self.warnings=[] + + def load_data ( self ) : + try: + if isinstance(self.data_source,str): + if self.data_source.endswith('.json'): + with open(self.data_source,'r')as f:data=json.load(f) + elif self.data_source.endswith('.csv'):data=pd.read_csv(self.data_source).to_dict('records') + else:raise ValueError(f"Unsupported file type: {self.data_source}") + elif isinstance(self.data_source,list):data=self.data_source + else:data=[self.data_source] + return data + except Exception as e: + self.errors.append(str(e));return[] + + def validate_data(self,data): + valid_items=[];invalid_items=[] + for item in data: + if isinstance(item,dict)and'id'in item and'name'in item:valid_items.append(item) + else:invalid_items.append(item) + if invalid_items:self.warnings.append(f"Found {len(invalid_items)} invalid items") + return valid_items + + def process(self): + data=self.load_data() + if not data:return{"success":False,"error":"No data loaded"} + + validated_data=self.validate_data(data) + processed_result=process_data(validated_data, + filter_func=lambda x:x.get('active',True), + transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()}, + sort_key=lambda x:x.get('name','')) + + self.processed_data=processed_result + return{"success":True,"count":len(processed_result),"data":processed_result} +if __name__=="__main__": + sample_data=[{"id":1,"name":"Alice","active":True},{"id":2,"name":"Bob","active":False},{"id":3,"name":"Charlie","active":True}] + + processor=DataProcessor(sample_data,config={"debug":True}) + result=processor.process() + + if result["success"]: + print(f"Successfully processed {result['count']} items") + for item in result["data"][:3]:print(f"- {item['name']} (ID: {item['id']})") + else:print(f"Processing failed: {result.get('error','Unknown error')}") + + # Generate report with poor formatting + report=generate_report(sample_data,include_stats=True,format_type='pretty_json') + print("Generated report:",report[:100]+"..."if len(report)>100 else report) + + # Complex calculation with poor spacing + numbers=[random.randint(1,100)for _ in range(50)] + stats=calculate_statistics(numbers) + complex_result=complex_nested_function(5,3,2) + + print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}") + print(f"Complex calculation result: {complex_result}") \ No newline at end of file diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 927a4d4cb..0b673ae28 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -13,14 +13,49 @@ from pathlib import Path +def should_format_file(filepath, max_lines_changed=50): + try: + # check if black is installed + subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + result = subprocess.run( + ['black', '--diff', filepath], + capture_output=True, + text=True + ) + + if result.returncode == 0 and not result.stdout: + return False + + diff_lines = [line for line in result.stdout.split('\n') + if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))] + + changes_count = len(diff_lines) + + if changes_count > max_lines_changed: + logger.debug(f"Skipping {filepath}: {changes_count} lines would change (max: {max_lines_changed})") + return False + + return True + + except subprocess.CalledProcessError: + logger.warning(f"black command failed for {filepath}") + return False + except FileNotFoundError: + logger.warning("black is not installed. Skipping formatting check.") + return False + + + def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution formatter_name = formatter_cmds[0].lower() if not path.exists(): msg = f"File {path} does not exist. Cannot format the file." raise FileNotFoundError(msg) - if formatter_name == "disabled": + if formatter_name == "disabled" or not should_format_file(path): return path.read_text(encoding="utf8") + file_token = "$file" # noqa: S105 for command in formatter_cmds: formatter_cmd_list = shlex.split(command, posix=os.name != "nt") @@ -29,7 +64,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True result = subprocess.run(formatter_cmd_list, capture_output=True, check=False) if result.returncode == 0: if print_status: - console.rule(f"Formatted Successfully with: {formatter_name.replace('$file', path.name)}") + console.rule(f"Formatted Successfully with: {command.replace('$file', path.name)}") else: logger.error(f"Failed to format code with {' '.join(formatter_cmd_list)}") except FileNotFoundError as e: diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 5c0a91c38..14f6789e1 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -1,12 +1,17 @@ +import argparse import os import tempfile from pathlib import Path import pytest +import shutil from codeflash.code_utils.config_parser import parse_config_file from codeflash.code_utils.formatter import format_code, sort_imports +from codeflash.discovery.functions_to_optimize import FunctionToOptimize +from codeflash.optimization.function_optimizer import FunctionOptimizer +from codeflash.verification.verification_utils import TestConfig def test_remove_duplicate_imports(): """Test that duplicate imports are removed when should_sort_imports is True.""" @@ -209,3 +214,67 @@ def foo(): tmp_path = tmp.name with pytest.raises(FileNotFoundError): format_code(formatter_cmds=["exit 1"], path=Path(tmp_path)) + + +def _run_formatting_test(source_filename: str, should_content_change: bool): + """Helper function to run formatting tests with common setup and teardown.""" + with tempfile.TemporaryDirectory() as test_dir_str: + test_dir = Path(test_dir_str) + this_file = Path(__file__).resolve() + repo_root_dir = this_file.parent.parent + source_file = repo_root_dir / "code_to_optimize" / source_filename + + original = source_file.read_text() + target_path = test_dir / "target.py" + + shutil.copy2(source_file, target_path) + + function_to_optimize = FunctionToOptimize( + function_name="process_data", + parents=[], + file_path=target_path + ) + + test_cfg = TestConfig( + tests_root=test_dir, + project_root_path=test_dir, + test_framework="pytest", + tests_project_rootdir=test_dir, + ) + + args = argparse.Namespace( + disable_imports_sorting=False, + formatter_cmds=[ + "ruff check --exit-zero --fix $file", + "ruff format $file" + ], + ) + + optimizer = FunctionOptimizer( + function_to_optimize=function_to_optimize, + test_cfg=test_cfg, + args=args, + ) + + optimizer.reformat_code_and_helpers( + helper_functions=[], + path=target_path, + original_code=optimizer.function_to_optimize_source_code, + ) + + content = target_path.read_text() + + if should_content_change: + assert content != original, f"Expected content to change for {source_filename}" + else: + assert content == original, f"Expected content to remain unchanged for {source_filename}" + + +def test_formatting_file_with_many_diffs(): + """Test that files with many formatting errors are skipped (content unchanged).""" + _run_formatting_test("many_formatting_errors.py", should_content_change=False) + + +def test_formatting_file_with_few_diffs(): + """Test that files with few formatting errors are formatted (content changed).""" + _run_formatting_test("few_formatting_errors.py", should_content_change=True) \ No newline at end of file From 5cd13ad1caeb98fcc3b0c39f69b98d9abb84ed8f Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 11:58:55 +0300 Subject: [PATCH 02/26] new line --- tests/test_formatter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 14f6789e1..3f45460eb 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -11,6 +11,7 @@ from codeflash.discovery.functions_to_optimize import FunctionToOptimize from codeflash.optimization.function_optimizer import FunctionOptimizer +from codeflash.optimization.function_optimizer import FunctionSource from codeflash.verification.verification_utils import TestConfig def test_remove_duplicate_imports(): @@ -257,7 +258,9 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): ) optimizer.reformat_code_and_helpers( - helper_functions=[], + helper_functions=[ + FunctionSource() + ], path=target_path, original_code=optimizer.function_to_optimize_source_code, ) @@ -277,4 +280,4 @@ def test_formatting_file_with_many_diffs(): def test_formatting_file_with_few_diffs(): """Test that files with few formatting errors are formatted (content changed).""" - _run_formatting_test("few_formatting_errors.py", should_content_change=True) \ No newline at end of file + _run_formatting_test("few_formatting_errors.py", should_content_change=True) From 152222726c19b5abb28d983180334f0708bc5476 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 12:02:53 +0300 Subject: [PATCH 03/26] better log messages --- codeflash/code_utils/formatter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 0b673ae28..3d6eff6cd 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -39,10 +39,10 @@ def should_format_file(filepath, max_lines_changed=50): return True except subprocess.CalledProcessError: - logger.warning(f"black command failed for {filepath}") + logger.warning(f"black --diff command failed for {filepath}") return False except FileNotFoundError: - logger.warning("black is not installed. Skipping formatting check.") + logger.warning("black formatter is not installed. Skipping formatting diff check.") return False From d3ca1cbf94e464d0cbecd0c234cb20885b7bf517 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 12:10:43 +0300 Subject: [PATCH 04/26] remove unnecessary check --- codeflash/code_utils/formatter.py | 3 --- tests/test_formatter.py | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 3d6eff6cd..f301bd013 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -23,9 +23,6 @@ def should_format_file(filepath, max_lines_changed=50): capture_output=True, text=True ) - - if result.returncode == 0 and not result.stdout: - return False diff_lines = [line for line in result.stdout.split('\n') if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))] diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 3f45460eb..7b0a43b42 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -11,7 +11,6 @@ from codeflash.discovery.functions_to_optimize import FunctionToOptimize from codeflash.optimization.function_optimizer import FunctionOptimizer -from codeflash.optimization.function_optimizer import FunctionSource from codeflash.verification.verification_utils import TestConfig def test_remove_duplicate_imports(): @@ -258,9 +257,7 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): ) optimizer.reformat_code_and_helpers( - helper_functions=[ - FunctionSource() - ], + helper_functions=[], path=target_path, original_code=optimizer.function_to_optimize_source_code, ) From dcb084ad12df7e01b82593e3a5f47a8b15a534e3 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 12:14:59 +0300 Subject: [PATCH 05/26] new line --- code_to_optimize/many_formatting_errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py index bd792e3d3..702539f70 100644 --- a/code_to_optimize/many_formatting_errors.py +++ b/code_to_optimize/many_formatting_errors.py @@ -144,4 +144,4 @@ def process(self): complex_result=complex_nested_function(5,3,2) print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}") - print(f"Complex calculation result: {complex_result}") \ No newline at end of file + print(f"Complex calculation result: {complex_result}") From 689a2d97af6e617407f1075da5e85ec9d67b8097 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 13:00:22 +0300 Subject: [PATCH 06/26] remove unused comment --- tests/test_formatter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 7b0a43b42..3106ee330 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -217,7 +217,6 @@ def foo(): def _run_formatting_test(source_filename: str, should_content_change: bool): - """Helper function to run formatting tests with common setup and teardown.""" with tempfile.TemporaryDirectory() as test_dir_str: test_dir = Path(test_dir_str) this_file = Path(__file__).resolve() From 44c0f85b6f7c1b4b047528426e6157fae852e681 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 13:55:10 +0300 Subject: [PATCH 07/26] the max lines for formatting changes to 100 --- codeflash/code_utils/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index f301bd013..13b330746 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -13,7 +13,7 @@ from pathlib import Path -def should_format_file(filepath, max_lines_changed=50): +def should_format_file(filepath, max_lines_changed=100): try: # check if black is installed subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) From 73ef51821ac8c3ec5daafe4234a5bf1d518f30f2 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 19:39:15 +0300 Subject: [PATCH 08/26] refactoring --- code_to_optimize/few_formatting_errors.py | 2 +- code_to_optimize/many_formatting_errors.py | 4 +- codeflash/code_utils/formatter.py | 89 ++++++++++++++-------- tests/test_formatter.py | 7 ++ 4 files changed, 66 insertions(+), 36 deletions(-) diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py index 905be2b39..27ed71b44 100644 --- a/code_to_optimize/few_formatting_errors.py +++ b/code_to_optimize/few_formatting_errors.py @@ -1,6 +1,6 @@ import os -class BadlyFormattedClass(object): +class UnformattedExampleClass(object): def __init__( self, name, diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py index 702539f70..79cfc825d 100644 --- a/code_to_optimize/many_formatting_errors.py +++ b/code_to_optimize/many_formatting_errors.py @@ -3,7 +3,7 @@ # This is a poorly formatted Python file with many style violations -class BadlyFormattedClass( object ): +class UnformattedExampleClass( object ): def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None): self.name=name;self.age=age;self.email=email;self.phone=phone self.address=address;self.city=city;self.state=state;self.zip_code=zip_code @@ -84,7 +84,7 @@ def generate_report(data,include_stats=True,include_charts=False,format_type='js return result -class DataProcessor ( BadlyFormattedClass ) : +class DataProcessor ( UnformattedExampleClass ) : def __init__(self,data_source,config=None,debug=False): super().__init__("DataProcessor") self.data_source=data_source;self.config=config or{};self.debug=debug diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 13b330746..94b5c7dc5 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -3,7 +3,7 @@ import os import shlex import subprocess -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import isort @@ -12,37 +12,60 @@ if TYPE_CHECKING: from pathlib import Path - -def should_format_file(filepath, max_lines_changed=100): - try: - # check if black is installed - subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - result = subprocess.run( - ['black', '--diff', filepath], - capture_output=True, - text=True - ) - - diff_lines = [line for line in result.stdout.split('\n') - if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))] - - changes_count = len(diff_lines) - - if changes_count > max_lines_changed: - logger.debug(f"Skipping {filepath}: {changes_count} lines would change (max: {max_lines_changed})") - return False - - return True - - except subprocess.CalledProcessError: - logger.warning(f"black --diff command failed for {filepath}") - return False - except FileNotFoundError: - logger.warning("black formatter is not installed. Skipping formatting diff check.") - return False - - +def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: + try: + subprocess.run(['black', '--version'], check=True, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + result = subprocess.run( + ['black', '--diff', filepath], + capture_output=True, + text=True + ) + return result.stdout.strip() if result.stdout else None + except (FileNotFoundError): + return None + + +def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]: + try: + subprocess.run(['ruff', '--version'], check=True, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + result = subprocess.run( + ['ruff', "format", '--diff', filepath], + capture_output=True, + text=True + ) + return result.stdout.strip() if result.stdout else None + except (FileNotFoundError): + return None + + +def get_diff_lines_count(diff_output: str) -> int: + diff_lines = [line for line in diff_output.split('\n') + if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))] + return len(diff_lines) + +def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: + diff_changes_stdout = None + + diff_changes_stdout = get_diff_lines_output_by_black(filepath) + + if diff_changes_stdout is None: + logger.warning(f"black formatter not found, trying ruff instead...") + diff_changes_stdout = get_diff_lines_output_by_ruff(filepath) + if diff_changes_stdout is None: + msg = f"Both ruff, black formatters not found, skipping formatting diff check." + logger.warning(msg) + raise FileNotFoundError(msg) + + diff_lines_count = get_diff_lines_count(diff_changes_stdout) + + if diff_lines_count > max_diff_lines: + logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") + return False + else: + return True + def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution @@ -50,7 +73,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True if not path.exists(): msg = f"File {path} does not exist. Cannot format the file." raise FileNotFoundError(msg) - if formatter_name == "disabled" or not should_format_file(path): + if formatter_name == "disabled" or not is_safe_to_format(path): # few -> False, large -> True return path.read_text(encoding="utf8") file_token = "$file" # noqa: S105 diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 3106ee330..ed2d7233a 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -268,12 +268,19 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): else: assert content == original, f"Expected content to remain unchanged for {source_filename}" +def _ruff_or_black_installed() -> bool: + return shutil.which("black") is not None or shutil.which("ruff") is not None + def test_formatting_file_with_many_diffs(): """Test that files with many formatting errors are skipped (content unchanged).""" + if not _ruff_or_black_installed(): + pytest.skip("Neither black nor ruff is installed, skipping formatting tests.") _run_formatting_test("many_formatting_errors.py", should_content_change=False) def test_formatting_file_with_few_diffs(): """Test that files with few formatting errors are formatted (content changed).""" + if not _ruff_or_black_installed(): + pytest.skip("Neither black nor ruff is installed, skipping formatting tests.") _run_formatting_test("few_formatting_errors.py", should_content_change=True) From a5343fd9454eebf471fc893ad587a33a2e75b705 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 23:37:18 +0300 Subject: [PATCH 09/26] refactoring and improvements --- codeflash/code_utils/formatter.py | 61 ++++++++++++++++++------------- tests/test_formatter.py | 3 +- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 94b5c7dc5..3d5b587c6 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -4,7 +4,6 @@ import shlex import subprocess from typing import TYPE_CHECKING, Optional - import isort from codeflash.cli_cmds.console import console, logger @@ -12,37 +11,48 @@ if TYPE_CHECKING: from pathlib import Path -def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: +def get_nth_line(text: str, n: int) -> str | None: + for i, line in enumerate(text.splitlines(), start=1): + if i == n: + return line + return None + +def get_diff_output(cmd: list[str]) -> Optional[str]: try: - subprocess.run(['black', '--version'], check=True, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - result = subprocess.run( - ['black', '--diff', filepath], - capture_output=True, - text=True - ) - return result.stdout.strip() if result.stdout else None - except (FileNotFoundError): + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return result.stdout.strip() or None + except (FileNotFoundError, subprocess.CalledProcessError) as e: + if isinstance(e, subprocess.CalledProcessError): + # ruff returns 1 when the file needs formatting, and 0 when it is already formatted + is_ruff = cmd[0] == "ruff" + if e.returncode == 0 and is_ruff: + return "" + elif e.returncode == 1 and is_ruff: + return e.stdout.strip() or None return None +def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: + try: + import black # type: ignore + return get_diff_output(['black', '--diff', filepath]) + except ImportError: + return None + def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]: try: - subprocess.run(['ruff', '--version'], check=True, - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - result = subprocess.run( - ['ruff', "format", '--diff', filepath], - capture_output=True, - text=True - ) - return result.stdout.strip() if result.stdout else None - except (FileNotFoundError): + import ruff # type: ignore + return get_diff_output(['ruff', 'format', '--diff', filepath]) + except ImportError: + print("can't import ruff") return None def get_diff_lines_count(diff_output: str) -> int: - diff_lines = [line for line in diff_output.split('\n') - if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))] + lines = diff_output.split('\n') + def is_diff_line(line: str) -> bool: + return line.startswith(('+', '-')) and not line.startswith(('+++', '---')) + diff_lines = [line for line in lines if is_diff_line(line)] return len(diff_lines) def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: @@ -54,9 +64,8 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: logger.warning(f"black formatter not found, trying ruff instead...") diff_changes_stdout = get_diff_lines_output_by_ruff(filepath) if diff_changes_stdout is None: - msg = f"Both ruff, black formatters not found, skipping formatting diff check." - logger.warning(msg) - raise FileNotFoundError(msg) + logger.warning(f"Both ruff, black formatters not found, skipping formatting diff check.") + return False diff_lines_count = get_diff_lines_count(diff_changes_stdout) @@ -73,7 +82,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True if not path.exists(): msg = f"File {path} does not exist. Cannot format the file." raise FileNotFoundError(msg) - if formatter_name == "disabled" or not is_safe_to_format(path): # few -> False, large -> True + if formatter_name == "disabled" or not is_safe_to_format(str(path)): return path.read_text(encoding="utf8") file_token = "$file" # noqa: S105 diff --git a/tests/test_formatter.py b/tests/test_formatter.py index ed2d7233a..c2e7864e6 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -217,6 +217,8 @@ def foo(): def _run_formatting_test(source_filename: str, should_content_change: bool): + if shutil.which("ruff") is None: + pytest.skip("ruff is not installed, skipping.") with tempfile.TemporaryDirectory() as test_dir_str: test_dir = Path(test_dir_str) this_file = Path(__file__).resolve() @@ -262,7 +264,6 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): ) content = target_path.read_text() - if should_content_change: assert content != original, f"Expected content to change for {source_filename}" else: From 395855d5c214c963d0c4784ccb3a42926074b6df Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 23:50:44 +0300 Subject: [PATCH 10/26] added black as dev dependency --- poetry.lock | 68 +++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + tests/test_formatter.py | 13 +++----- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/poetry.lock b/poetry.lock index 04cfeae09..b80c86387 100644 --- a/poetry.lock +++ b/poetry.lock @@ -73,6 +73,53 @@ files = [ {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"}, ] +[[package]] +name = "black" +version = "25.1.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, + {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, + {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, + {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, + {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, + {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, + {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, + {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, + {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, + {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, + {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, + {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, + {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, + {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, + {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, + {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, + {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, + {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, + {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, + {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, + {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, + {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "blessed" version = "1.21.0" @@ -248,7 +295,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -264,11 +311,11 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main", "dev"] +markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "coverage" @@ -1025,8 +1072,11 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -1344,6 +1394,18 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -2686,4 +2748,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "1a73e9db33e3884cf1cc6e3371816aebd20831845ef9bf671be315e659480e86" +content-hash = "d0b959755aad4882df502f8ba219b865df472ba1830d5adf8e757aa6436bc3df" diff --git a/pyproject.toml b/pyproject.toml index c3e48f889..dd38137ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,6 +123,7 @@ types-pexpect = "^4.9.0.20241208" types-unidiff = "^0.7.0.20240505" uv = ">=0.6.2" pre-commit = "^4.2.0" +black = "^25.1.0" [tool.poetry.build] script = "codeflash/update_license_version.py" diff --git a/tests/test_formatter.py b/tests/test_formatter.py index c2e7864e6..b6c87b190 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -217,8 +217,10 @@ def foo(): def _run_formatting_test(source_filename: str, should_content_change: bool): - if shutil.which("ruff") is None: - pytest.skip("ruff is not installed, skipping.") + try: + import ruff # type: ignore + except ImportError: + pytest.skip("ruff is not installed") with tempfile.TemporaryDirectory() as test_dir_str: test_dir = Path(test_dir_str) this_file = Path(__file__).resolve() @@ -269,19 +271,12 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): else: assert content == original, f"Expected content to remain unchanged for {source_filename}" -def _ruff_or_black_installed() -> bool: - return shutil.which("black") is not None or shutil.which("ruff") is not None - def test_formatting_file_with_many_diffs(): """Test that files with many formatting errors are skipped (content unchanged).""" - if not _ruff_or_black_installed(): - pytest.skip("Neither black nor ruff is installed, skipping formatting tests.") _run_formatting_test("many_formatting_errors.py", should_content_change=False) def test_formatting_file_with_few_diffs(): """Test that files with few formatting errors are formatted (content changed).""" - if not _ruff_or_black_installed(): - pytest.skip("Neither black nor ruff is installed, skipping formatting tests.") _run_formatting_test("few_formatting_errors.py", should_content_change=True) From 822d6cc015d1a5dc3e6c28bea4a1ef599cb19a05 Mon Sep 17 00:00:00 2001 From: mohammed Date: Tue, 3 Jun 2025 23:57:55 +0300 Subject: [PATCH 11/26] made some refactor changes that codeflash suggested --- codeflash/code_utils/formatter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 3d5b587c6..e1d269aa7 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -27,7 +27,7 @@ def get_diff_output(cmd: list[str]) -> Optional[str]: is_ruff = cmd[0] == "ruff" if e.returncode == 0 and is_ruff: return "" - elif e.returncode == 1 and is_ruff: + if e.returncode == 1 and is_ruff: return e.stdout.strip() or None return None @@ -61,10 +61,10 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: diff_changes_stdout = get_diff_lines_output_by_black(filepath) if diff_changes_stdout is None: - logger.warning(f"black formatter not found, trying ruff instead...") + logger.warning("black formatter not found, trying ruff instead...") diff_changes_stdout = get_diff_lines_output_by_ruff(filepath) if diff_changes_stdout is None: - logger.warning(f"Both ruff, black formatters not found, skipping formatting diff check.") + logger.warning("Both ruff, black formatters not found, skipping formatting diff check.") return False diff_lines_count = get_diff_lines_count(diff_changes_stdout) @@ -72,8 +72,8 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: if diff_lines_count > max_diff_lines: logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") return False - else: - return True + + return True def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa From ce1502284a07e2adcf0c5a0ec080ff360ab81eab Mon Sep 17 00:00:00 2001 From: mohammed Date: Wed, 4 Jun 2025 00:42:40 +0300 Subject: [PATCH 12/26] remove unused function --- codeflash/code_utils/formatter.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index e1d269aa7..ec077f444 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -11,12 +11,6 @@ if TYPE_CHECKING: from pathlib import Path -def get_nth_line(text: str, n: int) -> str | None: - for i, line in enumerate(text.splitlines(), start=1): - if i == n: - return line - return None - def get_diff_output(cmd: list[str]) -> Optional[str]: try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) From d2a87116ec4702fefdd240f25d88a0073ef7ea0d Mon Sep 17 00:00:00 2001 From: mohammed Date: Wed, 4 Jun 2025 02:27:24 +0300 Subject: [PATCH 13/26] formatting & using internal black dep --- codeflash/code_utils/formatter.py | 65 +++++++++++-------------------- poetry.lock | 10 ++--- pyproject.toml | 2 +- tests/test_formatter.py | 3 +- 4 files changed, 30 insertions(+), 50 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index ec077f444..3144416e1 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -4,6 +4,7 @@ import shlex import subprocess from typing import TYPE_CHECKING, Optional + import isort from codeflash.cli_cmds.console import console, logger @@ -11,64 +12,43 @@ if TYPE_CHECKING: from pathlib import Path -def get_diff_output(cmd: list[str]) -> Optional[str]: - try: - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - return result.stdout.strip() or None - except (FileNotFoundError, subprocess.CalledProcessError) as e: - if isinstance(e, subprocess.CalledProcessError): - # ruff returns 1 when the file needs formatting, and 0 when it is already formatted - is_ruff = cmd[0] == "ruff" - if e.returncode == 0 and is_ruff: - return "" - if e.returncode == 1 and is_ruff: - return e.stdout.strip() or None - return None - -def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: +def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]: try: - import black # type: ignore - return get_diff_output(['black', '--diff', filepath]) - except ImportError: - return None + import black -def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]: - try: - import ruff # type: ignore - return get_diff_output(['ruff', 'format', '--diff', filepath]) + formatted_content = black.format_file_contents(src_contents=unformatted_content, fast=True, mode=black.Mode()) + return black.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath) except ImportError: - print("can't import ruff") return None def get_diff_lines_count(diff_output: str) -> int: - lines = diff_output.split('\n') + lines = diff_output.split("\n") + def is_diff_line(line: str) -> bool: - return line.startswith(('+', '-')) and not line.startswith(('+++', '---')) + return line.startswith(("+", "-")) and not line.startswith(("+++", "---")) + diff_lines = [line for line in lines if is_diff_line(line)] return len(diff_lines) -def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: - diff_changes_stdout = None - diff_changes_stdout = get_diff_lines_output_by_black(filepath) +def is_safe_to_format(filepath: str, content: str, max_diff_lines: int = 100) -> bool: + diff_changes_str = None + + diff_changes_str = get_diff_output_by_black(filepath, unformatted_content=content) - if diff_changes_stdout is None: - logger.warning("black formatter not found, trying ruff instead...") - diff_changes_stdout = get_diff_lines_output_by_ruff(filepath) - if diff_changes_stdout is None: - logger.warning("Both ruff, black formatters not found, skipping formatting diff check.") - return False - - diff_lines_count = get_diff_lines_count(diff_changes_stdout) - + if diff_changes_str is None: + logger.warning("Looks like black formatter not found, make sure it is installed.") + return False + + diff_lines_count = get_diff_lines_count(diff_changes_str) if diff_lines_count > max_diff_lines: - logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") + logger.debug(f"Skipping formatting {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") return False return True - + def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution @@ -76,8 +56,9 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True if not path.exists(): msg = f"File {path} does not exist. Cannot format the file." raise FileNotFoundError(msg) - if formatter_name == "disabled" or not is_safe_to_format(str(path)): - return path.read_text(encoding="utf8") + file_content = path.read_text(encoding="utf8") + if formatter_name == "disabled" or not is_safe_to_format(filepath=str(path), content=file_content): + return file_content file_token = "$file" # noqa: S105 for command in formatter_cmds: diff --git a/poetry.lock b/poetry.lock index b80c86387..ab3e6054b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -79,7 +79,7 @@ version = "25.1.0" description = "The uncompromising code formatter." optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main"] files = [ {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, @@ -295,7 +295,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -311,11 +311,11 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main", "dev"] -markers = "sys_platform == \"win32\" or platform_system == \"Windows\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "coverage" @@ -1400,7 +1400,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2748,4 +2748,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "d0b959755aad4882df502f8ba219b865df472ba1830d5adf8e757aa6436bc3df" +content-hash = "1ba28119bcc2b572133da8f243eea42fc8f732b6255afac7c2c7e616e2c68677" diff --git a/pyproject.toml b/pyproject.toml index dd38137ee..6a5c4904a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ crosshair-tool = ">=0.0.78" coverage = ">=7.6.4" line_profiler=">=4.2.0" #this is the minimum version which supports python 3.13 platformdirs = ">=4.3.7" +black = "^25.1.0" [tool.poetry.group.dev] optional = true @@ -123,7 +124,6 @@ types-pexpect = "^4.9.0.20241208" types-unidiff = "^0.7.0.20240505" uv = ">=0.6.2" pre-commit = "^4.2.0" -black = "^25.1.0" [tool.poetry.build] script = "codeflash/update_license_version.py" diff --git a/tests/test_formatter.py b/tests/test_formatter.py index b6c87b190..b500bbb4f 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -259,13 +259,12 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): args=args, ) - optimizer.reformat_code_and_helpers( + content, _ = optimizer.reformat_code_and_helpers( helper_functions=[], path=target_path, original_code=optimizer.function_to_optimize_source_code, ) - content = target_path.read_text() if should_content_change: assert content != original, f"Expected content to change for {source_filename}" else: From f46b3683b1391517cd13d3b666fdcf10fb382861 Mon Sep 17 00:00:00 2001 From: mohammed Date: Wed, 4 Jun 2025 03:01:51 +0300 Subject: [PATCH 14/26] fix black import issue --- codeflash/code_utils/formatter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 3144416e1..afbced761 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -15,10 +15,10 @@ def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]: try: - import black + from black import Mode, format_file_contents, output - formatted_content = black.format_file_contents(src_contents=unformatted_content, fast=True, mode=black.Mode()) - return black.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath) + formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode()) + return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath) except ImportError: return None From 6504cc4cc92725ca1dace7a57759dfe0c124fb0d Mon Sep 17 00:00:00 2001 From: mohammed Date: Wed, 4 Jun 2025 03:51:02 +0300 Subject: [PATCH 15/26] handle formatting files with no formatting issues --- code_to_optimize/no_formatting_errors.py | 71 ++++++++++++++++++++++++ codeflash/code_utils/formatter.py | 4 +- tests/test_formatter.py | 4 ++ 3 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 code_to_optimize/no_formatting_errors.py diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py new file mode 100644 index 000000000..3d32bc94c --- /dev/null +++ b/code_to_optimize/no_formatting_errors.py @@ -0,0 +1,71 @@ +import os, sys, json, datetime, math, random +import requests +from collections import defaultdict, OrderedDict +from typing import List, Dict, Optional, Union, Tuple, Any +import numpy as np +import pandas as pd + +# This is a poorly formatted Python file with many style violations + + +class UnformattedExampleClass(object): + def __init__( + self, + name, + age=None, + email=None, + phone=None, + address=None, + city=None, + state=None, + zip_code=None, + ): + self.name = name + self.age = age + self.email = email + self.phone = phone + self.address = address + self.city = city + self.state = state + self.zip_code = zip_code + self.data = {"name": name, "age": age, "email": email} + + def get_info(self): + return f"Name: {self.name}, Age: {self.age}" + + def update_data(self, **kwargs): + for key, value in kwargs.items(): + if hasattr(self, key): + setattr(self, key, value) + self.data.update(kwargs) + + +def process_data( + data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False +): + if not data_list: + return [] + if filter_func: + data_list = [item for item in data_list if filter_func(item)] + if transform_func: + data_list = [transform_func(item) for item in data_list] + if sort_key: + data_list = sorted(data_list, key=sort_key, reverse=reverse) + return data_list + + +def calculate_statistics(numbers): + if not numbers: + return None + mean = sum(numbers) / len(numbers) + median = sorted(numbers)[len(numbers) // 2] + variance = sum((x - mean) ** 2 for x in numbers) / len(numbers) + std_dev = math.sqrt(variance) + return { + "mean": mean, + "median": median, + "variance": variance, + "std_dev": std_dev, + "min": min(numbers), + "max": max(numbers), + } diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index afbced761..6188e8649 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -15,11 +15,11 @@ def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]: try: - from black import Mode, format_file_contents, output + from black import Mode, format_file_contents, output, report formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode()) return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath) - except ImportError: + except (ImportError, report.NothingChanged): return None diff --git a/tests/test_formatter.py b/tests/test_formatter.py index b500bbb4f..baf5b8079 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -279,3 +279,7 @@ def test_formatting_file_with_many_diffs(): def test_formatting_file_with_few_diffs(): """Test that files with few formatting errors are formatted (content changed).""" _run_formatting_test("few_formatting_errors.py", should_content_change=True) + +def test_formatting_file_with_no_diffs(): + """Test that files with no formatting errors are unchanged.""" + _run_formatting_test("no_formatting_errors.py", should_content_change=False) From 82a4ee17862297be769e4d85a7a7cf808456ad02 Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:05:25 +0300 Subject: [PATCH 16/26] use user pre-defined formatting commands, instead of using black --- code_to_optimize/no_formatting_errors.py | 27 +--- codeflash/code_utils/formatter.py | 126 ++++++++++++------- codeflash/optimization/function_optimizer.py | 11 +- poetry.lock | 61 +-------- pyproject.toml | 1 - tests/test_formatter.py | 10 ++ 6 files changed, 106 insertions(+), 130 deletions(-) diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py index 3d32bc94c..c521ef420 100644 --- a/code_to_optimize/no_formatting_errors.py +++ b/code_to_optimize/no_formatting_errors.py @@ -1,25 +1,8 @@ -import os, sys, json, datetime, math, random -import requests -from collections import defaultdict, OrderedDict -from typing import List, Dict, Optional, Union, Tuple, Any -import numpy as np -import pandas as pd +import math -# This is a poorly formatted Python file with many style violations - -class UnformattedExampleClass(object): - def __init__( - self, - name, - age=None, - email=None, - phone=None, - address=None, - city=None, - state=None, - zip_code=None, - ): +class UnformattedExampleClass: + def __init__(self, name, age=None, email=None, phone=None, address=None, city=None, state=None, zip_code=None): self.name = name self.age = age self.email = email @@ -40,9 +23,7 @@ def update_data(self, **kwargs): self.data.update(kwargs) -def process_data( - data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False -): +def process_data(data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False): if not data_list: return [] if filter_func: diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 6188e8649..0a51c303c 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -1,69 +1,73 @@ from __future__ import annotations +import difflib import os +import re import shlex +import shutil import subprocess -from typing import TYPE_CHECKING, Optional +import tempfile +from pathlib import Path +from typing import Optional import isort from codeflash.cli_cmds.console import console, logger -if TYPE_CHECKING: - from pathlib import Path +def generate_unified_diff(original: str, modified: str, from_file: str, to_file: str) -> str: + line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") -def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]: - try: - from black import Mode, format_file_contents, output, report + def split_lines(text: str) -> list[str]: + lines = [match[0] for match in line_pattern.finditer(text)] + if lines and lines[-1] == "": + lines.pop() + return lines - formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode()) - return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath) - except (ImportError, report.NothingChanged): - return None + original_lines = split_lines(original) + modified_lines = split_lines(modified) + diff_output = [] + for line in difflib.unified_diff(original_lines, modified_lines, fromfile=from_file, tofile=to_file, n=5): + if line.endswith("\n"): + diff_output.append(line) + else: + diff_output.append(line + "\n") + diff_output.append("\\ No newline at end of file\n") -def get_diff_lines_count(diff_output: str) -> int: - lines = diff_output.split("\n") - - def is_diff_line(line: str) -> bool: - return line.startswith(("+", "-")) and not line.startswith(("+++", "---")) - - diff_lines = [line for line in lines if is_diff_line(line)] - return len(diff_lines) + return "".join(diff_output) -def is_safe_to_format(filepath: str, content: str, max_diff_lines: int = 100) -> bool: - diff_changes_str = None - - diff_changes_str = get_diff_output_by_black(filepath, unformatted_content=content) - - if diff_changes_str is None: - logger.warning("Looks like black formatter not found, make sure it is installed.") - return False - - diff_lines_count = get_diff_lines_count(diff_changes_str) - if diff_lines_count > max_diff_lines: - logger.debug(f"Skipping formatting {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") - return False +def apply_formatter_cmds( + cmds: list[str], + path: Path, + test_dir_str: Optional[str], + print_status: bool, # noqa +) -> tuple[Path, str]: + # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution + formatter_name = cmds[0].lower() + should_make_copy = False + file_path = path - return True + if test_dir_str: + should_make_copy = True + file_path = Path(test_dir_str) / "temp.py" + if not cmds or formatter_name == "disabled": + return path, path.read_text(encoding="utf8") -def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa - # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution - formatter_name = formatter_cmds[0].lower() if not path.exists(): - msg = f"File {path} does not exist. Cannot format the file." + msg = f"File {path} does not exist. Cannot apply formatter commands." raise FileNotFoundError(msg) - file_content = path.read_text(encoding="utf8") - if formatter_name == "disabled" or not is_safe_to_format(filepath=str(path), content=file_content): - return file_content + + if should_make_copy: + shutil.copy2(path, file_path) file_token = "$file" # noqa: S105 - for command in formatter_cmds: + + for command in cmds: formatter_cmd_list = shlex.split(command, posix=os.name != "nt") - formatter_cmd_list = [path.as_posix() if chunk == file_token else chunk for chunk in formatter_cmd_list] + formatter_cmd_list = [file_path.as_posix() if chunk == file_token else chunk for chunk in formatter_cmd_list] try: result = subprocess.run(formatter_cmd_list, capture_output=True, check=False) if result.returncode == 0: @@ -83,7 +87,45 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True raise e from None - return path.read_text(encoding="utf8") + return file_path, file_path.read_text(encoding="utf8") + + +def get_diff_lines_count(diff_output: str) -> int: + lines = diff_output.split("\n") + + def is_diff_line(line: str) -> bool: + return line.startswith(("+", "-")) and not line.startswith(("+++", "---")) + + diff_lines = [line for line in lines if is_diff_line(line)] + return len(diff_lines) + + +def format_code(formatter_cmds: list[str], path: Path, optimized_function: str = "", print_status: bool = True) -> str: # noqa + with tempfile.TemporaryDirectory() as test_dir_str: + max_diff_lines = 100 + + original_code = path.read_text(encoding="utf8") + # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided) + original_code_without_opfunc = original_code.replace(optimized_function, "") + + original_temp = Path(test_dir_str) / "original_temp.py" + original_temp.write_text(original_code_without_opfunc, encoding="utf8") + + formatted_temp, formatted_code = apply_formatter_cmds( + formatter_cmds, original_temp, test_dir_str, print_status=False + ) + + diff_output = generate_unified_diff( + original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp) + ) + diff_lines_count = get_diff_lines_count(diff_output) + if diff_lines_count > max_diff_lines: + logger.debug(f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})") + return original_code + + _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status) + logger.debug(f"Formatted {path} with commands: {formatter_cmds}") + return formatted_code def sort_imports(code: str) -> str: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 12aeff3fa..1e918b40f 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -302,7 +302,10 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 ) new_code, new_helper_code = self.reformat_code_and_helpers( - code_context.helper_functions, explanation.file_path, self.function_to_optimize_source_code + code_context.helper_functions, + explanation.file_path, + self.function_to_optimize_source_code, + optimized_function=best_optimization.candidate.source_code, ) existing_tests = existing_tests_source_for( @@ -591,18 +594,18 @@ def write_code_and_helpers(original_code: str, original_helper_code: dict[Path, f.write(helper_code) def reformat_code_and_helpers( - self, helper_functions: list[FunctionSource], path: Path, original_code: str + self, helper_functions: list[FunctionSource], path: Path, original_code: str, optimized_function: str ) -> tuple[str, dict[Path, str]]: should_sort_imports = not self.args.disable_imports_sorting if should_sort_imports and isort.code(original_code) != original_code: should_sort_imports = False - new_code = format_code(self.args.formatter_cmds, path) + new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function) if should_sort_imports: new_code = sort_imports(new_code) new_helper_code: dict[Path, str] = {} - helper_functions_paths = {hf.file_path for hf in helper_functions} + helper_functions_paths = {hf.source_code for hf in helper_functions} for module_abspath in helper_functions_paths: formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath) if should_sort_imports: diff --git a/poetry.lock b/poetry.lock index ab3e6054b..825a15b02 100644 --- a/poetry.lock +++ b/poetry.lock @@ -73,53 +73,6 @@ files = [ {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"}, ] -[[package]] -name = "black" -version = "25.1.0" -description = "The uncompromising code formatter." -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, - {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, - {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, - {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, - {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, - {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, - {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, - {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, - {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, - {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, - {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, - {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, - {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, - {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, - {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, - {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, - {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, - {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, - {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, - {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, - {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, - {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - [[package]] name = "blessed" version = "1.21.0" @@ -1394,18 +1347,6 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "pexpect" version = "4.9.0" @@ -2748,4 +2689,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "1ba28119bcc2b572133da8f243eea42fc8f732b6255afac7c2c7e616e2c68677" +content-hash = "1a73e9db33e3884cf1cc6e3371816aebd20831845ef9bf671be315e659480e86" diff --git a/pyproject.toml b/pyproject.toml index 6a5c4904a..c3e48f889 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,6 @@ crosshair-tool = ">=0.0.78" coverage = ">=7.6.4" line_profiler=">=4.2.0" #this is the minimum version which supports python 3.13 platformdirs = ">=4.3.7" -black = "^25.1.0" [tool.poetry.group.dev] optional = true diff --git a/tests/test_formatter.py b/tests/test_formatter.py index baf5b8079..11790c951 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -263,6 +263,16 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): helper_functions=[], path=target_path, original_code=optimizer.function_to_optimize_source_code, + # this is just for testing, but in practice, this would be an optimized function code and it will be well-formatted + optimized_function=""" def process(self): + data=self.load_data() + if not data:return{"success":False,"error":"No data loaded"} + + validated_data=self.validate_data(data) + processed_result=process_data(validated_data, + filter_func=lambda x:x.get('active',True), + transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()}, + sort_key=lambda x:x.get('name',''))""", ) if should_content_change: From caeda49a74864e0e475e7a277549188036f087ee Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:25:26 +0300 Subject: [PATCH 17/26] make sure format_code recieves file path as path type not as str --- codeflash/code_utils/formatter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 0a51c303c..8ad3e3f02 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -104,6 +104,9 @@ def format_code(formatter_cmds: list[str], path: Path, optimized_function: str = with tempfile.TemporaryDirectory() as test_dir_str: max_diff_lines = 100 + if type(path) is str: + path = Path(path) + original_code = path.read_text(encoding="utf8") # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided) original_code_without_opfunc = original_code.replace(optimized_function, "") From 6967fcb22a9ce66f0668ea5bdb856db78899c5f0 Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:29:30 +0300 Subject: [PATCH 18/26] formatting and linting --- codeflash/code_utils/formatter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 8ad3e3f02..d7d4bd438 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -8,7 +8,7 @@ import subprocess import tempfile from pathlib import Path -from typing import Optional +from typing import Optional, Union import isort @@ -100,11 +100,16 @@ def is_diff_line(line: str) -> bool: return len(diff_lines) -def format_code(formatter_cmds: list[str], path: Path, optimized_function: str = "", print_status: bool = True) -> str: # noqa +def format_code( + formatter_cmds: list[str], + path: Union[str, Path], + optimized_function: str = "", + print_status: bool = True, # noqa +) -> str: with tempfile.TemporaryDirectory() as test_dir_str: max_diff_lines = 100 - if type(path) is str: + if isinstance(path, str): path = Path(path) original_code = path.read_text(encoding="utf8") From 8248c8e594d24c71749515630fc34704288ab4fe Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:48:20 +0300 Subject: [PATCH 19/26] typo --- codeflash/optimization/function_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 1e918b40f..419fa1e2e 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -605,7 +605,7 @@ def reformat_code_and_helpers( new_code = sort_imports(new_code) new_helper_code: dict[Path, str] = {} - helper_functions_paths = {hf.source_code for hf in helper_functions} + helper_functions_paths = {hf.file_path for hf in helper_functions} for module_abspath in helper_functions_paths: formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath) if should_sort_imports: From 15aacdbffa98eb17fb27bb9aa2336adfbe6a8501 Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:50:34 +0300 Subject: [PATCH 20/26] revert lock file changes --- poetry.lock | 3 --- 1 file changed, 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 825a15b02..04cfeae09 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1025,11 +1025,8 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, - {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, - {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, From c24fc9037f74f0c72dfe84a307523a4d84186249 Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 02:52:17 +0300 Subject: [PATCH 21/26] remove comment --- codeflash/code_utils/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index d7d4bd438..5d4540116 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -113,7 +113,7 @@ def format_code( path = Path(path) original_code = path.read_text(encoding="utf8") - # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided) + # we dont' count the formatting diff for the optimized function as it should be well-formatted original_code_without_opfunc = original_code.replace(optimized_function, "") original_temp = Path(test_dir_str) / "original_temp.py" From b48e9e6e64e24f7e58b641cd1cd8a18111ac4c67 Mon Sep 17 00:00:00 2001 From: mohammed Date: Thu, 5 Jun 2025 03:12:48 +0300 Subject: [PATCH 22/26] pass helper functions source code to the formatter for diff checking --- codeflash/optimization/function_optimizer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 419fa1e2e..7edba5e74 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -605,9 +605,12 @@ def reformat_code_and_helpers( new_code = sort_imports(new_code) new_helper_code: dict[Path, str] = {} - helper_functions_paths = {hf.file_path for hf in helper_functions} - for module_abspath in helper_functions_paths: - formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath) + for hp in helper_functions: + module_abspath = hp.file_path + hp_source_code = hp.source_code + formatted_helper_code = format_code( + self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code + ) if should_sort_imports: formatted_helper_code = sort_imports(formatted_helper_code) new_helper_code[module_abspath] = formatted_helper_code From 64f2dd99566eae09e0aa5035ed7bd861760f9653 Mon Sep 17 00:00:00 2001 From: mohammed Date: Sat, 7 Jun 2025 01:15:23 +0300 Subject: [PATCH 23/26] more unit tests --- code_to_optimize/few_formatting_errors.py | 47 -- code_to_optimize/many_formatting_errors.py | 147 ------ code_to_optimize/no_formatting_errors.py | 52 -- tests/test_formatter.py | 553 ++++++++++++++++++++- 4 files changed, 530 insertions(+), 269 deletions(-) delete mode 100644 code_to_optimize/few_formatting_errors.py delete mode 100644 code_to_optimize/many_formatting_errors.py delete mode 100644 code_to_optimize/no_formatting_errors.py diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py deleted file mode 100644 index 27ed71b44..000000000 --- a/code_to_optimize/few_formatting_errors.py +++ /dev/null @@ -1,47 +0,0 @@ -import os - -class UnformattedExampleClass(object): - def __init__( - self, - name, - age= None, - email= None, - phone=None, - address=None, - city=None, - state=None, - zip_code=None, - ): - self.name = name - self.age = age - self.email = email - self.phone = phone - self. address = address - self.city = city - self.state = state - self.zip_code = zip_code - self.data = {"name": name, "age": age, "email": email} - - def get_info(self): - return f"Name: {self.name}, Age: {self.age}" - - def update_data(self, **kwargs): - for key, value in kwargs.items(): - if hasattr(self, key): - setattr(self, key, value) - self.data.update(kwargs) - - -def process_data( - data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False -): - if not data_list: - return [] - if filter_func: - data_list = [ item for item in data_list if filter_func(item)] - if transform_func: - data_list = [transform_func(item) for item in data_list] - if sort_key: - data_list = sorted(data_list, key=sort_key, reverse=reverse) - return data_list - diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py deleted file mode 100644 index 79cfc825d..000000000 --- a/code_to_optimize/many_formatting_errors.py +++ /dev/null @@ -1,147 +0,0 @@ -import os,sys,json,datetime,math,random;import requests;from collections import defaultdict,OrderedDict -from typing import List,Dict,Optional,Union,Tuple,Any;import numpy as np;import pandas as pd - -# This is a poorly formatted Python file with many style violations - -class UnformattedExampleClass( object ): - def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None): - self.name=name;self.age=age;self.email=email;self.phone=phone - self.address=address;self.city=city;self.state=state;self.zip_code=zip_code - self.data={"name":name,"age":age,"email":email} - - def get_info(self ): - return f"Name: {self.name}, Age: {self.age}" - - def update_data(self,**kwargs): - for key,value in kwargs.items(): - if hasattr(self,key):setattr(self,key,value) - self.data.update(kwargs) - -def process_data(data_list,filter_func=None,transform_func=None,sort_key=None,reverse=False): - if not data_list:return[] - if filter_func:data_list=[item for item in data_list if filter_func(item)] - if transform_func:data_list=[transform_func(item)for item in data_list] - if sort_key:data_list=sorted(data_list,key=sort_key,reverse=reverse) - return data_list - -def calculate_statistics(numbers): - if not numbers:return None - mean=sum(numbers)/len(numbers); median=sorted(numbers)[len(numbers)//2] - variance=sum((x-mean)**2 for x in numbers)/len(numbers);std_dev=math.sqrt(variance) - return {"mean":mean,"median":median,"variance":variance,"std_dev":std_dev,"min":min(numbers),"max":max(numbers)} - -def complex_nested_function(x,y,z): - def inner_function_1(a,b): - def deeply_nested(c,d): - return c*d+a*b - return deeply_nested(a+1,b-1)+deeply_nested(a-1,b+1) - def inner_function_2 (a,b,c): - result=[] - for i in range(a): - for j in range(b): - for k in range(c): - if i*j*k>0:result.append(i*j*k) - elif i+j+k==0:result.append(-1) - else :result.append(0) - return result - return inner_function_1(x,y)+sum(inner_function_2(x,y,z)) - -# Long lines and poor dictionary formatting -user_data={"users":[{"id":1,"name":"John Doe","email":"john@example.com","preferences":{"theme":"dark","notifications":True,"language":"en"},"metadata":{"created_at":"2023-01-01","last_login":"2024-01-01","login_count":150}},{"id":2,"name":"Jane Smith","email":"jane@example.com","preferences":{"theme":"light","notifications":False,"language":"es"},"metadata":{"created_at":"2023-02-15","last_login":"2024-01-15","login_count":89}}]} - -# Poor list formatting and string concatenation -long_list_of_items=['item_1','item_2','item_3','item_4','item_5','item_6','item_7','item_8','item_9','item_10','item_11','item_12','item_13','item_14','item_15','item_16','item_17','item_18','item_19','item_20'] - -def generate_report(data,include_stats=True,include_charts=False,format_type='json',output_file=None): - if not data:raise ValueError("Data cannot be empty") - report={'timestamp':datetime.datetime.now().isoformat(),'data_count':len(data),'summary':{}} - - # Bad formatting in loops and conditionals - for i,item in enumerate(data): - if isinstance(item,dict): - for key,value in item.items(): - if key not in report['summary']:report['summary'][key]=[] - report['summary'][key].append(value) - elif isinstance(item,(int,float)): - if 'numbers' not in report['summary']:report['summary']['numbers']=[] - report['summary']['numbers'].append(item) - else: - if 'other' not in report['summary']:report['summary']['other']=[] - report['summary']['other'].append(str(item)) - - if include_stats and 'numbers' in report['summary']: - numbers=report['summary']['numbers'] - report['statistics']=calculate_statistics(numbers) - - # Long conditional chain with poor formatting - if format_type=='json':result=json.dumps(report,indent=None,separators=(',',':')) - elif format_type=='pretty_json':result=json.dumps(report,indent=2) - elif format_type=='string':result=str(report) - else:result=report - - if output_file: - with open(output_file,'w')as f:f.write(result if isinstance(result,str)else json.dumps(result)) - - return result - -class DataProcessor ( UnformattedExampleClass ) : - def __init__(self,data_source,config=None,debug=False): - super().__init__("DataProcessor") - self.data_source=data_source;self.config=config or{};self.debug=debug - self.processed_data=[];self.errors=[];self.warnings=[] - - def load_data ( self ) : - try: - if isinstance(self.data_source,str): - if self.data_source.endswith('.json'): - with open(self.data_source,'r')as f:data=json.load(f) - elif self.data_source.endswith('.csv'):data=pd.read_csv(self.data_source).to_dict('records') - else:raise ValueError(f"Unsupported file type: {self.data_source}") - elif isinstance(self.data_source,list):data=self.data_source - else:data=[self.data_source] - return data - except Exception as e: - self.errors.append(str(e));return[] - - def validate_data(self,data): - valid_items=[];invalid_items=[] - for item in data: - if isinstance(item,dict)and'id'in item and'name'in item:valid_items.append(item) - else:invalid_items.append(item) - if invalid_items:self.warnings.append(f"Found {len(invalid_items)} invalid items") - return valid_items - - def process(self): - data=self.load_data() - if not data:return{"success":False,"error":"No data loaded"} - - validated_data=self.validate_data(data) - processed_result=process_data(validated_data, - filter_func=lambda x:x.get('active',True), - transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()}, - sort_key=lambda x:x.get('name','')) - - self.processed_data=processed_result - return{"success":True,"count":len(processed_result),"data":processed_result} -if __name__=="__main__": - sample_data=[{"id":1,"name":"Alice","active":True},{"id":2,"name":"Bob","active":False},{"id":3,"name":"Charlie","active":True}] - - processor=DataProcessor(sample_data,config={"debug":True}) - result=processor.process() - - if result["success"]: - print(f"Successfully processed {result['count']} items") - for item in result["data"][:3]:print(f"- {item['name']} (ID: {item['id']})") - else:print(f"Processing failed: {result.get('error','Unknown error')}") - - # Generate report with poor formatting - report=generate_report(sample_data,include_stats=True,format_type='pretty_json') - print("Generated report:",report[:100]+"..."if len(report)>100 else report) - - # Complex calculation with poor spacing - numbers=[random.randint(1,100)for _ in range(50)] - stats=calculate_statistics(numbers) - complex_result=complex_nested_function(5,3,2) - - print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}") - print(f"Complex calculation result: {complex_result}") diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py deleted file mode 100644 index c521ef420..000000000 --- a/code_to_optimize/no_formatting_errors.py +++ /dev/null @@ -1,52 +0,0 @@ -import math - - -class UnformattedExampleClass: - def __init__(self, name, age=None, email=None, phone=None, address=None, city=None, state=None, zip_code=None): - self.name = name - self.age = age - self.email = email - self.phone = phone - self.address = address - self.city = city - self.state = state - self.zip_code = zip_code - self.data = {"name": name, "age": age, "email": email} - - def get_info(self): - return f"Name: {self.name}, Age: {self.age}" - - def update_data(self, **kwargs): - for key, value in kwargs.items(): - if hasattr(self, key): - setattr(self, key, value) - self.data.update(kwargs) - - -def process_data(data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False): - if not data_list: - return [] - if filter_func: - data_list = [item for item in data_list if filter_func(item)] - if transform_func: - data_list = [transform_func(item) for item in data_list] - if sort_key: - data_list = sorted(data_list, key=sort_key, reverse=reverse) - return data_list - - -def calculate_statistics(numbers): - if not numbers: - return None - mean = sum(numbers) / len(numbers) - median = sorted(numbers)[len(numbers) // 2] - variance = sum((x - mean) ** 2 for x in numbers) / len(numbers) - std_dev = math.sqrt(variance) - return { - "mean": mean, - "median": median, - "variance": variance, - "std_dev": std_dev, - "min": min(numbers), - "max": max(numbers), - } diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 11790c951..fbd7d0b9d 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -216,18 +216,18 @@ def foo(): format_code(formatter_cmds=["exit 1"], path=Path(tmp_path)) -def _run_formatting_test(source_filename: str, should_content_change: bool): +def _run_formatting_test(source_code: str, should_content_change: bool, expected = None, optimized_function: str = ""): try: import ruff # type: ignore except ImportError: pytest.skip("ruff is not installed") + with tempfile.TemporaryDirectory() as test_dir_str: test_dir = Path(test_dir_str) - this_file = Path(__file__).resolve() - repo_root_dir = this_file.parent.parent - source_file = repo_root_dir / "code_to_optimize" / source_filename - - original = source_file.read_text() + source_file = test_dir / "source.py" + + source_file.write_text(source_code) + original = source_code target_path = test_dir / "target.py" shutil.copy2(source_file, target_path) @@ -259,37 +259,544 @@ def _run_formatting_test(source_filename: str, should_content_change: bool): args=args, ) - content, _ = optimizer.reformat_code_and_helpers( + optimizer.reformat_code_and_helpers( helper_functions=[], path=target_path, original_code=optimizer.function_to_optimize_source_code, - # this is just for testing, but in practice, this would be an optimized function code and it will be well-formatted - optimized_function=""" def process(self): - data=self.load_data() - if not data:return{"success":False,"error":"No data loaded"} - - validated_data=self.validate_data(data) - processed_result=process_data(validated_data, - filter_func=lambda x:x.get('active',True), - transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()}, - sort_key=lambda x:x.get('name',''))""", + optimized_function=optimized_function, ) - + + content = target_path.read_text(encoding="utf8") + + if expected is not None: + assert content == expected, f"Expected content to be \n===========\n{expected}\n===========\nbut got\n===========\n{content}\n===========\n" + if should_content_change: - assert content != original, f"Expected content to change for {source_filename}" + assert content != original, f"Expected content to change for source.py" else: - assert content == original, f"Expected content to remain unchanged for {source_filename}" + assert content == original, f"Expected content to remain unchanged for source.py" + def test_formatting_file_with_many_diffs(): """Test that files with many formatting errors are skipped (content unchanged).""" - _run_formatting_test("many_formatting_errors.py", should_content_change=False) + source_code = '''import os,sys,json,datetime,re +from collections import defaultdict,OrderedDict +import numpy as np,pandas as pd + +class DataProcessor: + def __init__(self,config_path,data_path,output_path): + self.config_path=config_path + self.data_path=data_path + self.output_path=output_path + self.config={} + self.data=[] + self.results={} + + def load_config(self): + with open(self.config_path,'r') as f: + self.config=json.load(f) + if 'required_fields' not in self.config:self.config['required_fields']=[] + if 'optional_fields' not in self.config:self.config['optional_fields']=[] + return self.config + + def validate_data(self,data): + errors=[] + for idx,record in enumerate(data): + if not isinstance(record,dict): + errors.append(f"Record {idx} is not a dictionary") + continue + for field in self.config.get('required_fields',[]): + if field not in record: + errors.append(f"Record {idx} missing required field: {field}") + elif record[field] is None or record[field]=='': + errors.append(f"Record {idx} has empty required field: {field}") + return errors + + def process_data(self,data,filter_func=None,transform_func=None,sort_key=None): + if filter_func:data=[item for item in data if filter_func(item)] + if transform_func:data=[transform_func(item) for item in data] + if sort_key:data=sorted(data,key=sort_key) + aggregated_data=defaultdict(list) + for item in data: + category=item.get('category','unknown') + aggregated_data[category].append(item) + final_results={} + for category,items in aggregated_data.items(): + total_value=sum(item.get('value',0) for item in items) + avg_value=total_value/len(items) if items else 0 + final_results[category]={'count':len(items),'total':total_value,'average':avg_value,'items':items} + return final_results + + def save_results(self,results): + with open(self.output_path,'w') as f: + json.dump(results,f,indent=2,default=str) + print(f"Results saved to {self.output_path}") + + def run_pipeline(self): + try: + config=self.load_config() + with open(self.data_path,'r') as f: + raw_data=json.load(f) + validation_errors=self.validate_data(raw_data) + if validation_errors: + print("Validation errors found:") + for error in validation_errors:print(f" - {error}") + return False + processed_results=self.process_data(raw_data,filter_func=lambda x:x.get('active',True),transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},sort_key=lambda x:x.get('name','')) + self.save_results(processed_results) + return True + except Exception as e: + print(f"Pipeline failed: {str(e)}") + return False + +def main(): + processor=DataProcessor('/path/to/config.json','/path/to/data.json','/path/to/output.json') + success=processor.run_pipeline() + if success:print("Pipeline completed successfully") + else:print("Pipeline failed") + +if __name__=='__main__':main() +''' + _run_formatting_test(source_code, False) def test_formatting_file_with_few_diffs(): """Test that files with few formatting errors are formatted (content changed).""" - _run_formatting_test("few_formatting_errors.py", should_content_change=True) + source_code = '''import json +from datetime import datetime + +def process_data(data, config=None): + """Process data with optional configuration.""" + if not data: + return {"success": False, "error": "No data provided"} + + if config is None: + config = {"filter_active": True} + + # Minor formatting issues that should be fixed + result=[] + for item in data: + if config.get("filter_active") and not item.get("active",True): + continue + processed_item={ + "id": item.get("id"), + "name": item.get("name",""), + "value": item.get("value",0), + "processed_at": datetime.now().isoformat() + } + result.append(processed_item) + + return {"success": True, "data": result, "count": len(result)} +''' + _run_formatting_test(source_code, True) + def test_formatting_file_with_no_diffs(): """Test that files with no formatting errors are unchanged.""" - _run_formatting_test("no_formatting_errors.py", should_content_change=False) + # this test assumes you use ruff defaults for formatting + source_code = '''from datetime import datetime + + +def process_data(data, config=None): + """Process data with optional configuration.""" + if not data: + return {"success": False, "error": "No data provided"} + + if config is None: + config = {"filter_active": True} + + result = [] + for item in data: + if config.get("filter_active") and not item.get("active", True): + continue + + processed_item = { + "id": item.get("id"), + "name": item.get("name", ""), + "value": item.get("value", 0), + "processed_at": datetime.now().isoformat(), + } + result.append(processed_item) + + return {"success": True, "data": result, "count": len(result)} +''' + _run_formatting_test(source_code, False) + + +def test_formatting_extremely_messy_file(): + """Test that extremely messy files with 100+ potential changes are skipped.""" + source_code = '''import os,sys,json,datetime,re,collections,itertools,functools,operator +from pathlib import Path +from typing import Dict,List,Optional,Union,Any,Tuple +import numpy as np,pandas as pd,matplotlib.pyplot as plt +from dataclasses import dataclass,field + +@dataclass +class Config: + input_path:str + output_path:str + batch_size:int=100 + max_retries:int=3 + timeout:float=30.0 + debug:bool=False + filters:List[str]=field(default_factory=list) + transformations:Dict[str,Any]=field(default_factory=dict) + +class DataProcessorAdvanced: + def __init__(self,config:Config): + self.config=config + self.data=[] + self.results={} + self.errors=[] + self.stats={'processed':0,'failed':0,'skipped':0} + + def load_data(self,file_path:str)->List[Dict]: + try: + with open(file_path,'r',encoding='utf-8') as f: + if file_path.endswith('.json'):data=json.load(f) + elif file_path.endswith('.csv'): + import csv + reader=csv.DictReader(f) + data=[row for row in reader] + else:raise ValueError(f"Unsupported file format: {file_path}") + return data + except Exception as e:self.errors.append(f"Failed to load {file_path}: {str(e)}");return[] + + def validate_record(self,record:Dict,schema:Dict)->Tuple[bool,List[str]]: + errors=[] + for field,rules in schema.items(): + if rules.get('required',False) and field not in record: + errors.append(f"Missing required field: {field}") + elif field in record: + value=record[field] + if 'type' in rules and not isinstance(value,rules['type']): + errors.append(f"Field {field} has wrong type") + if 'min_length' in rules and isinstance(value,str) and len(value)rules['max_length']: + errors.append(f"Field {field} too long") + if 'min_value' in rules and isinstance(value,(int,float)) and valuerules['max_value']: + errors.append(f"Field {field} above maximum") + return len(errors)==0,errors + + def apply_filters(self,data:List[Dict])->List[Dict]: + filtered_data=data + for filter_name in self.config.filters: + if filter_name=='active_only':filtered_data=[r for r in filtered_data if r.get('active',True)] + elif filter_name=='has_value':filtered_data=[r for r in filtered_data if r.get('value') is not None] + elif filter_name=='recent_only': + cutoff=datetime.datetime.now()-datetime.timedelta(days=30) + filtered_data=[r for r in filtered_data if datetime.datetime.fromisoformat(r.get('created_at','1970-01-01'))>cutoff] + return filtered_data + + def apply_transformations(self,data:List[Dict])->List[Dict]: + for transform_name,params in self.config.transformations.items(): + if transform_name=='add_timestamp': + for record in data:record['processed_at']=datetime.datetime.now().isoformat() + elif transform_name=='normalize_names': + for record in data: + if 'name' in record:record['name']=record['name'].strip().title() + elif transform_name=='calculate_derived': + for record in data: + if 'value' in record and 'multiplier' in params: + record['derived_value']=record['value']*params['multiplier'] + return data + + def process_batch(self,batch:List[Dict])->Dict[str,Any]: + try: + processed_batch=[] + for record in batch: + try: + processed_record=dict(record) + processed_record['batch_id']=len(self.results) + processed_record['processed_at']=datetime.datetime.now().isoformat() + processed_batch.append(processed_record) + self.stats['processed']+=1 + except Exception as e: + self.errors.append(f"Failed to process record: {str(e)}") + self.stats['failed']+=1 + return {'success':True,'data':processed_batch,'count':len(processed_batch)} + except Exception as e: + self.errors.append(f"Batch processing failed: {str(e)}") + return {'success':False,'error':str(e)} + + def run_processing_pipeline(self)->bool: + try: + raw_data=self.load_data(self.config.input_path) + if not raw_data:return False + filtered_data=self.apply_filters(raw_data) + transformed_data=self.apply_transformations(filtered_data) + batches=[transformed_data[i:i+self.config.batch_size] for i in range(0,len(transformed_data),self.config.batch_size)] + all_results=[] + for i,batch in enumerate(batches): + if self.config.debug:print(f"Processing batch {i+1}/{len(batches)}") + result=self.process_batch(batch) + if result['success']:all_results.extend(result['data']) + else:self.stats['failed']+=len(batch) + with open(self.config.output_path,'w',encoding='utf-8') as f: + json.dump({'results':all_results,'stats':self.stats,'errors':self.errors},f,indent=2,default=str) + return True + except Exception as e: + self.errors.append(f"Pipeline failed: {str(e)}") + return False + +def create_sample_config()->Config: + return Config(input_path='input.json',output_path='output.json',batch_size=50,max_retries=3,timeout=60.0,debug=True,filters=['active_only','has_value'],transformations={'add_timestamp':{},'normalize_names':{},'calculate_derived':{'multiplier':1.5}}) + +def main(): + config=create_sample_config() + processor=DataProcessorAdvanced(config) + success=processor.run_processing_pipeline() + print(f"Processing {'completed' if success else 'failed'}") + print(f"Stats: {processor.stats}") + if processor.errors: + print("Errors encountered:") + for error in processor.errors:print(f" - {error}") + +if __name__=='__main__':main() +''' + _run_formatting_test(source_code, False) + + +def test_formatting_edge_case_exactly_100_diffs(): + """Test behavior when exactly at the threshold of 100 changes.""" + # Create a file with exactly 100 minor formatting issues + source_code = '''import json\n''' + ''' +def func{}(): + x=1;y=2;z=3 + return x+y+z +'''.replace('{}', '_{i}').format(i='{i}') * 33 # This creates exactly 100 potential formatting fixes + + _run_formatting_test(source_code, False) + + +def test_formatting_with_syntax_errors(): + """Test that files with syntax errors are handled gracefully.""" + source_code = '''import json + +def process_data(data): + if not data: + return {"error": "No data" + # Missing closing brace above + + result = [] + for item in data + # Missing colon above + result.append(item) + + return result +''' + _run_formatting_test(source_code, False) + + +def test_formatting_mixed_quotes_and_spacing(): + """Test files with mixed quote styles and inconsistent spacing.""" + source_code = '''import json +from datetime import datetime + +def process_mixed_style(data): + """Process data with mixed formatting styles.""" + config={'default_value':0,'required_fields':["id","name"],'optional_fields':["description","tags"]} + + results=[] + for item in data: + if not isinstance(item,dict):continue + + # Mixed quote styles + item_id=item.get("id") + item_name=item.get('name') + item_desc=item.get("description",'') + + # Inconsistent spacing + processed={ + 'id':item_id, + "name": item_name, + 'description':item_desc, + "processed_at":datetime.now().isoformat( ), + 'status':'processed' + } + results.append(processed) + + return {'data':results,"count":len(results)} +''' + _run_formatting_test(source_code, True) + + +def test_formatting_long_lines_and_imports(): + """Test files with long lines and import formatting issues.""" + source_code = '''import os, sys, json, datetime, re, collections, itertools +from pathlib import Path +from typing import Dict, List, Optional + +def process_with_long_lines(data, filter_func=lambda x: x.get('active', True) and x.get('value', 0) > 0, transform_func=lambda x: {**x, 'processed_at': datetime.datetime.now().isoformat(), 'status': 'processed'}): + """Function with very long parameter line.""" + return [transform_func(item) for item in data if filter_func(item) and isinstance(item, dict) and 'id' in item] + +def another_function_with_long_line(): + very_long_dictionary = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3', 'key4': 'value4', 'key5': 'value5'} + return very_long_dictionary +''' + _run_formatting_test(source_code, True) + + +def test_formatting_class_with_methods(): + """Test formatting of classes with multiple methods and minor issues.""" + source_code = '''class DataProcessor: + def __init__(self, config): + self.config=config + self.data=[] + + def load_data(self,file_path): + with open(file_path,'r') as f: + self.data=json.load(f) + return len(self.data) + + def process(self): + result=[] + for item in self.data: + if item.get('active',True): + result.append({ + 'id':item['id'], + 'processed':True + }) + return result +''' + _run_formatting_test(source_code, True) + + +def test_formatting_with_complex_comprehensions(): + """Test files with complex list/dict comprehensions and formatting.""" + source_code = '''def complex_comprehensions(data): + # Various comprehension styles with formatting issues + result1=[item['value'] for item in data if item.get('active',True) and 'value' in item] + + result2={item['id']:item['name'] for item in data if item.get('type')=='user'} + + result3=[[x,y] for x in range(10) for y in range(5) if x*y>10] + + # Nested comprehensions + nested=[[item for item in sublist if item%2==0] for sublist in data if isinstance(sublist,list)] + + return { + 'simple':result1, + 'mapping':result2, + 'complex':result3, + 'nested':nested + } +''' + _run_formatting_test(source_code, True) + + +def test_formatting_with_decorators_and_async(): + """Test files with decorators and async functions.""" + source_code = '''import asyncio +from functools import wraps + +def timer_decorator(func): + @wraps(func) + def wrapper(*args,**kwargs): + start=time.time() + result=func(*args,**kwargs) + end=time.time() + print(f"{func.__name__} took {end-start:.2f} seconds") + return result + return wrapper + +@timer_decorator +async def async_process_data(data): + result=[] + for item in data: + await asyncio.sleep(0.01) # Simulate async work + processed_item={'id':item.get('id'),'processed':True} + result.append(processed_item) + return result + +class AsyncProcessor: + @staticmethod + async def process_batch(batch): + return [{'id':item['id'],'status':'done'} for item in batch if 'id' in item] +''' + _run_formatting_test(source_code, True) + + +def test_formatting_threshold_configuration(): + """Test that the diff threshold can be configured (if supported).""" + # This test assumes the threshold might be configurable + source_code = '''import json,os,sys +def func1():x=1;y=2;return x+y +def func2():a=1;b=2;return a+b +def func3():c=1;d=2;return c+d +''' + # Test with a file that has moderate formatting issues + _run_formatting_test(source_code, True, optimized_function="def func2():a=1;b=2;return a+b") + + +def test_formatting_empty_file(): + """Test formatting of empty or minimal files.""" + source_code = '''# Just a comment pass +''' + _run_formatting_test(source_code, False) + + +def test_formatting_with_docstrings(): + """Test files with various docstring formats.""" + source_code = """def function_with_docstring( data): + ''' + This is a function with a docstring. + + Args: + data: Input data to process + + Returns: + Processed data + ''' + return [item for item in data if item.get('active',True)] + +class ProcessorWithDocs: + '''A processor class with documentation.''' + + def __init__(self,config): + '''Initialize with configuration.''' + self.config=config + + def process(self,data): + '''Single quote docstring with formatting issues.''' + return{'result':[item for item in data if self._is_valid(item)]} + + def _is_valid(self,item): + return isinstance(item,dict) and 'id' in item""" + expected = '''def function_with_docstring(data): + """This is a function with a docstring. + + Args: + data: Input data to process + + Returns: + Processed data + + """ + return [item for item in data if item.get("active", True)] + + +class ProcessorWithDocs: + """A processor class with documentation.""" + + def __init__(self, config): + """Initialize with configuration.""" + self.config = config + + def process(self, data): + """Single quote docstring with formatting issues.""" + return {"result": [item for item in data if self._is_valid(item)]} + + def _is_valid(self, item): + return isinstance(item, dict) and "id" in item +''' + + optimization_function = """ def process(self,data): + '''Single quote docstring with formatting issues.''' + return{'result':[item for item in data if self._is_valid(item)]}""" + _run_formatting_test(source_code, True, optimized_function=optimization_function, expected=expected) \ No newline at end of file From a1510a31da37b7e1d092af798eb65e1b0c9cd868 Mon Sep 17 00:00:00 2001 From: mohammed Date: Sat, 7 Jun 2025 02:09:15 +0300 Subject: [PATCH 24/26] enhancements --- codeflash/code_utils/formatter.py | 45 +++++++++++--------- codeflash/optimization/function_optimizer.py | 4 +- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index 5d4540116..d93ef46f7 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -104,32 +104,39 @@ def format_code( formatter_cmds: list[str], path: Union[str, Path], optimized_function: str = "", + check_diff: bool = False, # noqa print_status: bool = True, # noqa ) -> str: with tempfile.TemporaryDirectory() as test_dir_str: - max_diff_lines = 100 - if isinstance(path, str): path = Path(path) original_code = path.read_text(encoding="utf8") - # we dont' count the formatting diff for the optimized function as it should be well-formatted - original_code_without_opfunc = original_code.replace(optimized_function, "") - - original_temp = Path(test_dir_str) / "original_temp.py" - original_temp.write_text(original_code_without_opfunc, encoding="utf8") - - formatted_temp, formatted_code = apply_formatter_cmds( - formatter_cmds, original_temp, test_dir_str, print_status=False - ) - - diff_output = generate_unified_diff( - original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp) - ) - diff_lines_count = get_diff_lines_count(diff_output) - if diff_lines_count > max_diff_lines: - logger.debug(f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})") - return original_code + original_code_lines = len(original_code.split("\n")) + + if check_diff and original_code_lines > 50: + # we dont' count the formatting diff for the optimized function as it should be well-formatted + original_code_without_opfunc = original_code.replace(optimized_function, "") + + original_temp = Path(test_dir_str) / "original_temp.py" + original_temp.write_text(original_code_without_opfunc, encoding="utf8") + + formatted_temp, formatted_code = apply_formatter_cmds( + formatter_cmds, original_temp, test_dir_str, print_status=False + ) + + diff_output = generate_unified_diff( + original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp) + ) + diff_lines_count = get_diff_lines_count(diff_output) + + max_diff_lines = min(int(original_code_lines * 0.3), 50) + + if diff_lines_count > max_diff_lines and max_diff_lines != -1: + logger.debug( + f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})" + ) + return original_code _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status) logger.debug(f"Formatted {path} with commands: {formatter_cmds}") diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index f5e5bced0..205865f06 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -612,7 +612,7 @@ def reformat_code_and_helpers( if should_sort_imports and isort.code(original_code) != original_code: should_sort_imports = False - new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function) + new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function, check_diff=True) if should_sort_imports: new_code = sort_imports(new_code) @@ -621,7 +621,7 @@ def reformat_code_and_helpers( module_abspath = hp.file_path hp_source_code = hp.source_code formatted_helper_code = format_code( - self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code + self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code, check_diff=True ) if should_sort_imports: formatted_helper_code = sort_imports(formatted_helper_code) From 6cb846906fd22d3c3e8085044e710d39c4243025 Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Tue, 10 Jun 2025 15:40:11 +0530 Subject: [PATCH 25/26] Update formatter.py add a todo comment --- codeflash/code_utils/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index d93ef46f7..c4dd030b9 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -137,7 +137,7 @@ def format_code( f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})" ) return original_code - + # ToDO : We can avoid formatting the whole file again and only formatting the optimized code standalone and replace in formatted file above. _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status) logger.debug(f"Formatted {path} with commands: {formatter_cmds}") return formatted_code From 94e64d3218114391276f5b537dbcad161989e60c Mon Sep 17 00:00:00 2001 From: Sarthak Agarwal Date: Tue, 10 Jun 2025 15:42:27 +0530 Subject: [PATCH 26/26] Update formatter.py Fix ruff lint --- codeflash/code_utils/formatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index c4dd030b9..b1cb58540 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -137,7 +137,7 @@ def format_code( f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})" ) return original_code - # ToDO : We can avoid formatting the whole file again and only formatting the optimized code standalone and replace in formatted file above. + # TODO : We can avoid formatting the whole file again and only formatting the optimized code standalone and replace in formatted file above. _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status) logger.debug(f"Formatted {path} with commands: {formatter_cmds}") return formatted_code