From c75bbf65e2f9785c7aaed11ee2540805e1adc6f5 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 11:56:09 +0300
Subject: [PATCH 01/26] check large diffs with black, and skip formatting in
 such case (after optimizing)

---
 code_to_optimize/few_formatting_errors.py  |  47 +++++++
 code_to_optimize/many_formatting_errors.py | 147 +++++++++++++++++++++
 codeflash/code_utils/formatter.py          |  39 +++++-
 tests/test_formatter.py                    |  69 ++++++++++
 4 files changed, 300 insertions(+), 2 deletions(-)
 create mode 100644 code_to_optimize/few_formatting_errors.py
 create mode 100644 code_to_optimize/many_formatting_errors.py

diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py
new file mode 100644
index 000000000..905be2b39
--- /dev/null
+++ b/code_to_optimize/few_formatting_errors.py
@@ -0,0 +1,47 @@
+import os
+
+class BadlyFormattedClass(object):
+    def __init__(
+        self,
+        name,
+        age=    None,
+        email=  None,
+        phone=None,
+        address=None,
+        city=None,
+        state=None,
+        zip_code=None,
+    ):
+        self.name = name
+        self.age = age
+        self.email = email
+        self.phone = phone
+        self.   address = address
+        self.city = city
+        self.state = state
+        self.zip_code = zip_code
+        self.data = {"name": name, "age": age, "email": email}
+
+    def get_info(self):
+        return f"Name: {self.name}, Age: {self.age}"
+
+    def update_data(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+        self.data.update(kwargs)
+
+
+def process_data(
+    data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False
+):
+    if not data_list:
+        return []
+    if filter_func:
+        data_list = [   item for item in data_list if filter_func(item)]
+    if transform_func:
+        data_list = [transform_func(item) for item in data_list]
+    if sort_key:
+        data_list = sorted(data_list, key=sort_key, reverse=reverse)
+    return data_list
+
diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py
new file mode 100644
index 000000000..bd792e3d3
--- /dev/null
+++ b/code_to_optimize/many_formatting_errors.py
@@ -0,0 +1,147 @@
+import os,sys,json,datetime,math,random;import requests;from collections import defaultdict,OrderedDict
+from typing import List,Dict,Optional,Union,Tuple,Any;import numpy as np;import pandas as pd
+
+# This is a poorly formatted Python file with many style violations
+
+class   BadlyFormattedClass( object ):
+    def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None):
+        self.name=name;self.age=age;self.email=email;self.phone=phone
+        self.address=address;self.city=city;self.state=state;self.zip_code=zip_code
+        self.data={"name":name,"age":age,"email":email}
+
+    def   get_info(self   ):
+        return f"Name: {self.name}, Age: {self.age}"
+
+    def update_data(self,**kwargs):
+        for key,value in kwargs.items():
+            if hasattr(self,key):setattr(self,key,value)
+        self.data.update(kwargs)
+
+def process_data(data_list,filter_func=None,transform_func=None,sort_key=None,reverse=False):
+    if not data_list:return[]
+    if filter_func:data_list=[item for item in data_list if filter_func(item)]
+    if transform_func:data_list=[transform_func(item)for item in data_list]
+    if sort_key:data_list=sorted(data_list,key=sort_key,reverse=reverse)
+    return data_list
+
+def calculate_statistics(numbers):
+    if not numbers:return None
+    mean=sum(numbers)/len(numbers);         median=sorted(numbers)[len(numbers)//2]
+    variance=sum((x-mean)**2 for x in numbers)/len(numbers);std_dev=math.sqrt(variance)
+    return      {"mean":mean,"median":median,"variance":variance,"std_dev":std_dev,"min":min(numbers),"max":max(numbers)}
+
+def complex_nested_function(x,y,z):
+    def inner_function_1(a,b):
+        def deeply_nested(c,d):
+            return c*d+a*b
+        return deeply_nested(a+1,b-1)+deeply_nested(a-1,b+1)
+    def     inner_function_2    (a,b,c):
+        result=[]
+        for i in range(a):
+            for j in     range(b):
+                for k in range(c):
+                    if i*j*k>0:result.append(i*j*k)
+                    elif i+j+k==0:result.append(-1)
+                    else    :result.append(0)
+        return result
+    return inner_function_1(x,y)+sum(inner_function_2(x,y,z))
+
+# Long lines and poor dictionary formatting
+user_data={"users":[{"id":1,"name":"John Doe","email":"john@example.com","preferences":{"theme":"dark","notifications":True,"language":"en"},"metadata":{"created_at":"2023-01-01","last_login":"2024-01-01","login_count":150}},{"id":2,"name":"Jane Smith","email":"jane@example.com","preferences":{"theme":"light","notifications":False,"language":"es"},"metadata":{"created_at":"2023-02-15","last_login":"2024-01-15","login_count":89}}]}
+
+# Poor list formatting and string concatenation
+long_list_of_items=['item_1','item_2','item_3','item_4','item_5','item_6','item_7','item_8','item_9','item_10','item_11','item_12','item_13','item_14','item_15','item_16','item_17','item_18','item_19','item_20']
+
+def generate_report(data,include_stats=True,include_charts=False,format_type='json',output_file=None):
+    if not data:raise ValueError("Data cannot be empty")
+    report={'timestamp':datetime.datetime.now().isoformat(),'data_count':len(data),'summary':{}}
+    
+    # Bad formatting in loops and conditionals
+    for i,item in enumerate(data):
+        if isinstance(item,dict):
+            for key,value in item.items():
+                if key not in report['summary']:report['summary'][key]=[]
+                report['summary'][key].append(value)
+        elif isinstance(item,(int,float)):
+            if 'numbers' not in report['summary']:report['summary']['numbers']=[]
+            report['summary']['numbers'].append(item)
+        else:
+            if 'other' not in report['summary']:report['summary']['other']=[]
+            report['summary']['other'].append(str(item))
+    
+    if include_stats and 'numbers' in report['summary']:
+        numbers=report['summary']['numbers']
+        report['statistics']=calculate_statistics(numbers)
+    
+    # Long conditional chain with poor formatting
+    if format_type=='json':result=json.dumps(report,indent=None,separators=(',',':'))
+    elif format_type=='pretty_json':result=json.dumps(report,indent=2)
+    elif format_type=='string':result=str(report)
+    else:result=report
+    
+    if output_file:
+        with open(output_file,'w')as f:f.write(result if isinstance(result,str)else json.dumps(result))
+    
+    return result
+
+class   DataProcessor  (  BadlyFormattedClass  )  :
+    def __init__(self,data_source,config=None,debug=False):
+        super().__init__("DataProcessor")
+        self.data_source=data_source;self.config=config or{};self.debug=debug
+        self.processed_data=[];self.errors=[];self.warnings=[]
+
+    def   load_data  (  self  )  :
+        try:
+            if isinstance(self.data_source,str):
+                if self.data_source.endswith('.json'):
+                    with open(self.data_source,'r')as f:data=json.load(f)
+                elif self.data_source.endswith('.csv'):data=pd.read_csv(self.data_source).to_dict('records')
+                else:raise ValueError(f"Unsupported file type: {self.data_source}")
+            elif isinstance(self.data_source,list):data=self.data_source
+            else:data=[self.data_source]
+            return data
+        except Exception as e:
+            self.errors.append(str(e));return[]
+
+    def validate_data(self,data):
+        valid_items=[];invalid_items=[]
+        for item in data:
+            if isinstance(item,dict)and'id'in item and'name'in item:valid_items.append(item)
+            else:invalid_items.append(item)
+        if invalid_items:self.warnings.append(f"Found {len(invalid_items)} invalid items")
+        return valid_items
+
+    def process(self):
+        data=self.load_data()
+        if not data:return{"success":False,"error":"No data loaded"}
+        
+        validated_data=self.validate_data(data)
+        processed_result=process_data(validated_data,
+                                    filter_func=lambda x:x.get('active',True),
+                                    transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},
+                                    sort_key=lambda x:x.get('name',''))
+        
+        self.processed_data=processed_result
+        return{"success":True,"count":len(processed_result),"data":processed_result}
+if __name__=="__main__":
+    sample_data=[{"id":1,"name":"Alice","active":True},{"id":2,"name":"Bob","active":False},{"id":3,"name":"Charlie","active":True}]
+    
+    processor=DataProcessor(sample_data,config={"debug":True})
+    result=processor.process()
+    
+    if result["success"]:
+        print(f"Successfully processed {result['count']} items")
+        for item in result["data"][:3]:print(f"- {item['name']} (ID: {item['id']})")
+    else:print(f"Processing failed: {result.get('error','Unknown error')}")
+    
+    # Generate report with poor formatting
+    report=generate_report(sample_data,include_stats=True,format_type='pretty_json')
+    print("Generated report:",report[:100]+"..."if len(report)>100 else report)
+    
+    # Complex calculation with poor spacing
+    numbers=[random.randint(1,100)for _ in range(50)]
+    stats=calculate_statistics(numbers)
+    complex_result=complex_nested_function(5,3,2)
+    
+    print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}")
+    print(f"Complex calculation result: {complex_result}")
\ No newline at end of file
diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 927a4d4cb..0b673ae28 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -13,14 +13,49 @@
     from pathlib import Path
 
 
+def should_format_file(filepath, max_lines_changed=50):
+        try:
+            # check if black is installed
+            subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+            result = subprocess.run(
+                ['black', '--diff', filepath], 
+                capture_output=True, 
+                text=True
+            )
+            
+            if result.returncode == 0 and not result.stdout:
+                return False
+                
+            diff_lines = [line for line in result.stdout.split('\n') 
+                        if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))]
+            
+            changes_count = len(diff_lines)
+            
+            if changes_count > max_lines_changed:
+                logger.debug(f"Skipping {filepath}: {changes_count} lines would change (max: {max_lines_changed})")
+                return False
+            
+            return True
+            
+        except subprocess.CalledProcessError:
+            logger.warning(f"black command failed for {filepath}")
+            return False
+        except FileNotFoundError:
+            logger.warning("black is not installed. Skipping formatting check.")
+            return False
+
+
+
 def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str:  # noqa
     # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution
     formatter_name = formatter_cmds[0].lower()
     if not path.exists():
         msg = f"File {path} does not exist. Cannot format the file."
         raise FileNotFoundError(msg)
-    if formatter_name == "disabled":
+    if formatter_name == "disabled" or not should_format_file(path):
         return path.read_text(encoding="utf8")
+
     file_token = "$file"  # noqa: S105
     for command in formatter_cmds:
         formatter_cmd_list = shlex.split(command, posix=os.name != "nt")
@@ -29,7 +64,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True
             result = subprocess.run(formatter_cmd_list, capture_output=True, check=False)
             if result.returncode == 0:
                 if print_status:
-                    console.rule(f"Formatted Successfully with: {formatter_name.replace('$file', path.name)}")
+                    console.rule(f"Formatted Successfully with: {command.replace('$file', path.name)}")
             else:
                 logger.error(f"Failed to format code with {' '.join(formatter_cmd_list)}")
         except FileNotFoundError as e:
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 5c0a91c38..14f6789e1 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -1,12 +1,17 @@
+import argparse
 import os
 import tempfile
 from pathlib import Path
 
 import pytest
+import shutil
 
 from codeflash.code_utils.config_parser import parse_config_file
 from codeflash.code_utils.formatter import format_code, sort_imports
 
+from codeflash.discovery.functions_to_optimize import FunctionToOptimize
+from codeflash.optimization.function_optimizer import FunctionOptimizer
+from codeflash.verification.verification_utils import TestConfig
 
 def test_remove_duplicate_imports():
     """Test that duplicate imports are removed when should_sort_imports is True."""
@@ -209,3 +214,67 @@ def foo():
         tmp_path = tmp.name
         with pytest.raises(FileNotFoundError):
             format_code(formatter_cmds=["exit 1"], path=Path(tmp_path))
+
+
+def _run_formatting_test(source_filename: str, should_content_change: bool):
+    """Helper function to run formatting tests with common setup and teardown."""
+    with tempfile.TemporaryDirectory() as test_dir_str:
+        test_dir = Path(test_dir_str)
+        this_file = Path(__file__).resolve()
+        repo_root_dir = this_file.parent.parent
+        source_file = repo_root_dir / "code_to_optimize" / source_filename
+
+        original = source_file.read_text()
+        target_path = test_dir / "target.py"
+        
+        shutil.copy2(source_file, target_path)
+
+        function_to_optimize = FunctionToOptimize(
+            function_name="process_data", 
+            parents=[], 
+            file_path=target_path
+        )
+
+        test_cfg = TestConfig(
+            tests_root=test_dir,
+            project_root_path=test_dir,
+            test_framework="pytest",
+            tests_project_rootdir=test_dir,
+        )
+
+        args = argparse.Namespace(
+            disable_imports_sorting=False,
+            formatter_cmds=[
+                "ruff check --exit-zero --fix $file",
+                "ruff format $file"
+            ],
+        )
+
+        optimizer = FunctionOptimizer(
+            function_to_optimize=function_to_optimize,
+            test_cfg=test_cfg,
+            args=args,
+        )
+        
+        optimizer.reformat_code_and_helpers(
+            helper_functions=[],
+            path=target_path,
+            original_code=optimizer.function_to_optimize_source_code,
+        )
+        
+        content = target_path.read_text()
+        
+        if should_content_change:
+            assert content != original, f"Expected content to change for {source_filename}"
+        else:
+            assert content == original, f"Expected content to remain unchanged for {source_filename}"
+
+
+def test_formatting_file_with_many_diffs():
+    """Test that files with many formatting errors are skipped (content unchanged)."""
+    _run_formatting_test("many_formatting_errors.py", should_content_change=False)
+
+
+def test_formatting_file_with_few_diffs():
+    """Test that files with few formatting errors are formatted (content changed)."""
+    _run_formatting_test("few_formatting_errors.py", should_content_change=True)
\ No newline at end of file

From 5cd13ad1caeb98fcc3b0c39f69b98d9abb84ed8f Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 11:58:55 +0300
Subject: [PATCH 02/26] new line

---
 tests/test_formatter.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 14f6789e1..3f45460eb 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -11,6 +11,7 @@
 
 from codeflash.discovery.functions_to_optimize import FunctionToOptimize
 from codeflash.optimization.function_optimizer import FunctionOptimizer
+from codeflash.optimization.function_optimizer import FunctionSource
 from codeflash.verification.verification_utils import TestConfig
 
 def test_remove_duplicate_imports():
@@ -257,7 +258,9 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
         )
         
         optimizer.reformat_code_and_helpers(
-            helper_functions=[],
+            helper_functions=[
+                FunctionSource()
+            ],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
         )
@@ -277,4 +280,4 @@ def test_formatting_file_with_many_diffs():
 
 def test_formatting_file_with_few_diffs():
     """Test that files with few formatting errors are formatted (content changed)."""
-    _run_formatting_test("few_formatting_errors.py", should_content_change=True)
\ No newline at end of file
+    _run_formatting_test("few_formatting_errors.py", should_content_change=True)

From 152222726c19b5abb28d983180334f0708bc5476 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 12:02:53 +0300
Subject: [PATCH 03/26] better log messages

---
 codeflash/code_utils/formatter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 0b673ae28..3d6eff6cd 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -39,10 +39,10 @@ def should_format_file(filepath, max_lines_changed=50):
             return True
             
         except subprocess.CalledProcessError:
-            logger.warning(f"black command failed for {filepath}")
+            logger.warning(f"black --diff command failed for {filepath}")
             return False
         except FileNotFoundError:
-            logger.warning("black is not installed. Skipping formatting check.")
+            logger.warning("black formatter is not installed. Skipping formatting diff check.")
             return False
 
 

From d3ca1cbf94e464d0cbecd0c234cb20885b7bf517 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 12:10:43 +0300
Subject: [PATCH 04/26] remove unnecessary check

---
 codeflash/code_utils/formatter.py | 3 ---
 tests/test_formatter.py           | 5 +----
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 3d6eff6cd..f301bd013 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -23,9 +23,6 @@ def should_format_file(filepath, max_lines_changed=50):
                 capture_output=True, 
                 text=True
             )
-            
-            if result.returncode == 0 and not result.stdout:
-                return False
                 
             diff_lines = [line for line in result.stdout.split('\n') 
                         if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))]
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 3f45460eb..7b0a43b42 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -11,7 +11,6 @@
 
 from codeflash.discovery.functions_to_optimize import FunctionToOptimize
 from codeflash.optimization.function_optimizer import FunctionOptimizer
-from codeflash.optimization.function_optimizer import FunctionSource
 from codeflash.verification.verification_utils import TestConfig
 
 def test_remove_duplicate_imports():
@@ -258,9 +257,7 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
         )
         
         optimizer.reformat_code_and_helpers(
-            helper_functions=[
-                FunctionSource()
-            ],
+            helper_functions=[],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
         )

From dcb084ad12df7e01b82593e3a5f47a8b15a534e3 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 12:14:59 +0300
Subject: [PATCH 05/26] new line

---
 code_to_optimize/many_formatting_errors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py
index bd792e3d3..702539f70 100644
--- a/code_to_optimize/many_formatting_errors.py
+++ b/code_to_optimize/many_formatting_errors.py
@@ -144,4 +144,4 @@ def process(self):
     complex_result=complex_nested_function(5,3,2)
     
     print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}")
-    print(f"Complex calculation result: {complex_result}")
\ No newline at end of file
+    print(f"Complex calculation result: {complex_result}")

From 689a2d97af6e617407f1075da5e85ec9d67b8097 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 13:00:22 +0300
Subject: [PATCH 06/26] remove unused comment

---
 tests/test_formatter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 7b0a43b42..3106ee330 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -217,7 +217,6 @@ def foo():
 
 
 def _run_formatting_test(source_filename: str, should_content_change: bool):
-    """Helper function to run formatting tests with common setup and teardown."""
     with tempfile.TemporaryDirectory() as test_dir_str:
         test_dir = Path(test_dir_str)
         this_file = Path(__file__).resolve()

From 44c0f85b6f7c1b4b047528426e6157fae852e681 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 13:55:10 +0300
Subject: [PATCH 07/26] raise the max lines for formatting changes to 100

---
 codeflash/code_utils/formatter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index f301bd013..13b330746 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -13,7 +13,7 @@
     from pathlib import Path
 
 
-def should_format_file(filepath, max_lines_changed=50):
+def should_format_file(filepath, max_lines_changed=100):
         try:
             # check if black is installed
             subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

From 73ef51821ac8c3ec5daafe4234a5bf1d518f30f2 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 19:39:15 +0300
Subject: [PATCH 08/26] refactoring

---
 code_to_optimize/few_formatting_errors.py  |  2 +-
 code_to_optimize/many_formatting_errors.py |  4 +-
 codeflash/code_utils/formatter.py          | 89 ++++++++++++++--------
 tests/test_formatter.py                    |  7 ++
 4 files changed, 66 insertions(+), 36 deletions(-)

diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py
index 905be2b39..27ed71b44 100644
--- a/code_to_optimize/few_formatting_errors.py
+++ b/code_to_optimize/few_formatting_errors.py
@@ -1,6 +1,6 @@
 import os
 
-class BadlyFormattedClass(object):
+class UnformattedExampleClass(object):
     def __init__(
         self,
         name,
diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py
index 702539f70..79cfc825d 100644
--- a/code_to_optimize/many_formatting_errors.py
+++ b/code_to_optimize/many_formatting_errors.py
@@ -3,7 +3,7 @@
 
 # This is a poorly formatted Python file with many style violations
 
-class   BadlyFormattedClass( object ):
+class   UnformattedExampleClass( object ):
     def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None):
         self.name=name;self.age=age;self.email=email;self.phone=phone
         self.address=address;self.city=city;self.state=state;self.zip_code=zip_code
@@ -84,7 +84,7 @@ def generate_report(data,include_stats=True,include_charts=False,format_type='js
     
     return result
 
-class   DataProcessor  (  BadlyFormattedClass  )  :
+class   DataProcessor  (  UnformattedExampleClass  )  :
     def __init__(self,data_source,config=None,debug=False):
         super().__init__("DataProcessor")
         self.data_source=data_source;self.config=config or{};self.debug=debug
diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 13b330746..94b5c7dc5 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -3,7 +3,7 @@
 import os
 import shlex
 import subprocess
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 
 import isort
 
@@ -12,37 +12,60 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
-
-def should_format_file(filepath, max_lines_changed=100):
-        try:
-            # check if black is installed
-            subprocess.run(['black', '--version'], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
-            result = subprocess.run(
-                ['black', '--diff', filepath], 
-                capture_output=True, 
-                text=True
-            )
-                
-            diff_lines = [line for line in result.stdout.split('\n') 
-                        if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))]
-            
-            changes_count = len(diff_lines)
-            
-            if changes_count > max_lines_changed:
-                logger.debug(f"Skipping {filepath}: {changes_count} lines would change (max: {max_lines_changed})")
-                return False
-            
-            return True
-            
-        except subprocess.CalledProcessError:
-            logger.warning(f"black --diff command failed for {filepath}")
-            return False
-        except FileNotFoundError:
-            logger.warning("black formatter is not installed. Skipping formatting diff check.")
-            return False
-
-
+def get_diff_lines_output_by_black(filepath: str) -> Optional[str]:
+    try:
+        subprocess.run(['black', '--version'], check=True,
+                     stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        result = subprocess.run(
+            ['black', '--diff', filepath],
+            capture_output=True,
+            text=True
+        )
+        return result.stdout.strip() if result.stdout else None
+    except (FileNotFoundError):
+        return None
+
+
+def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]:
+    try:
+        subprocess.run(['ruff', '--version'], check=True,
+                     stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        result = subprocess.run(
+            ['ruff', "format", '--diff', filepath],
+            capture_output=True,
+            text=True
+        )
+        return result.stdout.strip() if result.stdout else None
+    except (FileNotFoundError):
+        return None
+
+
+def get_diff_lines_count(diff_output: str) -> int:
+    diff_lines = [line for line in diff_output.split('\n') 
+                  if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))]
+    return len(diff_lines)
+
+def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
+    diff_changes_stdout = None
+
+    diff_changes_stdout = get_diff_lines_output_by_black(filepath)
+
+    if diff_changes_stdout is None:
+        logger.warning(f"black formatter not found, trying ruff instead...")
+        diff_changes_stdout = get_diff_lines_output_by_ruff(filepath)
+        if diff_changes_stdout is None:
+            msg = f"Both ruff, black formatters not found, skipping formatting diff check."
+            logger.warning(msg)
+            raise FileNotFoundError(msg)
+    
+    diff_lines_count = get_diff_lines_count(diff_changes_stdout)
+    
+    if diff_lines_count > max_diff_lines:
+        logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})")
+        return False
+    else:
+        return True
+        
 
 def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str:  # noqa
     # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution
@@ -50,7 +73,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True
     if not path.exists():
         msg = f"File {path} does not exist. Cannot format the file."
         raise FileNotFoundError(msg)
-    if formatter_name == "disabled" or not should_format_file(path):
+    if formatter_name == "disabled" or not is_safe_to_format(path):     # few -> False, large -> True
         return path.read_text(encoding="utf8")
 
     file_token = "$file"  # noqa: S105
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 3106ee330..ed2d7233a 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -268,12 +268,19 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
         else:
             assert content == original, f"Expected content to remain unchanged for {source_filename}"
 
+def _ruff_or_black_installed() -> bool:
+    return shutil.which("black") is not None or shutil.which("ruff") is not None
+
 
 def test_formatting_file_with_many_diffs():
     """Test that files with many formatting errors are skipped (content unchanged)."""
+    if not _ruff_or_black_installed():
+        pytest.skip("Neither black nor ruff is installed, skipping formatting tests.")
     _run_formatting_test("many_formatting_errors.py", should_content_change=False)
 
 
 def test_formatting_file_with_few_diffs():
     """Test that files with few formatting errors are formatted (content changed)."""
+    if not _ruff_or_black_installed():
+        pytest.skip("Neither black nor ruff is installed, skipping formatting tests.")
     _run_formatting_test("few_formatting_errors.py", should_content_change=True)

From a5343fd9454eebf471fc893ad587a33a2e75b705 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 23:37:18 +0300
Subject: [PATCH 09/26] refactoring and improvements

---
 codeflash/code_utils/formatter.py | 61 ++++++++++++++++++-------------
 tests/test_formatter.py           |  3 +-
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 94b5c7dc5..3d5b587c6 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -4,7 +4,6 @@
 import shlex
 import subprocess
 from typing import TYPE_CHECKING, Optional
-
 import isort
 
 from codeflash.cli_cmds.console import console, logger
@@ -12,37 +11,48 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
-def get_diff_lines_output_by_black(filepath: str) -> Optional[str]:
+def get_nth_line(text: str, n: int) -> str | None:
+    for i, line in enumerate(text.splitlines(), start=1):
+        if i == n:
+            return line
+    return None
+
+def get_diff_output(cmd: list[str]) -> Optional[str]:
     try:
-        subprocess.run(['black', '--version'], check=True,
-                     stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-        result = subprocess.run(
-            ['black', '--diff', filepath],
-            capture_output=True,
-            text=True
-        )
-        return result.stdout.strip() if result.stdout else None
-    except (FileNotFoundError):
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        return result.stdout.strip() or None
+    except (FileNotFoundError, subprocess.CalledProcessError) as e:
+        if isinstance(e, subprocess.CalledProcessError):
+            # ruff returns 1 when the file needs formatting, and 0 when it is already formatted
+            is_ruff = cmd[0] == "ruff"
+            if e.returncode == 0 and is_ruff:
+                return ""
+            elif e.returncode == 1 and is_ruff:
+                return e.stdout.strip() or None
         return None
 
 
+def get_diff_lines_output_by_black(filepath: str) -> Optional[str]:
+    try:
+        import black  # type: ignore
+        return get_diff_output(['black', '--diff', filepath])
+    except ImportError:
+        return None
+
 def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]:
     try:
-        subprocess.run(['ruff', '--version'], check=True,
-                     stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-        result = subprocess.run(
-            ['ruff', "format", '--diff', filepath],
-            capture_output=True,
-            text=True
-        )
-        return result.stdout.strip() if result.stdout else None
-    except (FileNotFoundError):
+        import ruff  # type: ignore
+        return get_diff_output(['ruff', 'format', '--diff', filepath])
+    except ImportError:
+        print("can't import ruff")
         return None
 
 
 def get_diff_lines_count(diff_output: str) -> int:
-    diff_lines = [line for line in diff_output.split('\n') 
-                  if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))]
+    lines = diff_output.split('\n')
+    def is_diff_line(line: str) -> bool:
+        return line.startswith(('+', '-')) and not line.startswith(('+++', '---'))
+    diff_lines = [line for line in lines if is_diff_line(line)]
     return len(diff_lines)
 
 def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
@@ -54,9 +64,8 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
         logger.warning(f"black formatter not found, trying ruff instead...")
         diff_changes_stdout = get_diff_lines_output_by_ruff(filepath)
         if diff_changes_stdout is None:
-            msg = f"Both ruff, black formatters not found, skipping formatting diff check."
-            logger.warning(msg)
-            raise FileNotFoundError(msg)
+            logger.warning(f"Both ruff, black formatters not found, skipping formatting diff check.")
+            return False
     
     diff_lines_count = get_diff_lines_count(diff_changes_stdout)
     
@@ -73,7 +82,7 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True
     if not path.exists():
         msg = f"File {path} does not exist. Cannot format the file."
         raise FileNotFoundError(msg)
-    if formatter_name == "disabled" or not is_safe_to_format(path):     # few -> False, large -> True
+    if formatter_name == "disabled" or not is_safe_to_format(str(path)):
         return path.read_text(encoding="utf8")
 
     file_token = "$file"  # noqa: S105
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index ed2d7233a..c2e7864e6 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -217,6 +217,8 @@ def foo():
 
 
 def _run_formatting_test(source_filename: str, should_content_change: bool):
+    if shutil.which("ruff") is None:
+        pytest.skip("ruff is not installed, skipping.")
     with tempfile.TemporaryDirectory() as test_dir_str:
         test_dir = Path(test_dir_str)
         this_file = Path(__file__).resolve()
@@ -262,7 +264,6 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
         )
         
         content = target_path.read_text()
-        
         if should_content_change:
             assert content != original, f"Expected content to change for {source_filename}"
         else:

From 395855d5c214c963d0c4784ccb3a42926074b6df Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 23:50:44 +0300
Subject: [PATCH 10/26] added black as dev dependency

---
 poetry.lock             | 68 +++++++++++++++++++++++++++++++++++++++--
 pyproject.toml          |  1 +
 tests/test_formatter.py | 13 +++-----
 3 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 04cfeae09..b80c86387 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -73,6 +73,53 @@ files = [
     {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"},
 ]
 
+[[package]]
+name = "black"
+version = "25.1.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+    {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"},
+    {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"},
+    {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"},
+    {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"},
+    {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"},
+    {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"},
+    {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"},
+    {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"},
+    {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"},
+    {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"},
+    {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"},
+    {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"},
+    {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"},
+    {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"},
+    {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"},
+    {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"},
+    {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"},
+    {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"},
+    {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"},
+    {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"},
+    {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"},
+    {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.10)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
 [[package]]
 name = "blessed"
 version = "1.21.0"
@@ -248,7 +295,7 @@ version = "8.1.8"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
-groups = ["main"]
+groups = ["main", "dev"]
 files = [
     {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
     {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
@@ -264,11 +311,11 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main", "dev"]
+markers = "sys_platform == \"win32\" or platform_system == \"Windows\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
 
 [[package]]
 name = "coverage"
@@ -1025,8 +1072,11 @@ files = [
     {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
     {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
+    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
+    {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
     {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
     {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
     {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},
@@ -1344,6 +1394,18 @@ files = [
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["docopt", "pytest"]
 
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
+    {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
+]
+
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -2686,4 +2748,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9"
-content-hash = "1a73e9db33e3884cf1cc6e3371816aebd20831845ef9bf671be315e659480e86"
+content-hash = "d0b959755aad4882df502f8ba219b865df472ba1830d5adf8e757aa6436bc3df"
diff --git a/pyproject.toml b/pyproject.toml
index c3e48f889..dd38137ee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,6 +123,7 @@ types-pexpect = "^4.9.0.20241208"
 types-unidiff = "^0.7.0.20240505"
 uv = ">=0.6.2"
 pre-commit = "^4.2.0"
+black = "^25.1.0"
 
 [tool.poetry.build]
 script = "codeflash/update_license_version.py"
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index c2e7864e6..b6c87b190 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -217,8 +217,10 @@ def foo():
 
 
 def _run_formatting_test(source_filename: str, should_content_change: bool):
-    if shutil.which("ruff") is None:
-        pytest.skip("ruff is not installed, skipping.")
+    try:
+        import ruff  # type: ignore
+    except ImportError:
+        pytest.skip("ruff is not installed")
     with tempfile.TemporaryDirectory() as test_dir_str:
         test_dir = Path(test_dir_str)
         this_file = Path(__file__).resolve()
@@ -269,19 +271,12 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
         else:
             assert content == original, f"Expected content to remain unchanged for {source_filename}"
 
-def _ruff_or_black_installed() -> bool:
-    return shutil.which("black") is not None or shutil.which("ruff") is not None
-
 
 def test_formatting_file_with_many_diffs():
     """Test that files with many formatting errors are skipped (content unchanged)."""
-    if not _ruff_or_black_installed():
-        pytest.skip("Neither black nor ruff is installed, skipping formatting tests.")
     _run_formatting_test("many_formatting_errors.py", should_content_change=False)
 
 
 def test_formatting_file_with_few_diffs():
     """Test that files with few formatting errors are formatted (content changed)."""
-    if not _ruff_or_black_installed():
-        pytest.skip("Neither black nor ruff is installed, skipping formatting tests.")
     _run_formatting_test("few_formatting_errors.py", should_content_change=True)

From 822d6cc015d1a5dc3e6c28bea4a1ef599cb19a05 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Tue, 3 Jun 2025 23:57:55 +0300
Subject: [PATCH 11/26] made some refactor changes that codeflash suggested

---
 codeflash/code_utils/formatter.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 3d5b587c6..e1d269aa7 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -27,7 +27,7 @@ def get_diff_output(cmd: list[str]) -> Optional[str]:
             is_ruff = cmd[0] == "ruff"
             if e.returncode == 0 and is_ruff:
                 return ""
-            elif e.returncode == 1 and is_ruff:
+            if e.returncode == 1 and is_ruff:
                 return e.stdout.strip() or None
         return None
 
@@ -61,10 +61,10 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
     diff_changes_stdout = get_diff_lines_output_by_black(filepath)
 
     if diff_changes_stdout is None:
-        logger.warning(f"black formatter not found, trying ruff instead...")
+        logger.warning("black formatter not found, trying ruff instead...")
         diff_changes_stdout = get_diff_lines_output_by_ruff(filepath)
         if diff_changes_stdout is None:
-            logger.warning(f"Both ruff, black formatters not found, skipping formatting diff check.")
+            logger.warning("Both ruff, black formatters not found, skipping formatting diff check.")
             return False
     
     diff_lines_count = get_diff_lines_count(diff_changes_stdout)
@@ -72,8 +72,8 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
     if diff_lines_count > max_diff_lines:
         logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})")
         return False
-    else:
-        return True
+
+    return True
         
 
 def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str:  # noqa

From ce1502284a07e2adcf0c5a0ec080ff360ab81eab Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Wed, 4 Jun 2025 00:42:40 +0300
Subject: [PATCH 12/26] remove unused function

---
 codeflash/code_utils/formatter.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index e1d269aa7..ec077f444 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -11,12 +11,6 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
-def get_nth_line(text: str, n: int) -> str | None:
-    for i, line in enumerate(text.splitlines(), start=1):
-        if i == n:
-            return line
-    return None
-
 def get_diff_output(cmd: list[str]) -> Optional[str]:
     try:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)

From d2a87116ec4702fefdd240f25d88a0073ef7ea0d Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Wed, 4 Jun 2025 02:27:24 +0300
Subject: [PATCH 13/26] formatting & using internal black dep

---
 codeflash/code_utils/formatter.py | 65 +++++++++++--------------------
 poetry.lock                       | 10 ++---
 pyproject.toml                    |  2 +-
 tests/test_formatter.py           |  3 +-
 4 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index ec077f444..3144416e1 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -4,6 +4,7 @@
 import shlex
 import subprocess
 from typing import TYPE_CHECKING, Optional
+
 import isort
 
 from codeflash.cli_cmds.console import console, logger
@@ -11,64 +12,43 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
-def get_diff_output(cmd: list[str]) -> Optional[str]:
-    try:
-        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-        return result.stdout.strip() or None
-    except (FileNotFoundError, subprocess.CalledProcessError) as e:
-        if isinstance(e, subprocess.CalledProcessError):
-            # ruff returns 1 when the file needs formatting, and 0 when it is already formatted
-            is_ruff = cmd[0] == "ruff"
-            if e.returncode == 0 and is_ruff:
-                return ""
-            if e.returncode == 1 and is_ruff:
-                return e.stdout.strip() or None
-        return None
-
 
-def get_diff_lines_output_by_black(filepath: str) -> Optional[str]:
+def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]:
     try:
-        import black  # type: ignore
-        return get_diff_output(['black', '--diff', filepath])
-    except ImportError:
-        return None
+        import black
 
-def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]:
-    try:
-        import ruff  # type: ignore
-        return get_diff_output(['ruff', 'format', '--diff', filepath])
+        formatted_content = black.format_file_contents(src_contents=unformatted_content, fast=True, mode=black.Mode())
+        return black.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath)
     except ImportError:
-        print("can't import ruff")
         return None
 
 
 def get_diff_lines_count(diff_output: str) -> int:
-    lines = diff_output.split('\n')
+    lines = diff_output.split("\n")
+
     def is_diff_line(line: str) -> bool:
-        return line.startswith(('+', '-')) and not line.startswith(('+++', '---'))
+        return line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
+
     diff_lines = [line for line in lines if is_diff_line(line)]
     return len(diff_lines)
 
-def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool:
-    diff_changes_stdout = None
 
-    diff_changes_stdout = get_diff_lines_output_by_black(filepath)
+def is_safe_to_format(filepath: str, content: str, max_diff_lines: int = 100) -> bool:
+    diff_changes_str = None
+
+    diff_changes_str = get_diff_output_by_black(filepath, unformatted_content=content)
 
-    if diff_changes_stdout is None:
-        logger.warning("black formatter not found, trying ruff instead...")
-        diff_changes_stdout = get_diff_lines_output_by_ruff(filepath)
-        if diff_changes_stdout is None:
-            logger.warning("Both ruff, black formatters not found, skipping formatting diff check.")
-            return False
-    
-    diff_lines_count = get_diff_lines_count(diff_changes_stdout)
-    
+    if diff_changes_str is None:
+        logger.warning("Looks like black formatter not found, make sure it is installed.")
+        return False
+
+    diff_lines_count = get_diff_lines_count(diff_changes_str)
     if diff_lines_count > max_diff_lines:
-        logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})")
+        logger.debug(f"Skipping formatting {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})")
         return False
 
     return True
-        
+
 
 def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str:  # noqa
     # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution
@@ -76,8 +56,9 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True
     if not path.exists():
         msg = f"File {path} does not exist. Cannot format the file."
         raise FileNotFoundError(msg)
-    if formatter_name == "disabled" or not is_safe_to_format(str(path)):
-        return path.read_text(encoding="utf8")
+    file_content = path.read_text(encoding="utf8")
+    if formatter_name == "disabled" or not is_safe_to_format(filepath=str(path), content=file_content):
+        return file_content
 
     file_token = "$file"  # noqa: S105
     for command in formatter_cmds:
diff --git a/poetry.lock b/poetry.lock
index b80c86387..ab3e6054b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -79,7 +79,7 @@ version = "25.1.0"
 description = "The uncompromising code formatter."
 optional = false
 python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main"]
 files = [
     {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"},
     {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"},
@@ -295,7 +295,7 @@ version = "8.1.8"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "dev"]
+groups = ["main"]
 files = [
     {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
     {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
@@ -311,11 +311,11 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main", "dev"]
-markers = "sys_platform == \"win32\" or platform_system == \"Windows\""
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
+markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
 
 [[package]]
 name = "coverage"
@@ -1400,7 +1400,7 @@ version = "0.12.1"
 description = "Utility library for gitignore style pattern matching of file paths."
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main"]
 files = [
     {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
     {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
@@ -2748,4 +2748,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9"
-content-hash = "d0b959755aad4882df502f8ba219b865df472ba1830d5adf8e757aa6436bc3df"
+content-hash = "1ba28119bcc2b572133da8f243eea42fc8f732b6255afac7c2c7e616e2c68677"
diff --git a/pyproject.toml b/pyproject.toml
index dd38137ee..6a5c4904a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,6 +93,7 @@ crosshair-tool = ">=0.0.78"
 coverage = ">=7.6.4"
 line_profiler=">=4.2.0" #this is the minimum version which supports python 3.13
 platformdirs = ">=4.3.7"
+black = "^25.1.0"
 [tool.poetry.group.dev]
 optional = true
 
@@ -123,7 +124,6 @@ types-pexpect = "^4.9.0.20241208"
 types-unidiff = "^0.7.0.20240505"
 uv = ">=0.6.2"
 pre-commit = "^4.2.0"
-black = "^25.1.0"
 
 [tool.poetry.build]
 script = "codeflash/update_license_version.py"
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index b6c87b190..b500bbb4f 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -259,13 +259,12 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
             args=args,
         )
         
-        optimizer.reformat_code_and_helpers(
+        content, _ = optimizer.reformat_code_and_helpers(
             helper_functions=[],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
         )
         
-        content = target_path.read_text()
         if should_content_change:
             assert content != original, f"Expected content to change for {source_filename}"
         else:

From f46b3683b1391517cd13d3b666fdcf10fb382861 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Wed, 4 Jun 2025 03:01:51 +0300
Subject: [PATCH 14/26] fix black import issue

---
 codeflash/code_utils/formatter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 3144416e1..afbced761 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -15,10 +15,10 @@
 
 def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]:
     try:
-        import black
+        from black import Mode, format_file_contents, output
 
-        formatted_content = black.format_file_contents(src_contents=unformatted_content, fast=True, mode=black.Mode())
-        return black.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath)
+        formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode())
+        return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath)
     except ImportError:
         return None
 

From 6504cc4cc92725ca1dace7a57759dfe0c124fb0d Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Wed, 4 Jun 2025 03:51:02 +0300
Subject: [PATCH 15/26] handle formatting files with no formatting issues

---
 code_to_optimize/no_formatting_errors.py | 71 ++++++++++++++++++++++++
 codeflash/code_utils/formatter.py        |  4 +-
 tests/test_formatter.py                  |  4 ++
 3 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 code_to_optimize/no_formatting_errors.py

diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py
new file mode 100644
index 000000000..3d32bc94c
--- /dev/null
+++ b/code_to_optimize/no_formatting_errors.py
@@ -0,0 +1,71 @@
+import os, sys, json, datetime, math, random
+import requests
+from collections import defaultdict, OrderedDict
+from typing import List, Dict, Optional, Union, Tuple, Any
+import numpy as np
+import pandas as pd
+
+# This is a poorly formatted Python file with many style violations
+
+
+class UnformattedExampleClass(object):
+    def __init__(
+        self,
+        name,
+        age=None,
+        email=None,
+        phone=None,
+        address=None,
+        city=None,
+        state=None,
+        zip_code=None,
+    ):
+        self.name = name
+        self.age = age
+        self.email = email
+        self.phone = phone
+        self.address = address
+        self.city = city
+        self.state = state
+        self.zip_code = zip_code
+        self.data = {"name": name, "age": age, "email": email}
+
+    def get_info(self):
+        return f"Name: {self.name}, Age: {self.age}"
+
+    def update_data(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+        self.data.update(kwargs)
+
+
+def process_data(
+    data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False
+):
+    if not data_list:
+        return []
+    if filter_func:
+        data_list = [item for item in data_list if filter_func(item)]
+    if transform_func:
+        data_list = [transform_func(item) for item in data_list]
+    if sort_key:
+        data_list = sorted(data_list, key=sort_key, reverse=reverse)
+    return data_list
+
+
+def calculate_statistics(numbers):
+    if not numbers:
+        return None
+    mean = sum(numbers) / len(numbers)
+    median = sorted(numbers)[len(numbers) // 2]
+    variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
+    std_dev = math.sqrt(variance)
+    return {
+        "mean": mean,
+        "median": median,
+        "variance": variance,
+        "std_dev": std_dev,
+        "min": min(numbers),
+        "max": max(numbers),
+    }
diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index afbced761..6188e8649 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -15,11 +15,11 @@
 
 def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]:
     try:
-        from black import Mode, format_file_contents, output
+        from black import Mode, format_file_contents, output, report
 
         formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode())
         return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath)
-    except ImportError:
+    except (ImportError, report.NothingChanged):
         return None
 
 
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index b500bbb4f..baf5b8079 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -279,3 +279,7 @@ def test_formatting_file_with_many_diffs():
 def test_formatting_file_with_few_diffs():
     """Test that files with few formatting errors are formatted (content changed)."""
     _run_formatting_test("few_formatting_errors.py", should_content_change=True)
+
+def test_formatting_file_with_no_diffs():
+    """Test that files with no formatting errors are unchanged."""
+    _run_formatting_test("no_formatting_errors.py", should_content_change=False)

From 82a4ee17862297be769e4d85a7a7cf808456ad02 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:05:25 +0300
Subject: [PATCH 16/26] use user pre-defined formatting commands, instead of
 using black

---
 code_to_optimize/no_formatting_errors.py     |  27 +---
 codeflash/code_utils/formatter.py            | 126 ++++++++++++-------
 codeflash/optimization/function_optimizer.py |  11 +-
 poetry.lock                                  |  61 +--------
 pyproject.toml                               |   1 -
 tests/test_formatter.py                      |  10 ++
 6 files changed, 106 insertions(+), 130 deletions(-)

diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py
index 3d32bc94c..c521ef420 100644
--- a/code_to_optimize/no_formatting_errors.py
+++ b/code_to_optimize/no_formatting_errors.py
@@ -1,25 +1,8 @@
-import os, sys, json, datetime, math, random
-import requests
-from collections import defaultdict, OrderedDict
-from typing import List, Dict, Optional, Union, Tuple, Any
-import numpy as np
-import pandas as pd
+import math
 
-# This is a poorly formatted Python file with many style violations
 
-
-class UnformattedExampleClass(object):
-    def __init__(
-        self,
-        name,
-        age=None,
-        email=None,
-        phone=None,
-        address=None,
-        city=None,
-        state=None,
-        zip_code=None,
-    ):
+class UnformattedExampleClass:
+    def __init__(self, name, age=None, email=None, phone=None, address=None, city=None, state=None, zip_code=None):
         self.name = name
         self.age = age
         self.email = email
@@ -40,9 +23,7 @@ def update_data(self, **kwargs):
         self.data.update(kwargs)
 
 
-def process_data(
-    data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False
-):
+def process_data(data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False):
     if not data_list:
         return []
     if filter_func:
diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 6188e8649..0a51c303c 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -1,69 +1,73 @@
 from __future__ import annotations
 
+import difflib
 import os
+import re
 import shlex
+import shutil
 import subprocess
-from typing import TYPE_CHECKING, Optional
+import tempfile
+from pathlib import Path
+from typing import Optional
 
 import isort
 
 from codeflash.cli_cmds.console import console, logger
 
-if TYPE_CHECKING:
-    from pathlib import Path
 
+def generate_unified_diff(original: str, modified: str, from_file: str, to_file: str) -> str:
+    line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
 
-def get_diff_output_by_black(filepath: str, unformatted_content: str) -> Optional[str]:
-    try:
-        from black import Mode, format_file_contents, output, report
+    def split_lines(text: str) -> list[str]:
+        lines = [match[0] for match in line_pattern.finditer(text)]
+        if lines and lines[-1] == "":
+            lines.pop()
+        return lines
 
-        formatted_content = format_file_contents(src_contents=unformatted_content, fast=True, mode=Mode())
-        return output.diff(unformatted_content, formatted_content, a_name=filepath, b_name=filepath)
-    except (ImportError, report.NothingChanged):
-        return None
+    original_lines = split_lines(original)
+    modified_lines = split_lines(modified)
 
+    diff_output = []
+    for line in difflib.unified_diff(original_lines, modified_lines, fromfile=from_file, tofile=to_file, n=5):
+        if line.endswith("\n"):
+            diff_output.append(line)
+        else:
+            diff_output.append(line + "\n")
+            diff_output.append("\\ No newline at end of file\n")
 
-def get_diff_lines_count(diff_output: str) -> int:
-    lines = diff_output.split("\n")
-
-    def is_diff_line(line: str) -> bool:
-        return line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
-
-    diff_lines = [line for line in lines if is_diff_line(line)]
-    return len(diff_lines)
+    return "".join(diff_output)
 
 
-def is_safe_to_format(filepath: str, content: str, max_diff_lines: int = 100) -> bool:
-    diff_changes_str = None
-
-    diff_changes_str = get_diff_output_by_black(filepath, unformatted_content=content)
-
-    if diff_changes_str is None:
-        logger.warning("Looks like black formatter not found, make sure it is installed.")
-        return False
-
-    diff_lines_count = get_diff_lines_count(diff_changes_str)
-    if diff_lines_count > max_diff_lines:
-        logger.debug(f"Skipping formatting {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})")
-        return False
+def apply_formatter_cmds(
+    cmds: list[str],
+    path: Path,
+    test_dir_str: Optional[str],
+    print_status: bool,  # noqa
+) -> tuple[Path, str]:
+    # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution
+    formatter_name = cmds[0].lower()
+    should_make_copy = False
+    file_path = path
 
-    return True
+    if test_dir_str:
+        should_make_copy = True
+        file_path = Path(test_dir_str) / "temp.py"
 
+    if not cmds or formatter_name == "disabled":
+        return path, path.read_text(encoding="utf8")
 
-def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str:  # noqa
-    # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution
-    formatter_name = formatter_cmds[0].lower()
     if not path.exists():
-        msg = f"File {path} does not exist. Cannot format the file."
+        msg = f"File {path} does not exist. Cannot apply formatter commands."
         raise FileNotFoundError(msg)
-    file_content = path.read_text(encoding="utf8")
-    if formatter_name == "disabled" or not is_safe_to_format(filepath=str(path), content=file_content):
-        return file_content
+
+    if should_make_copy:
+        shutil.copy2(path, file_path)
 
     file_token = "$file"  # noqa: S105
-    for command in formatter_cmds:
+
+    for command in cmds:
         formatter_cmd_list = shlex.split(command, posix=os.name != "nt")
-        formatter_cmd_list = [path.as_posix() if chunk == file_token else chunk for chunk in formatter_cmd_list]
+        formatter_cmd_list = [file_path.as_posix() if chunk == file_token else chunk for chunk in formatter_cmd_list]
         try:
             result = subprocess.run(formatter_cmd_list, capture_output=True, check=False)
             if result.returncode == 0:
@@ -83,7 +87,45 @@ def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True
 
             raise e from None
 
-    return path.read_text(encoding="utf8")
+    return file_path, file_path.read_text(encoding="utf8")
+
+
+def get_diff_lines_count(diff_output: str) -> int:
+    lines = diff_output.split("\n")
+
+    def is_diff_line(line: str) -> bool:
+        return line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
+
+    diff_lines = [line for line in lines if is_diff_line(line)]
+    return len(diff_lines)
+
+
+def format_code(formatter_cmds: list[str], path: Path, optimized_function: str = "", print_status: bool = True) -> str:  # noqa
+    with tempfile.TemporaryDirectory() as test_dir_str:
+        max_diff_lines = 100
+
+        original_code = path.read_text(encoding="utf8")
+        # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided)
+        original_code_without_opfunc = original_code.replace(optimized_function, "")
+
+        original_temp = Path(test_dir_str) / "original_temp.py"
+        original_temp.write_text(original_code_without_opfunc, encoding="utf8")
+
+        formatted_temp, formatted_code = apply_formatter_cmds(
+            formatter_cmds, original_temp, test_dir_str, print_status=False
+        )
+
+        diff_output = generate_unified_diff(
+            original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp)
+        )
+        diff_lines_count = get_diff_lines_count(diff_output)
+        if diff_lines_count > max_diff_lines:
+            logger.debug(f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})")
+            return original_code
+
+        _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status)
+        logger.debug(f"Formatted {path} with commands: {formatter_cmds}")
+        return formatted_code
 
 
 def sort_imports(code: str) -> str:
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 12aeff3fa..1e918b40f 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -302,7 +302,10 @@ def optimize_function(self) -> Result[BestOptimization, str]:  # noqa: PLR0911
                 )
 
                 new_code, new_helper_code = self.reformat_code_and_helpers(
-                    code_context.helper_functions, explanation.file_path, self.function_to_optimize_source_code
+                    code_context.helper_functions,
+                    explanation.file_path,
+                    self.function_to_optimize_source_code,
+                    optimized_function=best_optimization.candidate.source_code,
                 )
 
                 existing_tests = existing_tests_source_for(
@@ -591,18 +594,18 @@ def write_code_and_helpers(original_code: str, original_helper_code: dict[Path,
                 f.write(helper_code)
 
     def reformat_code_and_helpers(
-        self, helper_functions: list[FunctionSource], path: Path, original_code: str
+        self, helper_functions: list[FunctionSource], path: Path, original_code: str, optimized_function: str
     ) -> tuple[str, dict[Path, str]]:
         should_sort_imports = not self.args.disable_imports_sorting
         if should_sort_imports and isort.code(original_code) != original_code:
             should_sort_imports = False
 
-        new_code = format_code(self.args.formatter_cmds, path)
+        new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function)
         if should_sort_imports:
             new_code = sort_imports(new_code)
 
         new_helper_code: dict[Path, str] = {}
-        helper_functions_paths = {hf.file_path for hf in helper_functions}
+        helper_functions_paths = {hf.source_code for hf in helper_functions}
         for module_abspath in helper_functions_paths:
             formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath)
             if should_sort_imports:
diff --git a/poetry.lock b/poetry.lock
index ab3e6054b..825a15b02 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -73,53 +73,6 @@ files = [
     {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"},
 ]
 
-[[package]]
-name = "black"
-version = "25.1.0"
-description = "The uncompromising code formatter."
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"},
-    {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"},
-    {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"},
-    {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"},
-    {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"},
-    {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"},
-    {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"},
-    {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"},
-    {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"},
-    {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"},
-    {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"},
-    {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"},
-    {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"},
-    {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"},
-    {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"},
-    {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"},
-    {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"},
-    {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"},
-    {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"},
-    {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"},
-    {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"},
-    {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"},
-]
-
-[package.dependencies]
-click = ">=8.0.0"
-mypy-extensions = ">=0.4.3"
-packaging = ">=22.0"
-pathspec = ">=0.9.0"
-platformdirs = ">=2"
-tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-colorama = ["colorama (>=0.4.3)"]
-d = ["aiohttp (>=3.10)"]
-jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
-uvloop = ["uvloop (>=0.15.2)"]
-
 [[package]]
 name = "blessed"
 version = "1.21.0"
@@ -1394,18 +1347,6 @@ files = [
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["docopt", "pytest"]
 
-[[package]]
-name = "pathspec"
-version = "0.12.1"
-description = "Utility library for gitignore style pattern matching of file paths."
-optional = false
-python-versions = ">=3.8"
-groups = ["main"]
-files = [
-    {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
-    {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
-]
-
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -2748,4 +2689,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9"
-content-hash = "1ba28119bcc2b572133da8f243eea42fc8f732b6255afac7c2c7e616e2c68677"
+content-hash = "1a73e9db33e3884cf1cc6e3371816aebd20831845ef9bf671be315e659480e86"
diff --git a/pyproject.toml b/pyproject.toml
index 6a5c4904a..c3e48f889 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,7 +93,6 @@ crosshair-tool = ">=0.0.78"
 coverage = ">=7.6.4"
 line_profiler=">=4.2.0" #this is the minimum version which supports python 3.13
 platformdirs = ">=4.3.7"
-black = "^25.1.0"
 [tool.poetry.group.dev]
 optional = true
 
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index baf5b8079..11790c951 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -263,6 +263,16 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
             helper_functions=[],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
+            # this is just for testing, but in practice, this would be an optimized function code and it will be well-formatted
+            optimized_function="""    def process(self):
+        data=self.load_data()
+        if not data:return{"success":False,"error":"No data loaded"}
+        
+        validated_data=self.validate_data(data)
+        processed_result=process_data(validated_data,
+                                    filter_func=lambda x:x.get('active',True),
+                                    transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},
+                                    sort_key=lambda x:x.get('name',''))""",
         )
         
         if should_content_change:

From caeda49a74864e0e475e7a277549188036f087ee Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:25:26 +0300
Subject: [PATCH 17/26] make sure format_code receives file path as path type
 not as str

---
 codeflash/code_utils/formatter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 0a51c303c..8ad3e3f02 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -104,6 +104,9 @@ def format_code(formatter_cmds: list[str], path: Path, optimized_function: str =
     with tempfile.TemporaryDirectory() as test_dir_str:
         max_diff_lines = 100
 
+        if type(path) is str:
+            path = Path(path)
+
         original_code = path.read_text(encoding="utf8")
         # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided)
         original_code_without_opfunc = original_code.replace(optimized_function, "")

From 6967fcb22a9ce66f0668ea5bdb856db78899c5f0 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:29:30 +0300
Subject: [PATCH 18/26] formatting and linting

---
 codeflash/code_utils/formatter.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 8ad3e3f02..d7d4bd438 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -8,7 +8,7 @@
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 
 import isort
 
@@ -100,11 +100,16 @@ def is_diff_line(line: str) -> bool:
     return len(diff_lines)
 
 
-def format_code(formatter_cmds: list[str], path: Path, optimized_function: str = "", print_status: bool = True) -> str:  # noqa
+def format_code(
+    formatter_cmds: list[str],
+    path: Union[str, Path],
+    optimized_function: str = "",
+    print_status: bool = True,  # noqa
+) -> str:
     with tempfile.TemporaryDirectory() as test_dir_str:
         max_diff_lines = 100
 
-        if type(path) is str:
+        if isinstance(path, str):
             path = Path(path)
 
         original_code = path.read_text(encoding="utf8")

From 8248c8e594d24c71749515630fc34704288ab4fe Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:48:20 +0300
Subject: [PATCH 19/26] typo

---
 codeflash/optimization/function_optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 1e918b40f..419fa1e2e 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -605,7 +605,7 @@ def reformat_code_and_helpers(
             new_code = sort_imports(new_code)
 
         new_helper_code: dict[Path, str] = {}
-        helper_functions_paths = {hf.source_code for hf in helper_functions}
+        helper_functions_paths = {hf.file_path for hf in helper_functions}
         for module_abspath in helper_functions_paths:
             formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath)
             if should_sort_imports:

From 15aacdbffa98eb17fb27bb9aa2336adfbe6a8501 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:50:34 +0300
Subject: [PATCH 20/26] revert lock file changes

---
 poetry.lock | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 825a15b02..04cfeae09 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1025,11 +1025,8 @@ files = [
     {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"},
     {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"},
-    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"},
-    {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"},
     {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"},
-    {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"},
     {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"},
     {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"},
     {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"},

From c24fc9037f74f0c72dfe84a307523a4d84186249 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 02:52:17 +0300
Subject: [PATCH 21/26] remove comment

---
 codeflash/code_utils/formatter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index d7d4bd438..5d4540116 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -113,7 +113,7 @@ def format_code(
             path = Path(path)
 
         original_code = path.read_text(encoding="utf8")
-        # we dont' count the formatting diff for the optimized function as it should be well-formatted (if it's provided)
+        # we dont' count the formatting diff for the optimized function as it should be well-formatted
         original_code_without_opfunc = original_code.replace(optimized_function, "")
 
         original_temp = Path(test_dir_str) / "original_temp.py"

From b48e9e6e64e24f7e58b641cd1cd8a18111ac4c67 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Thu, 5 Jun 2025 03:12:48 +0300
Subject: [PATCH 22/26] pass helper functions source code to the formatter for
 diff checking

---
 codeflash/optimization/function_optimizer.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 419fa1e2e..7edba5e74 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -605,9 +605,12 @@ def reformat_code_and_helpers(
             new_code = sort_imports(new_code)
 
         new_helper_code: dict[Path, str] = {}
-        helper_functions_paths = {hf.file_path for hf in helper_functions}
-        for module_abspath in helper_functions_paths:
-            formatted_helper_code = format_code(self.args.formatter_cmds, module_abspath)
+        for hp in helper_functions:
+            module_abspath = hp.file_path
+            hp_source_code = hp.source_code
+            formatted_helper_code = format_code(
+                self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code
+            )
             if should_sort_imports:
                 formatted_helper_code = sort_imports(formatted_helper_code)
             new_helper_code[module_abspath] = formatted_helper_code

From 64f2dd99566eae09e0aa5035ed7bd861760f9653 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Sat, 7 Jun 2025 01:15:23 +0300
Subject: [PATCH 23/26] more unit tests

---
 code_to_optimize/few_formatting_errors.py  |  47 --
 code_to_optimize/many_formatting_errors.py | 147 ------
 code_to_optimize/no_formatting_errors.py   |  52 --
 tests/test_formatter.py                    | 553 ++++++++++++++++++++-
 4 files changed, 530 insertions(+), 269 deletions(-)
 delete mode 100644 code_to_optimize/few_formatting_errors.py
 delete mode 100644 code_to_optimize/many_formatting_errors.py
 delete mode 100644 code_to_optimize/no_formatting_errors.py

diff --git a/code_to_optimize/few_formatting_errors.py b/code_to_optimize/few_formatting_errors.py
deleted file mode 100644
index 27ed71b44..000000000
--- a/code_to_optimize/few_formatting_errors.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-
-class UnformattedExampleClass(object):
-    def __init__(
-        self,
-        name,
-        age=    None,
-        email=  None,
-        phone=None,
-        address=None,
-        city=None,
-        state=None,
-        zip_code=None,
-    ):
-        self.name = name
-        self.age = age
-        self.email = email
-        self.phone = phone
-        self.   address = address
-        self.city = city
-        self.state = state
-        self.zip_code = zip_code
-        self.data = {"name": name, "age": age, "email": email}
-
-    def get_info(self):
-        return f"Name: {self.name}, Age: {self.age}"
-
-    def update_data(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-        self.data.update(kwargs)
-
-
-def process_data(
-    data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False
-):
-    if not data_list:
-        return []
-    if filter_func:
-        data_list = [   item for item in data_list if filter_func(item)]
-    if transform_func:
-        data_list = [transform_func(item) for item in data_list]
-    if sort_key:
-        data_list = sorted(data_list, key=sort_key, reverse=reverse)
-    return data_list
-
diff --git a/code_to_optimize/many_formatting_errors.py b/code_to_optimize/many_formatting_errors.py
deleted file mode 100644
index 79cfc825d..000000000
--- a/code_to_optimize/many_formatting_errors.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import os,sys,json,datetime,math,random;import requests;from collections import defaultdict,OrderedDict
-from typing import List,Dict,Optional,Union,Tuple,Any;import numpy as np;import pandas as pd
-
-# This is a poorly formatted Python file with many style violations
-
-class   UnformattedExampleClass( object ):
-    def __init__(self,name,age=None,email=None,phone=None,address=None,city=None,state=None,zip_code=None):
-        self.name=name;self.age=age;self.email=email;self.phone=phone
-        self.address=address;self.city=city;self.state=state;self.zip_code=zip_code
-        self.data={"name":name,"age":age,"email":email}
-
-    def   get_info(self   ):
-        return f"Name: {self.name}, Age: {self.age}"
-
-    def update_data(self,**kwargs):
-        for key,value in kwargs.items():
-            if hasattr(self,key):setattr(self,key,value)
-        self.data.update(kwargs)
-
-def process_data(data_list,filter_func=None,transform_func=None,sort_key=None,reverse=False):
-    if not data_list:return[]
-    if filter_func:data_list=[item for item in data_list if filter_func(item)]
-    if transform_func:data_list=[transform_func(item)for item in data_list]
-    if sort_key:data_list=sorted(data_list,key=sort_key,reverse=reverse)
-    return data_list
-
-def calculate_statistics(numbers):
-    if not numbers:return None
-    mean=sum(numbers)/len(numbers);         median=sorted(numbers)[len(numbers)//2]
-    variance=sum((x-mean)**2 for x in numbers)/len(numbers);std_dev=math.sqrt(variance)
-    return      {"mean":mean,"median":median,"variance":variance,"std_dev":std_dev,"min":min(numbers),"max":max(numbers)}
-
-def complex_nested_function(x,y,z):
-    def inner_function_1(a,b):
-        def deeply_nested(c,d):
-            return c*d+a*b
-        return deeply_nested(a+1,b-1)+deeply_nested(a-1,b+1)
-    def     inner_function_2    (a,b,c):
-        result=[]
-        for i in range(a):
-            for j in     range(b):
-                for k in range(c):
-                    if i*j*k>0:result.append(i*j*k)
-                    elif i+j+k==0:result.append(-1)
-                    else    :result.append(0)
-        return result
-    return inner_function_1(x,y)+sum(inner_function_2(x,y,z))
-
-# Long lines and poor dictionary formatting
-user_data={"users":[{"id":1,"name":"John Doe","email":"john@example.com","preferences":{"theme":"dark","notifications":True,"language":"en"},"metadata":{"created_at":"2023-01-01","last_login":"2024-01-01","login_count":150}},{"id":2,"name":"Jane Smith","email":"jane@example.com","preferences":{"theme":"light","notifications":False,"language":"es"},"metadata":{"created_at":"2023-02-15","last_login":"2024-01-15","login_count":89}}]}
-
-# Poor list formatting and string concatenation
-long_list_of_items=['item_1','item_2','item_3','item_4','item_5','item_6','item_7','item_8','item_9','item_10','item_11','item_12','item_13','item_14','item_15','item_16','item_17','item_18','item_19','item_20']
-
-def generate_report(data,include_stats=True,include_charts=False,format_type='json',output_file=None):
-    if not data:raise ValueError("Data cannot be empty")
-    report={'timestamp':datetime.datetime.now().isoformat(),'data_count':len(data),'summary':{}}
-    
-    # Bad formatting in loops and conditionals
-    for i,item in enumerate(data):
-        if isinstance(item,dict):
-            for key,value in item.items():
-                if key not in report['summary']:report['summary'][key]=[]
-                report['summary'][key].append(value)
-        elif isinstance(item,(int,float)):
-            if 'numbers' not in report['summary']:report['summary']['numbers']=[]
-            report['summary']['numbers'].append(item)
-        else:
-            if 'other' not in report['summary']:report['summary']['other']=[]
-            report['summary']['other'].append(str(item))
-    
-    if include_stats and 'numbers' in report['summary']:
-        numbers=report['summary']['numbers']
-        report['statistics']=calculate_statistics(numbers)
-    
-    # Long conditional chain with poor formatting
-    if format_type=='json':result=json.dumps(report,indent=None,separators=(',',':'))
-    elif format_type=='pretty_json':result=json.dumps(report,indent=2)
-    elif format_type=='string':result=str(report)
-    else:result=report
-    
-    if output_file:
-        with open(output_file,'w')as f:f.write(result if isinstance(result,str)else json.dumps(result))
-    
-    return result
-
-class   DataProcessor  (  UnformattedExampleClass  )  :
-    def __init__(self,data_source,config=None,debug=False):
-        super().__init__("DataProcessor")
-        self.data_source=data_source;self.config=config or{};self.debug=debug
-        self.processed_data=[];self.errors=[];self.warnings=[]
-
-    def   load_data  (  self  )  :
-        try:
-            if isinstance(self.data_source,str):
-                if self.data_source.endswith('.json'):
-                    with open(self.data_source,'r')as f:data=json.load(f)
-                elif self.data_source.endswith('.csv'):data=pd.read_csv(self.data_source).to_dict('records')
-                else:raise ValueError(f"Unsupported file type: {self.data_source}")
-            elif isinstance(self.data_source,list):data=self.data_source
-            else:data=[self.data_source]
-            return data
-        except Exception as e:
-            self.errors.append(str(e));return[]
-
-    def validate_data(self,data):
-        valid_items=[];invalid_items=[]
-        for item in data:
-            if isinstance(item,dict)and'id'in item and'name'in item:valid_items.append(item)
-            else:invalid_items.append(item)
-        if invalid_items:self.warnings.append(f"Found {len(invalid_items)} invalid items")
-        return valid_items
-
-    def process(self):
-        data=self.load_data()
-        if not data:return{"success":False,"error":"No data loaded"}
-        
-        validated_data=self.validate_data(data)
-        processed_result=process_data(validated_data,
-                                    filter_func=lambda x:x.get('active',True),
-                                    transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},
-                                    sort_key=lambda x:x.get('name',''))
-        
-        self.processed_data=processed_result
-        return{"success":True,"count":len(processed_result),"data":processed_result}
-if __name__=="__main__":
-    sample_data=[{"id":1,"name":"Alice","active":True},{"id":2,"name":"Bob","active":False},{"id":3,"name":"Charlie","active":True}]
-    
-    processor=DataProcessor(sample_data,config={"debug":True})
-    result=processor.process()
-    
-    if result["success"]:
-        print(f"Successfully processed {result['count']} items")
-        for item in result["data"][:3]:print(f"- {item['name']} (ID: {item['id']})")
-    else:print(f"Processing failed: {result.get('error','Unknown error')}")
-    
-    # Generate report with poor formatting
-    report=generate_report(sample_data,include_stats=True,format_type='pretty_json')
-    print("Generated report:",report[:100]+"..."if len(report)>100 else report)
-    
-    # Complex calculation with poor spacing
-    numbers=[random.randint(1,100)for _ in range(50)]
-    stats=calculate_statistics(numbers)
-    complex_result=complex_nested_function(5,3,2)
-    
-    print(f"Statistics: mean={stats['mean']:.2f}, std_dev={stats['std_dev']:.2f}")
-    print(f"Complex calculation result: {complex_result}")
diff --git a/code_to_optimize/no_formatting_errors.py b/code_to_optimize/no_formatting_errors.py
deleted file mode 100644
index c521ef420..000000000
--- a/code_to_optimize/no_formatting_errors.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import math
-
-
-class UnformattedExampleClass:
-    def __init__(self, name, age=None, email=None, phone=None, address=None, city=None, state=None, zip_code=None):
-        self.name = name
-        self.age = age
-        self.email = email
-        self.phone = phone
-        self.address = address
-        self.city = city
-        self.state = state
-        self.zip_code = zip_code
-        self.data = {"name": name, "age": age, "email": email}
-
-    def get_info(self):
-        return f"Name: {self.name}, Age: {self.age}"
-
-    def update_data(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-        self.data.update(kwargs)
-
-
-def process_data(data_list, filter_func=None, transform_func=None, sort_key=None, reverse=False):
-    if not data_list:
-        return []
-    if filter_func:
-        data_list = [item for item in data_list if filter_func(item)]
-    if transform_func:
-        data_list = [transform_func(item) for item in data_list]
-    if sort_key:
-        data_list = sorted(data_list, key=sort_key, reverse=reverse)
-    return data_list
-
-
-def calculate_statistics(numbers):
-    if not numbers:
-        return None
-    mean = sum(numbers) / len(numbers)
-    median = sorted(numbers)[len(numbers) // 2]
-    variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
-    std_dev = math.sqrt(variance)
-    return {
-        "mean": mean,
-        "median": median,
-        "variance": variance,
-        "std_dev": std_dev,
-        "min": min(numbers),
-        "max": max(numbers),
-    }
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index 11790c951..fbd7d0b9d 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -216,18 +216,18 @@ def foo():
             format_code(formatter_cmds=["exit 1"], path=Path(tmp_path))
 
 
-def _run_formatting_test(source_filename: str, should_content_change: bool):
+def _run_formatting_test(source_code: str, should_content_change: bool, expected = None, optimized_function: str = ""):
     try:
         import ruff  # type: ignore
     except ImportError:
         pytest.skip("ruff is not installed")
+
     with tempfile.TemporaryDirectory() as test_dir_str:
         test_dir = Path(test_dir_str)
-        this_file = Path(__file__).resolve()
-        repo_root_dir = this_file.parent.parent
-        source_file = repo_root_dir / "code_to_optimize" / source_filename
-
-        original = source_file.read_text()
+        source_file = test_dir / "source.py"
+        
+        source_file.write_text(source_code)
+        original = source_code
         target_path = test_dir / "target.py"
         
         shutil.copy2(source_file, target_path)
@@ -259,37 +259,544 @@ def _run_formatting_test(source_filename: str, should_content_change: bool):
             args=args,
         )
         
-        content, _ = optimizer.reformat_code_and_helpers(
+        optimizer.reformat_code_and_helpers(
             helper_functions=[],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
-            # this is just for testing, but in practice, this would be an optimized function code and it will be well-formatted
-            optimized_function="""    def process(self):
-        data=self.load_data()
-        if not data:return{"success":False,"error":"No data loaded"}
-        
-        validated_data=self.validate_data(data)
-        processed_result=process_data(validated_data,
-                                    filter_func=lambda x:x.get('active',True),
-                                    transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},
-                                    sort_key=lambda x:x.get('name',''))""",
+            optimized_function=optimized_function,
         )
-        
+
+        content = target_path.read_text(encoding="utf8")
+
+        if expected is not None:
+            assert content == expected, f"Expected content to be \n===========\n{expected}\n===========\nbut got\n===========\n{content}\n===========\n"
+
         if should_content_change:
-            assert content != original, f"Expected content to change for {source_filename}"
+            assert content != original, f"Expected content to change for source.py"
         else:
-            assert content == original, f"Expected content to remain unchanged for {source_filename}"
+            assert content == original, f"Expected content to remain unchanged for source.py"
+
 
 
 def test_formatting_file_with_many_diffs():
     """Test that files with many formatting errors are skipped (content unchanged)."""
-    _run_formatting_test("many_formatting_errors.py", should_content_change=False)
+    source_code = '''import os,sys,json,datetime,re
+from collections import defaultdict,OrderedDict
+import numpy as np,pandas as pd
+
+class DataProcessor:
+    def __init__(self,config_path,data_path,output_path):
+        self.config_path=config_path
+        self.data_path=data_path
+        self.output_path=output_path
+        self.config={}
+        self.data=[]
+        self.results={}
+    
+    def load_config(self):
+        with open(self.config_path,'r') as f:
+            self.config=json.load(f)
+        if 'required_fields' not in self.config:self.config['required_fields']=[]
+        if 'optional_fields' not in self.config:self.config['optional_fields']=[]
+        return self.config
+    
+    def validate_data(self,data):
+        errors=[]
+        for idx,record in enumerate(data):
+            if not isinstance(record,dict):
+                errors.append(f"Record {idx} is not a dictionary")
+                continue
+            for field in self.config.get('required_fields',[]):
+                if field not in record:
+                    errors.append(f"Record {idx} missing required field: {field}")
+                elif record[field] is None or record[field]=='':
+                    errors.append(f"Record {idx} has empty required field: {field}")
+        return errors
+    
+    def process_data(self,data,filter_func=None,transform_func=None,sort_key=None):
+        if filter_func:data=[item for item in data if filter_func(item)]
+        if transform_func:data=[transform_func(item) for item in data]
+        if sort_key:data=sorted(data,key=sort_key)
+        aggregated_data=defaultdict(list)
+        for item in data:
+            category=item.get('category','unknown')
+            aggregated_data[category].append(item)
+        final_results={}
+        for category,items in aggregated_data.items():
+            total_value=sum(item.get('value',0) for item in items)
+            avg_value=total_value/len(items) if items else 0
+            final_results[category]={'count':len(items),'total':total_value,'average':avg_value,'items':items}
+        return final_results
+    
+    def save_results(self,results):
+        with open(self.output_path,'w') as f:
+            json.dump(results,f,indent=2,default=str)
+        print(f"Results saved to {self.output_path}")
+    
+    def run_pipeline(self):
+        try:
+            config=self.load_config()
+            with open(self.data_path,'r') as f:
+                raw_data=json.load(f)
+            validation_errors=self.validate_data(raw_data)
+            if validation_errors:
+                print("Validation errors found:")
+                for error in validation_errors:print(f"  - {error}")
+                return False
+            processed_results=self.process_data(raw_data,filter_func=lambda x:x.get('active',True),transform_func=lambda x:{**x,'processed_at':datetime.datetime.now().isoformat()},sort_key=lambda x:x.get('name',''))
+            self.save_results(processed_results)
+            return True
+        except Exception as e:
+            print(f"Pipeline failed: {str(e)}")
+            return False
+
+def main():
+    processor=DataProcessor('/path/to/config.json','/path/to/data.json','/path/to/output.json')
+    success=processor.run_pipeline()
+    if success:print("Pipeline completed successfully")
+    else:print("Pipeline failed")
+
+if __name__=='__main__':main()
+'''
+    _run_formatting_test(source_code, False)
 
 
 def test_formatting_file_with_few_diffs():
     """Test that files with few formatting errors are formatted (content changed)."""
-    _run_formatting_test("few_formatting_errors.py", should_content_change=True)
+    source_code = '''import json
+from datetime import datetime
+
+def process_data(data, config=None):
+    """Process data with optional configuration."""
+    if not data:
+        return {"success": False, "error": "No data provided"}
+    
+    if config is None:
+        config = {"filter_active": True}
+    
+    # Minor formatting issues that should be fixed
+    result=[]
+    for item in data:
+        if config.get("filter_active") and not item.get("active",True):
+            continue
+        processed_item={
+            "id": item.get("id"),
+            "name": item.get("name",""),
+            "value": item.get("value",0),
+            "processed_at": datetime.now().isoformat()
+        }
+        result.append(processed_item)
+    
+    return {"success": True, "data": result, "count": len(result)}
+'''
+    _run_formatting_test(source_code, True)
+
 
 def test_formatting_file_with_no_diffs():
     """Test that files with no formatting errors are unchanged."""
-    _run_formatting_test("no_formatting_errors.py", should_content_change=False)
+    # This test assumes ruff's default settings are used for formatting.
+    source_code = '''from datetime import datetime
+
+
+def process_data(data, config=None):
+    """Process data with optional configuration."""
+    if not data:
+        return {"success": False, "error": "No data provided"}
+
+    if config is None:
+        config = {"filter_active": True}
+
+    result = []
+    for item in data:
+        if config.get("filter_active") and not item.get("active", True):
+            continue
+
+        processed_item = {
+            "id": item.get("id"),
+            "name": item.get("name", ""),
+            "value": item.get("value", 0),
+            "processed_at": datetime.now().isoformat(),
+        }
+        result.append(processed_item)
+
+    return {"success": True, "data": result, "count": len(result)}
+'''
+    _run_formatting_test(source_code, False)
+
+
+def test_formatting_extremely_messy_file():
+    """Test that extremely messy files with 100+ potential changes are skipped."""
+    source_code = '''import os,sys,json,datetime,re,collections,itertools,functools,operator
+from pathlib import Path
+from typing import Dict,List,Optional,Union,Any,Tuple
+import numpy as np,pandas as pd,matplotlib.pyplot as plt
+from dataclasses import dataclass,field
+
+@dataclass
+class Config:
+    input_path:str
+    output_path:str
+    batch_size:int=100
+    max_retries:int=3
+    timeout:float=30.0
+    debug:bool=False
+    filters:List[str]=field(default_factory=list)
+    transformations:Dict[str,Any]=field(default_factory=dict)
+
+class DataProcessorAdvanced:
+    def __init__(self,config:Config):
+        self.config=config
+        self.data=[]
+        self.results={}
+        self.errors=[]
+        self.stats={'processed':0,'failed':0,'skipped':0}
+        
+    def load_data(self,file_path:str)->List[Dict]:
+        try:
+            with open(file_path,'r',encoding='utf-8') as f:
+                if file_path.endswith('.json'):data=json.load(f)
+                elif file_path.endswith('.csv'):
+                    import csv
+                    reader=csv.DictReader(f)
+                    data=[row for row in reader]
+                else:raise ValueError(f"Unsupported file format: {file_path}")
+            return data
+        except Exception as e:self.errors.append(f"Failed to load {file_path}: {str(e)}");return[]
+    
+    def validate_record(self,record:Dict,schema:Dict)->Tuple[bool,List[str]]:
+        errors=[]
+        for field,rules in schema.items():
+            if rules.get('required',False) and field not in record:
+                errors.append(f"Missing required field: {field}")
+            elif field in record:
+                value=record[field]
+                if 'type' in rules and not isinstance(value,rules['type']):
+                    errors.append(f"Field {field} has wrong type")
+                if 'min_length' in rules and isinstance(value,str) and len(value)<rules['min_length']:
+                    errors.append(f"Field {field} too short")
+                if 'max_length' in rules and isinstance(value,str) and len(value)>rules['max_length']:
+                    errors.append(f"Field {field} too long")
+                if 'min_value' in rules and isinstance(value,(int,float)) and value<rules['min_value']:
+                    errors.append(f"Field {field} below minimum")
+                if 'max_value' in rules and isinstance(value,(int,float)) and value>rules['max_value']:
+                    errors.append(f"Field {field} above maximum")
+        return len(errors)==0,errors
+    
+    def apply_filters(self,data:List[Dict])->List[Dict]:
+        filtered_data=data
+        for filter_name in self.config.filters:
+            if filter_name=='active_only':filtered_data=[r for r in filtered_data if r.get('active',True)]
+            elif filter_name=='has_value':filtered_data=[r for r in filtered_data if r.get('value') is not None]
+            elif filter_name=='recent_only':
+                cutoff=datetime.datetime.now()-datetime.timedelta(days=30)
+                filtered_data=[r for r in filtered_data if datetime.datetime.fromisoformat(r.get('created_at','1970-01-01'))>cutoff]
+        return filtered_data
+    
+    def apply_transformations(self,data:List[Dict])->List[Dict]:
+        for transform_name,params in self.config.transformations.items():
+            if transform_name=='add_timestamp':
+                for record in data:record['processed_at']=datetime.datetime.now().isoformat()
+            elif transform_name=='normalize_names':
+                for record in data:
+                    if 'name' in record:record['name']=record['name'].strip().title()
+            elif transform_name=='calculate_derived':
+                for record in data:
+                    if 'value' in record and 'multiplier' in params:
+                        record['derived_value']=record['value']*params['multiplier']
+        return data
+    
+    def process_batch(self,batch:List[Dict])->Dict[str,Any]:
+        try:
+            processed_batch=[]
+            for record in batch:
+                try:
+                    processed_record=dict(record)
+                    processed_record['batch_id']=len(self.results)
+                    processed_record['processed_at']=datetime.datetime.now().isoformat()
+                    processed_batch.append(processed_record)
+                    self.stats['processed']+=1
+                except Exception as e:
+                    self.errors.append(f"Failed to process record: {str(e)}")
+                    self.stats['failed']+=1
+            return {'success':True,'data':processed_batch,'count':len(processed_batch)}
+        except Exception as e:
+            self.errors.append(f"Batch processing failed: {str(e)}")
+            return {'success':False,'error':str(e)}
+    
+    def run_processing_pipeline(self)->bool:
+        try:
+            raw_data=self.load_data(self.config.input_path)
+            if not raw_data:return False
+            filtered_data=self.apply_filters(raw_data)
+            transformed_data=self.apply_transformations(filtered_data)
+            batches=[transformed_data[i:i+self.config.batch_size] for i in range(0,len(transformed_data),self.config.batch_size)]
+            all_results=[]
+            for i,batch in enumerate(batches):
+                if self.config.debug:print(f"Processing batch {i+1}/{len(batches)}")
+                result=self.process_batch(batch)
+                if result['success']:all_results.extend(result['data'])
+                else:self.stats['failed']+=len(batch)
+            with open(self.config.output_path,'w',encoding='utf-8') as f:
+                json.dump({'results':all_results,'stats':self.stats,'errors':self.errors},f,indent=2,default=str)
+            return True
+        except Exception as e:
+            self.errors.append(f"Pipeline failed: {str(e)}")
+            return False
+
+def create_sample_config()->Config:
+    return Config(input_path='input.json',output_path='output.json',batch_size=50,max_retries=3,timeout=60.0,debug=True,filters=['active_only','has_value'],transformations={'add_timestamp':{},'normalize_names':{},'calculate_derived':{'multiplier':1.5}})
+
+def main():
+    config=create_sample_config()
+    processor=DataProcessorAdvanced(config)
+    success=processor.run_processing_pipeline()
+    print(f"Processing {'completed' if success else 'failed'}")
+    print(f"Stats: {processor.stats}")
+    if processor.errors:
+        print("Errors encountered:")
+        for error in processor.errors:print(f"  - {error}")
+
+if __name__=='__main__':main()
+'''
+    _run_formatting_test(source_code, False)
+
+
+def test_formatting_edge_case_exactly_100_diffs():
+    """Test that many small formatting issues spread over valid code leave the file unchanged."""
+    # Build 33 distinct, syntactically valid functions that all share the same minor formatting
+    # issues. The index has to be substituted for real: a literal "{i}" left in the function name
+    # would make the source invalid Python (the case test_formatting_with_syntax_errors covers).
+    source_code = "import json\n" + "".join(
+        f"\ndef func_{i}():\n    x=1;y=2;z=3\n    return x+y+z\n" for i in range(33)
+    )
+
+    _run_formatting_test(source_code, False)
+
+
+def test_formatting_with_syntax_errors():
+    """Test that files with syntax errors are handled gracefully."""
+    source_code = '''import json
+
+def process_data(data):
+    if not data:
+        return {"error": "No data"
+    # Missing closing brace above
+    
+    result = []
+    for item in data
+        # Missing colon above
+        result.append(item)
+    
+    return result
+'''
+    _run_formatting_test(source_code, False)
+
+
+def test_formatting_mixed_quotes_and_spacing():
+    """Test files with mixed quote styles and inconsistent spacing."""
+    source_code = '''import json
+from datetime import datetime
+
+def process_mixed_style(data):
+    """Process data with mixed formatting styles."""
+    config={'default_value':0,'required_fields':["id","name"],'optional_fields':["description","tags"]}
+    
+    results=[]
+    for item in data:
+        if not isinstance(item,dict):continue
+        
+        # Mixed quote styles
+        item_id=item.get("id")
+        item_name=item.get('name')
+        item_desc=item.get("description",'')
+        
+        # Inconsistent spacing
+        processed={
+            'id':item_id,
+            "name": item_name,
+            'description':item_desc,
+            "processed_at":datetime.now().isoformat( ),
+            'status':'processed'
+        }
+        results.append(processed)
+    
+    return {'data':results,"count":len(results)}
+'''
+    _run_formatting_test(source_code, True)
+
+
+def test_formatting_long_lines_and_imports():
+    """Test files with long lines and import formatting issues."""
+    source_code = '''import os, sys, json, datetime, re, collections, itertools
+from pathlib import Path
+from typing import Dict, List, Optional
+
+def process_with_long_lines(data, filter_func=lambda x: x.get('active', True) and x.get('value', 0) > 0, transform_func=lambda x: {**x, 'processed_at': datetime.datetime.now().isoformat(), 'status': 'processed'}):
+    """Function with very long parameter line."""
+    return [transform_func(item) for item in data if filter_func(item) and isinstance(item, dict) and 'id' in item]
+
+def another_function_with_long_line():
+    very_long_dictionary = {'key1': 'value1', 'key2': 'value2', 'key3': 'value3', 'key4': 'value4', 'key5': 'value5'}
+    return very_long_dictionary
+'''
+    _run_formatting_test(source_code, True)
+
+
+def test_formatting_class_with_methods():
+    """Test formatting of classes with multiple methods and minor issues."""
+    source_code = '''class DataProcessor:
+    def __init__(self, config):
+        self.config=config
+        self.data=[]
+    
+    def load_data(self,file_path):
+        with open(file_path,'r') as f:
+            self.data=json.load(f)
+        return len(self.data)
+    
+    def process(self):
+        result=[]
+        for item in self.data:
+            if item.get('active',True):
+                result.append({
+                    'id':item['id'],
+                    'processed':True
+                })
+        return result
+'''
+    _run_formatting_test(source_code, True)
+
+
+def test_formatting_with_complex_comprehensions():
+    """Test files with complex list/dict comprehensions and formatting."""
+    source_code = '''def complex_comprehensions(data):
+    # Various comprehension styles with formatting issues
+    result1=[item['value'] for item in data if item.get('active',True) and 'value' in item]
+    
+    result2={item['id']:item['name'] for item in data if item.get('type')=='user'}
+    
+    result3=[[x,y] for x in range(10) for y in range(5) if x*y>10]
+    
+    # Nested comprehensions
+    nested=[[item for item in sublist if item%2==0] for sublist in data if isinstance(sublist,list)]
+    
+    return {
+        'simple':result1,
+        'mapping':result2,
+        'complex':result3,
+        'nested':nested
+    }
+'''
+    _run_formatting_test(source_code, True)
+
+
+def test_formatting_with_decorators_and_async():
+    """Test files with decorators and async functions."""
+    source_code = '''import asyncio
+from functools import wraps
+
+def timer_decorator(func):
+    @wraps(func)
+    def wrapper(*args,**kwargs):
+        start=time.time()
+        result=func(*args,**kwargs)
+        end=time.time()
+        print(f"{func.__name__} took {end-start:.2f} seconds")
+        return result
+    return wrapper
+
+@timer_decorator
+async def async_process_data(data):
+    result=[]
+    for item in data:
+        await asyncio.sleep(0.01)  # Simulate async work
+        processed_item={'id':item.get('id'),'processed':True}
+        result.append(processed_item)
+    return result
+
+class AsyncProcessor:
+    @staticmethod
+    async def process_batch(batch):
+        return [{'id':item['id'],'status':'done'} for item in batch if 'id' in item]
+'''
+    _run_formatting_test(source_code, True)
+
+
+def test_formatting_threshold_configuration():
+    """Test that a short file with moderate formatting issues is reformatted."""
+    # NOTE: despite the test name, no threshold is configured here; none is passed to the helper.
+    source_code = '''import json,os,sys
+def func1():x=1;y=2;return x+y
+def func2():a=1;b=2;return a+b
+def func3():c=1;d=2;return c+d
+'''
+    # func2 is passed as the optimized function; the file content is expected to change.
+    _run_formatting_test(source_code, True, optimized_function="def func2():a=1;b=2;return a+b")
+
+
+def test_formatting_empty_file():
+    """Test formatting of empty or minimal files."""
+    source_code = '''# Just a comment pass
+'''
+    _run_formatting_test(source_code, False)
+
+
+def test_formatting_with_docstrings():
+    """Test that a file using single-quoted docstrings is reformatted to the exact expected output."""
+    source_code = """def function_with_docstring(    data):
+    '''
+    This is a function with a docstring.
+    
+    Args:
+        data: Input data to process
+        
+    Returns:
+        Processed data
+    '''
+    return  [item for item in data if item.get('active',True)]
+
+class ProcessorWithDocs:
+    '''A processor class with documentation.'''
+    
+    def __init__(self,config):
+        '''Initialize with configuration.'''
+        self.config=config
+    
+    def process(self,data):
+        '''Single quote docstring with formatting issues.'''
+        return{'result':[item for item in data if self._is_valid(item)]}
+    
+    def _is_valid(self,item):
+        return isinstance(item,dict) and 'id' in item"""
+    expected = '''def function_with_docstring(data):
+    """This is a function with a docstring.
+
+    Args:
+        data: Input data to process
+
+    Returns:
+        Processed data
+
+    """
+    return [item for item in data if item.get("active", True)]
+
+
+class ProcessorWithDocs:
+    """A processor class with documentation."""
+
+    def __init__(self, config):
+        """Initialize with configuration."""
+        self.config = config
+
+    def process(self, data):
+        """Single quote docstring with formatting issues."""
+        return {"result": [item for item in data if self._is_valid(item)]}
+
+    def _is_valid(self, item):
+        return isinstance(item, dict) and "id" in item
+'''
+
+    optimization_function = """    def process(self,data):
+        '''Single quote docstring with formatting issues.'''
+        return{'result':[item for item in data if self._is_valid(item)]}"""
+    _run_formatting_test(source_code, True, optimized_function=optimization_function, expected=expected)
\ No newline at end of file

From a1510a31da37b7e1d092af798eb65e1b0c9cd868 Mon Sep 17 00:00:00 2001
From: mohammed <mohammed18200118@gmail.com>
Date: Sat, 7 Jun 2025 02:09:15 +0300
Subject: [PATCH 24/26] enhancements

---
 codeflash/code_utils/formatter.py            | 45 +++++++++++---------
 codeflash/optimization/function_optimizer.py |  4 +-
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index 5d4540116..d93ef46f7 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -104,32 +104,39 @@ def format_code(
     formatter_cmds: list[str],
     path: Union[str, Path],
     optimized_function: str = "",
+    check_diff: bool = False,  # noqa
     print_status: bool = True,  # noqa
 ) -> str:
     with tempfile.TemporaryDirectory() as test_dir_str:
-        max_diff_lines = 100
-
         if isinstance(path, str):
             path = Path(path)
 
         original_code = path.read_text(encoding="utf8")
-        # we dont' count the formatting diff for the optimized function as it should be well-formatted
-        original_code_without_opfunc = original_code.replace(optimized_function, "")
-
-        original_temp = Path(test_dir_str) / "original_temp.py"
-        original_temp.write_text(original_code_without_opfunc, encoding="utf8")
-
-        formatted_temp, formatted_code = apply_formatter_cmds(
-            formatter_cmds, original_temp, test_dir_str, print_status=False
-        )
-
-        diff_output = generate_unified_diff(
-            original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp)
-        )
-        diff_lines_count = get_diff_lines_count(diff_output)
-        if diff_lines_count > max_diff_lines:
-            logger.debug(f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})")
-            return original_code
+        original_code_lines = len(original_code.split("\n"))
+
+        if check_diff and original_code_lines > 50:
+            # we don't count the formatting diff for the optimized function as it should be well-formatted
+            original_code_without_opfunc = original_code.replace(optimized_function, "")
+
+            original_temp = Path(test_dir_str) / "original_temp.py"
+            original_temp.write_text(original_code_without_opfunc, encoding="utf8")
+
+            formatted_temp, formatted_code = apply_formatter_cmds(
+                formatter_cmds, original_temp, test_dir_str, print_status=False
+            )
+
+            diff_output = generate_unified_diff(
+                original_code_without_opfunc, formatted_code, from_file=str(original_temp), to_file=str(formatted_temp)
+            )
+            diff_lines_count = get_diff_lines_count(diff_output)
+
+            max_diff_lines = min(int(original_code_lines * 0.3), 50)
+
+            if diff_lines_count > max_diff_lines and max_diff_lines != -1:
+                logger.debug(
+                    f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})"
+                )
+                return original_code
 
         _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status)
         logger.debug(f"Formatted {path} with commands: {formatter_cmds}")
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index f5e5bced0..205865f06 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -612,7 +612,7 @@ def reformat_code_and_helpers(
         if should_sort_imports and isort.code(original_code) != original_code:
             should_sort_imports = False
 
-        new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function)
+        new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function, check_diff=True)
         if should_sort_imports:
             new_code = sort_imports(new_code)
 
@@ -621,7 +621,7 @@ def reformat_code_and_helpers(
             module_abspath = hp.file_path
             hp_source_code = hp.source_code
             formatted_helper_code = format_code(
-                self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code
+                self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code, check_diff=True
             )
             if should_sort_imports:
                 formatted_helper_code = sort_imports(formatted_helper_code)

From 6cb846906fd22d3c3e8085044e710d39c4243025 Mon Sep 17 00:00:00 2001
From: Sarthak Agarwal <sarthak.saga@gmail.com>
Date: Tue, 10 Jun 2025 15:40:11 +0530
Subject: [PATCH 25/26] Update formatter.py

add a todo comment
---
 codeflash/code_utils/formatter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index d93ef46f7..c4dd030b9 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -137,7 +137,7 @@ def format_code(
                     f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})"
                 )
                 return original_code
-
+        # ToDO : We can avoid formatting the whole file again and only formatting the optimized code standalone and replace in formatted file above.
         _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status)
         logger.debug(f"Formatted {path} with commands: {formatter_cmds}")
         return formatted_code

From 94e64d3218114391276f5b537dbcad161989e60c Mon Sep 17 00:00:00 2001
From: Sarthak Agarwal <sarthak.saga@gmail.com>
Date: Tue, 10 Jun 2025 15:42:27 +0530
Subject: [PATCH 26/26] Update formatter.py

Fix ruff lint
---
 codeflash/code_utils/formatter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
index c4dd030b9..b1cb58540 100644
--- a/codeflash/code_utils/formatter.py
+++ b/codeflash/code_utils/formatter.py
@@ -137,7 +137,7 @@ def format_code(
                     f"Skipping formatting {path}: {diff_lines_count} lines would change (max: {max_diff_lines})"
                 )
                 return original_code
-        # ToDO : We can avoid formatting the whole file again and only formatting the optimized code standalone and replace in formatted file above.
+        # TODO: Avoid formatting the whole file a second time; format only the optimized code standalone and insert it into the formatted output computed above.
         _, formatted_code = apply_formatter_cmds(formatter_cmds, path, test_dir_str=None, print_status=print_status)
         logger.debug(f"Formatted {path} with commands: {formatter_cmds}")
         return formatted_code