@@ -119,24 +119,93 @@ def get_file_info(file_path: Path, prefix: str = "") -> str:
119119 return "\n " .join (filter (None , tree_string ))
120120
121121
122- def get_target_edit_files (target_dir : str , src_prefix : str ) -> list [str ]:
def collect_test_files(directory: str) -> list[str]:
    """Collect the Python files that live inside test directories.

    A directory counts as a test directory when its own name contains
    ``test`` (case-insensitive) or when it is nested, at any depth, inside
    such a directory.

    Args:
    ----
        directory (str): Root of the tree to walk.

    Returns:
    -------
        list[str]: Path of every ``.py`` file found in a test directory,
            built by joining the walked directory with the file name.

    """
    test_files: list[str] = []
    # Paths, exactly as os.walk will report them, of directories nested in a
    # test directory. Full paths are tracked instead of bare names so that an
    # unrelated directory sharing a name with a test subdirectory
    # (e.g. ``tests/helpers`` vs ``pkg/helpers``) is not misclassified.
    nested_test_dirs: set[str] = set()

    for walked_root, dirs, files in os.walk(directory):
        # A trailing slash would make os.path.basename() return "".
        root = walked_root[:-1] if walked_root.endswith("/") else walked_root
        is_test_dir = (
            "test" in os.path.basename(root).lower()
            or walked_root in nested_test_dirs
        )
        if not is_test_dir:
            continue

        # Process only Python files
        test_files.extend(
            os.path.join(root, name) for name in files if name.endswith(".py")
        )
        # os.walk reports each child as join(dirpath, name), so recording the
        # same join lets the children be recognised when they are visited.
        nested_test_dirs.update(os.path.join(walked_root, d) for d in dirs)

    return test_files
145+
146+
def collect_python_files(directory: str) -> list[str]:
    """Return the path of every ``.py`` file found beneath ``directory``.

    The tree is walked recursively with ``os.walk``; each result is the
    walked directory joined with the file name.

    Args:
    ----
        directory (str): Root of the tree to walk.

    Returns:
    -------
        list[str]: Paths of all files whose name ends with ``.py``.

    """
    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(directory)
        for name in names
        if name.endswith(".py")
    ]
160+
161+
def _find_files_to_edit(base_dir: str, src_dir: str, test_dir: str) -> list[str]:
    """Identify files to remove content from, by heuristics.

    We assume source code is under [lib]/[lib] or [lib]/src.
    We exclude test code. This function would not work
    if test code doesn't have its own directory.

    Args:
    ----
        base_dir (str): The path to local library.
        src_dir (str): The directory containing source code.
        test_dir (str): The directory containing test code.

    Returns:
    -------
        list[str]: A sorted list of files to be edited. Paths keep the form
            produced by walking ``os.path.join(base_dir, src_dir)``.

    """
    candidates = collect_python_files(os.path.join(base_dir, src_dir))
    # Compare normalised paths so the same file reached through differently
    # spelled roots (e.g. "lib/./tests/x.py" vs "lib/tests/x.py") still matches.
    test_files = {
        os.path.normpath(path)
        for path in collect_test_files(os.path.join(base_dir, test_dir))
    }

    # Don't edit __init__, __main__ or conftest.py files. Only the file name
    # is inspected, so a parent directory that happens to contain one of
    # these markers does not exclude everything beneath it.
    excluded_markers = ("__init__", "__main__", "conftest.py")

    kept = {
        path
        for path in candidates
        if os.path.normpath(path) not in test_files
        and not any(
            marker in os.path.basename(path) for marker in excluded_markers
        )
    }
    # Sort so the result does not depend on set iteration order.
    return sorted(kept)
190+
191+
def get_target_edit_files(
    target_dir: str, src_dir: str, test_dir: str, *, max_lines: int = 1500
) -> list[str]:
    """Find the source files to edit, as paths relative to ``target_dir``.

    Candidates come from ``_find_files_to_edit``; files with ``max_lines``
    lines or more are dropped.

    Args:
    ----
        target_dir (str): The path to the local library.
        src_dir (str): The directory, under ``target_dir``, containing
            source code.
        test_dir (str): The directory, under ``target_dir``, containing
            test code.
        max_lines (int): Exclusive upper bound on a file's line count.
            Defaults to 1500.

    Returns:
    -------
        list[str]: Paths of the selected files relative to ``target_dir``.

    """
    selected = []
    for file_path in _find_files_to_edit(target_dir, src_dir, test_dir):
        # errors="ignore" keeps files with undecodable bytes countable.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            line_count = len(file.read().splitlines())
        if line_count < max_lines:
            # relpath strips only the leading target_dir; a plain
            # str.replace would also delete later occurrences of the same
            # text inside the path (e.g. "simpy/src/simpy/core.py").
            selected.append(os.path.relpath(file_path, target_dir))

    return selected
140209
141210
142211def get_message (
0 commit comments