diff --git a/src/codegen/git/repo_operator/repo_operator.py b/src/codegen/git/repo_operator/repo_operator.py index dd00d7e42..6f13bec43 100644 --- a/src/codegen/git/repo_operator/repo_operator.py +++ b/src/codegen/git/repo_operator/repo_operator.py @@ -634,7 +634,7 @@ def iter_files( content = self.get_file(filepath) yield rel_filepath, content except Exception as e: - print(f"Error reading file {filepath}: {e}") + logger.warning(f"Error reading file {filepath}: {e}") def list_files(self, subdirs: list[str] | None = None, extensions: list[str] | None = None) -> list[str]: """List files matching subdirs + extensions in a repo. diff --git a/src/codegen/git/utils/language.py b/src/codegen/git/utils/language.py index 6868b02dc..cc2742893 100644 --- a/src/codegen/git/utils/language.py +++ b/src/codegen/git/utils/language.py @@ -109,6 +109,7 @@ def _determine_language_by_git_file_count(folder_path: str) -> ProgrammingLangua """ from codegen.git.repo_operator.repo_operator import RepoOperator from codegen.git.schemas.repo_config import RepoConfig + from codegen.sdk.codebase.codebase_context import GLOBAL_FILE_IGNORE_LIST from codegen.sdk.python import PyFile from codegen.sdk.typescript.file import TSFile @@ -132,7 +133,7 @@ def _determine_language_by_git_file_count(folder_path: str) -> ProgrammingLangua repo_operator = RepoOperator(repo_config=repo_config) # Walk through the directory - for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None): + for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST): # Convert to Path object file_path = Path(git_root) / Path(rel_path) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index c4337683d..16118bc96 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -55,7 +55,16 @@ # src/vs/platform/contextview/browser/contextMenuService.ts is ignored as there is a parsing error with tree-sitter -GLOBAL_FILE_IGNORE_LIST = [".git/*", ".yarn/releases/*", ".*/tests/static/chunk-.*.js", ".*/ace/.*.js", "src/vs/platform/contextview/browser/contextMenuService.ts"] +GLOBAL_FILE_IGNORE_LIST = [ + ".git/*", + "*/.git/*", + "node_modules/*", + "*/node_modules/*", + ".yarn/releases/*", + ".*/tests/static/chunk-.*.js", + ".*/ace/.*.js", + "src/vs/platform/contextview/browser/contextMenuService.ts", +] @unique