diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py
index 62a24eb16..f6c333e02 100644
--- a/src/codegen/sdk/codebase/codebase_context.py
+++ b/src/codegen/sdk/codebase/codebase_context.py
@@ -67,7 +67,7 @@
     ".*/ace/.*.js",
     "src/vs/platform/contextview/browser/contextMenuService.ts",
     "*/compiled/*",
-    "*/*.min.js",
+    "*.min.js",
 ]
 
 
diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py
index 930c9394c..e5f0836f3 100644
--- a/src/codegen/sdk/core/file.py
+++ b/src/codegen/sdk/core/file.py
@@ -45,6 +45,8 @@
 
 logger = logging.getLogger(__name__)
 
+MINIFIED_FILE_THRESHOLD = 500
+
 
 @apidoc
 class File(Editable[None]):
@@ -577,6 +579,12 @@ def invalidate(self):
     def from_content(cls, filepath: str | PathLike | Path, content: str, ctx: CodebaseContext, sync: bool = True, verify_syntax: bool = True) -> Self | None:
         """Creates a new file from content and adds it to the graph."""
         path = ctx.to_absolute(filepath)
+
+        # Sanity check: any line at or above the threshold length marks the file as minified, so skip it
+        if any(len(line) >= MINIFIED_FILE_THRESHOLD for line in content.split("\n")):
+            logger.info(f"File {filepath} is a minified file (Line length >= {MINIFIED_FILE_THRESHOLD}). Skipping...", extra={"filepath": filepath})
+            return None
+
         ts_node = parse_file(path, content)
         if ts_node.has_error and verify_syntax:
             logger.info("Failed to parse file %s", filepath)
diff --git a/tests/unit/codegen/sdk/codebase/file/test_file.py b/tests/unit/codegen/sdk/codebase/file/test_file.py
index 2c6b4dfa0..fb4424d41 100644
--- a/tests/unit/codegen/sdk/codebase/file/test_file.py
+++ b/tests/unit/codegen/sdk/codebase/file/test_file.py
@@ -211,3 +211,14 @@ def test_files_in_subdirectories_case_sensitivity(tmpdir) -> None:
         assert codebase.has_file("SubDir3/File3.py", ignore_case=False)
         assert not codebase.has_file("SUBDIR3/FILE3.py", ignore_case=False)
         assert not codebase.has_file("subdir3/file3.py", ignore_case=False)
+
+
+def test_minified_file(tmpdir) -> None:
+    with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": f"console.log(1{'0' * 1000})"}) as codebase:
+        # This should match the `*.min.js` pattern
+        file1 = codebase.ctx.get_file("file1.min.js")
+        assert file1 is None
+
+        # This should reach the maximum line length threshold
+        file2 = codebase.ctx.get_file("file2.js")
+        assert file2 is None