From 08884b1efc679b02cfc3ddf20c2027b718950ddc Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 10:31:06 -0800 Subject: [PATCH 01/13] Bump MINIFIED_FILE_THRESHOLD up --- src/codegen/sdk/core/file.py | 2 +- tests/unit/codegen/sdk/codebase/file/test_file.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py index e5f0836f3..7015c72f7 100644 --- a/src/codegen/sdk/core/file.py +++ b/src/codegen/sdk/core/file.py @@ -45,7 +45,7 @@ logger = logging.getLogger(__name__) -MINIFIED_FILE_THRESHOLD = 500 +MINIFIED_FILE_THRESHOLD = 1000 @apidoc diff --git a/tests/unit/codegen/sdk/codebase/file/test_file.py b/tests/unit/codegen/sdk/codebase/file/test_file.py index fb4424d41..e76052b18 100644 --- a/tests/unit/codegen/sdk/codebase/file/test_file.py +++ b/tests/unit/codegen/sdk/codebase/file/test_file.py @@ -214,7 +214,7 @@ def test_files_in_subdirectories_case_sensitivity(tmpdir) -> None: def test_minified_file(tmpdir) -> None: - with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": f"console.log(1{'0' * 1000})"}) as codebase: + with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": f"console.log(1{'0' * 10000})"}) as codebase: # This should match the `*.min.js` pattern file1 = codebase.ctx.get_file("file1.min.js") assert file1 is None From 8aa21837ec770f0b04fd55b8b18d70964c3c6ead Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 12:24:47 -0800 Subject: [PATCH 02/13] Bump MINIFIED_FILE_THRESHOLD even more --- src/codegen/sdk/core/file.py | 2 +- .../statements/match_statement/test_try_catch_statement.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py index 7015c72f7..e5fb316a1 100644 --- a/src/codegen/sdk/core/file.py +++ b/src/codegen/sdk/core/file.py @@ -45,7 +45,7 @@ logger = logging.getLogger(__name__) -MINIFIED_FILE_THRESHOLD = 1000 +MINIFIED_FILE_THRESHOLD = 5000 @apidoc diff --git a/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py b/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py index 0a1928551..635ca09e2 100644 --- a/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py +++ b/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py @@ -63,7 +63,7 @@ def test_try_except_statement_is_wrapped_in(tmpdir) -> None: def risky(): call() try: - call() + call()` if a: call() except NameError as e: From e0c11faeb041bc44d278b6dc042d6563e459c094 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 12:50:23 -0800 Subject: [PATCH 03/13] Change minified file detection strategy --- src/codegen/sdk/core/file.py | 7 +-- src/codegen/sdk/utils.py | 57 +++++++++++++++++++ .../codegen/sdk/codebase/file/example.min.js | 1 + .../codegen/sdk/codebase/file/test_file.py | 3 +- 4 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 tests/unit/codegen/sdk/codebase/file/example.min.js diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py index e5fb316a1..d25ac1078 100644 --- a/src/codegen/sdk/core/file.py +++ b/src/codegen/sdk/core/file.py @@ -34,6 +34,7 @@ from codegen.sdk.topological_sort import pseudo_topological_sort from codegen.sdk.tree_sitter_parser import get_parser_by_filepath_or_extension, parse_file from codegen.sdk.typescript.function import TSFunction +from codegen.sdk.utils import is_minified_js from codegen.shared.decorators.docs import apidoc, noapidoc from codegen.visualizations.enums import VizNode @@ -45,8 +46,6 @@ logger = logging.getLogger(__name__) -MINIFIED_FILE_THRESHOLD = 5000 - @apidoc class File(Editable[None]): @@ -581,8 +580,8 @@ def from_content(cls, filepath: str | PathLike | Path, content: str, ctx: Codeba path = ctx.to_absolute(filepath) # Sanity check to ensure file is not a minified file - if any(len(line) >= MINIFIED_FILE_THRESHOLD for line in content.split("\n")): - logger.info(f"File {filepath} is a minified file (Line length < {MINIFIED_FILE_THRESHOLD}). Skipping...", extra={"filepath": filepath}) + if is_minified_js(content): + logger.info(f"File {filepath} is a minified file. Skipping...", extra={"filepath": filepath}) return None ts_node = parse_file(path, content) diff --git a/src/codegen/sdk/utils.py b/src/codegen/sdk/utils.py index 9b551d5d5..2e3bcf180 100644 --- a/src/codegen/sdk/utils.py +++ b/src/codegen/sdk/utils.py @@ -1,6 +1,7 @@ import os import re import shutil +import statistics from collections.abc import Iterable from contextlib import contextmanager from xml.dom.minidom import parseString @@ -245,3 +246,59 @@ def truncate_line(input: str, max_chars: int) -> str: if len(input) > max_chars: return input[:max_chars] + f"...(truncated from {len(input)} characters)." return input + + +def is_minified_js(content): + """Analyzes a string to determine if it contains minified JavaScript code. + + Args: + content: String containing JavaScript code to analyze + + Returns: + bool: True if the content appears to be minified JavaScript, False otherwise + """ + try: + # Skip empty content + if not content.strip(): + return False + + # Characteristics of minified JS files + lines = content.split('\n') + + # 1. Check for average line length (minified files have very long lines) + line_lengths = [len(line) for line in lines if line.strip()] + if not line_lengths: # Handle empty content case + return False + + avg_line_length = statistics.mean(line_lengths) + + # 2. Check for semicolon-to-newline ratio (minified often has ; instead of newlines) + semicolons = content.count(';') + newlines = len(lines) - 1 + semicolon_ratio = semicolons / max(newlines, 1) # Avoid division by zero + + # 3. Check whitespace ratio (minified has low whitespace) + whitespace_chars = len(re.findall(r'[\s]', content)) + total_chars = len(content) + whitespace_ratio = whitespace_chars / total_chars if total_chars else 0 + + # 4. Check for common minification patterns + has_common_patterns = bool(re.search(r'[\w\)]\{[\w:]+\}', content)) # Condensed object notation + + # 5. Check for short variable names (common in minified code) + variable_names = re.findall(r'var\s+(\w+)', content) + avg_var_length = statistics.mean([len(name) for name in variable_names]) if variable_names else 0 + + # Decision logic - tuned threshold values + is_minified = ( + (avg_line_length > 100) and # Long average line length + (semicolon_ratio > 0.5 or has_common_patterns) and # High semicolon ratio or minification patterns + (whitespace_ratio < 0.15) and # Low whitespace ratio + (not variable_names or avg_var_length < 3) # Short variable names or no vars + ) + + return is_minified + + except Exception as e: + print(f"Error analyzing content: {e}") + return False diff --git a/tests/unit/codegen/sdk/codebase/file/example.min.js b/tests/unit/codegen/sdk/codebase/file/example.min.js new file mode 100644 index 000000000..2b8ad69d6 --- /dev/null +++ b/tests/unit/codegen/sdk/codebase/file/example.min.js @@ -0,0 +1 @@ +!function(e){var t={};function n(r){if(t[r])return t[r].exports;var o=t[r]={i:r,l:!1,exports:{}};return e[r].call(o.exports,o,o.exports,n),o.l=!0,o.exports}n.m=e,n.c=t,n.d=function(e,t,r){n.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},n.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n.t=function(e,t){if(1&t&&(e=n(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var r=Object.create(null);if(n.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)n.d(r,o,function(t){return e[t]}.bind(null,o));return r},n.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return n.d(t,"a",t),t},n.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},n.p="",n(n.s=0)}([function(e,t,n){"use strict";n.r(t);var r=function(){function e(e,t){this.x=e,this.y=t}return e.prototype.add=function(e){return new this.constructor(this.x+e.x,this.y+e.y)},e.prototype.sub=function(e){return new this.constructor(this.x-e.x,this.y-e.y)},e.prototype.mul=function(e){return new this.constructor(this.x*e,this.y*e)},e.prototype.div=function(e){return new this.constructor(this.x/e,this.y/e)},e.prototype.mag=function(){return Math.sqrt(this.x*this.x+this.y*this.y)},e.prototype.normalize=function(){var e=this.mag();return e>0?this.div(e):new this.constructor(0,0)},e.prototype.dot=function(e){return this.x*e.x+this.y*e.y},e}();function o(e){return new r(e.x,e.y)}var i=function(){function e(){this.p=new Map}return e.prototype.getData=function(e){return this.p.get(e)},e.prototype.setData=function(e,t){this.p.set(e,t)},e.prototype.clear=function(){this.p.clear()},e}();window.onload=function(){var e=new i;e.setData("a",1),e.setData("b",2),e.setData("c",3);var t=new r(5,7),n=new r(3,2),u=t.add(n);console.log("Vec sum:",u)}}]); diff --git a/tests/unit/codegen/sdk/codebase/file/test_file.py b/tests/unit/codegen/sdk/codebase/file/test_file.py index e76052b18..580fa93b1 100644 --- a/tests/unit/codegen/sdk/codebase/file/test_file.py +++ b/tests/unit/codegen/sdk/codebase/file/test_file.py @@ -1,3 +1,4 @@ +import os import sys import pytest @@ -214,7 +215,7 @@ def test_files_in_subdirectories_case_sensitivity(tmpdir) -> None: def test_minified_file(tmpdir) -> None: - with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": f"console.log(1{'0' * 10000})"}) as codebase: + with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": open(f"{os.path.dirname(__file__)}/example.min.js").read()}) as codebase: # This should match the `*.min.js` pattern file1 = codebase.ctx.get_file("file1.min.js") assert file1 is None From c990946a40354675eea8e6f8ccf2a103054ed997 Mon Sep 17 00:00:00 2001 From: EdwardJXLi <20020059+EdwardJXLi@users.noreply.github.com> Date: Mon, 24 Feb 2025 20:51:25 +0000 Subject: [PATCH 04/13] Automated pre-commit update --- src/codegen/sdk/utils.py | 18 ++-- .../codegen/sdk/codebase/file/example.min.js | 101 +++++++++++++++++- 2 files changed, 109 insertions(+), 10 deletions(-) diff --git a/src/codegen/sdk/utils.py b/src/codegen/sdk/utils.py index 2e3bcf180..0b54137b1 100644 --- a/src/codegen/sdk/utils.py +++ b/src/codegen/sdk/utils.py @@ -263,7 +263,7 @@ def is_minified_js(content): return False # Characteristics of minified JS files - lines = content.split('\n') + lines = content.split("\n") # 1. Check for average line length (minified files have very long lines) line_lengths = [len(line) for line in lines if line.strip()] @@ -273,28 +273,28 @@ def is_minified_js(content): avg_line_length = statistics.mean(line_lengths) # 2. Check for semicolon-to-newline ratio (minified often has ; instead of newlines) - semicolons = content.count(';') + semicolons = content.count(";") newlines = len(lines) - 1 semicolon_ratio = semicolons / max(newlines, 1) # Avoid division by zero # 3. Check whitespace ratio (minified has low whitespace) - whitespace_chars = len(re.findall(r'[\s]', content)) + whitespace_chars = len(re.findall(r"[\s]", content)) total_chars = len(content) whitespace_ratio = whitespace_chars / total_chars if total_chars else 0 # 4. Check for common minification patterns - has_common_patterns = bool(re.search(r'[\w\)]\{[\w:]+\}', content)) # Condensed object notation + has_common_patterns = bool(re.search(r"[\w\)]\{[\w:]+\}", content)) # Condensed object notation # 5. Check for short variable names (common in minified code) - variable_names = re.findall(r'var\s+(\w+)', content) + variable_names = re.findall(r"var\s+(\w+)", content) avg_var_length = statistics.mean([len(name) for name in variable_names]) if variable_names else 0 # Decision logic - tuned threshold values is_minified = ( - (avg_line_length > 100) and # Long average line length - (semicolon_ratio > 0.5 or has_common_patterns) and # High semicolon ratio or minification patterns - (whitespace_ratio < 0.15) and # Low whitespace ratio - (not variable_names or avg_var_length < 3) # Short variable names or no vars + (avg_line_length > 100) # Long average line length + and (semicolon_ratio > 0.5 or has_common_patterns) # High semicolon ratio or minification patterns + and (whitespace_ratio < 0.15) # Low whitespace ratio + and (not variable_names or avg_var_length < 3) # Short variable names or no vars ) return is_minified diff --git a/tests/unit/codegen/sdk/codebase/file/example.min.js b/tests/unit/codegen/sdk/codebase/file/example.min.js index 2b8ad69d6..11c222458 100644 --- a/tests/unit/codegen/sdk/codebase/file/example.min.js +++ b/tests/unit/codegen/sdk/codebase/file/example.min.js @@ -1 +1,100 @@ -!function(e){var t={};function n(r){if(t[r])return t[r].exports;var o=t[r]={i:r,l:!1,exports:{}};return e[r].call(o.exports,o,o.exports,n),o.l=!0,o.exports}n.m=e,n.c=t,n.d=function(e,t,r){n.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},n.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n.t=function(e,t){if(1&t&&(e=n(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var r=Object.create(null);if(n.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)n.d(r,o,function(t){return e[t]}.bind(null,o));return r},n.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return n.d(t,"a",t),t},n.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},n.p="",n(n.s=0)}([function(e,t,n){"use strict";n.r(t);var r=function(){function e(e,t){this.x=e,this.y=t}return e.prototype.add=function(e){return new this.constructor(this.x+e.x,this.y+e.y)},e.prototype.sub=function(e){return new this.constructor(this.x-e.x,this.y-e.y)},e.prototype.mul=function(e){return new this.constructor(this.x*e,this.y*e)},e.prototype.div=function(e){return new this.constructor(this.x/e,this.y/e)},e.prototype.mag=function(){return Math.sqrt(this.x*this.x+this.y*this.y)},e.prototype.normalize=function(){var e=this.mag();return e>0?this.div(e):new this.constructor(0,0)},e.prototype.dot=function(e){return this.x*e.x+this.y*e.y},e}();function o(e){return new r(e.x,e.y)}var i=function(){function e(){this.p=new Map}return e.prototype.getData=function(e){return this.p.get(e)},e.prototype.setData=function(e,t){this.p.set(e,t)},e.prototype.clear=function(){this.p.clear()},e}();window.onload=function(){var e=new i;e.setData("a",1),e.setData("b",2),e.setData("c",3);var t=new r(5,7),n=new r(3,2),u=t.add(n);console.log("Vec sum:",u)}}]); +!((e) => { + var t = {}; + function n(r) { + if (t[r]) return t[r].exports; + var o = (t[r] = { i: r, l: !1, exports: {} }); + return e[r].call(o.exports, o, o.exports, n), (o.l = !0), o.exports; + } + (n.m = e), + (n.c = t), + (n.d = (e, t, r) => { + n.o(e, t) || Object.defineProperty(e, t, { enumerable: !0, get: r }); + }), + (n.r = (e) => { + "undefined" != typeof Symbol && + Symbol.toStringTag && + Object.defineProperty(e, Symbol.toStringTag, { value: "Module" }), + Object.defineProperty(e, "__esModule", { value: !0 }); + }), + (n.t = (e, t) => { + if ((1 & t && (e = n(e)), 8 & t)) return e; + if (4 & t && "object" == typeof e && e && e.__esModule) return e; + var r = Object.create(null); + if ( + (n.r(r), + Object.defineProperty(r, "default", { enumerable: !0, value: e }), + 2 & t && "string" != typeof e) + ) + for (var o in e) n.d(r, o, ((t) => e[t]).bind(null, o)); + return r; + }), + (n.n = (e) => { + var t = e && e.__esModule ? () => e.default : () => e; + return n.d(t, "a", t), t; + }), + (n.o = (e, t) => Object.prototype.hasOwnProperty.call(e, t)), + (n.p = ""), + n((n.s = 0)); +})([ + (e, t, n) => { + n.r(t); + var r = (() => { + function e(e, t) { + (this.x = e), (this.y = t); + } + return ( + (e.prototype.add = function (e) { + return new this.constructor(this.x + e.x, this.y + e.y); + }), + (e.prototype.sub = function (e) { + return new this.constructor(this.x - e.x, this.y - e.y); + }), + (e.prototype.mul = function (e) { + return new this.constructor(this.x * e, this.y * e); + }), + (e.prototype.div = function (e) { + return new this.constructor(this.x / e, this.y / e); + }), + (e.prototype.mag = function () { + return Math.sqrt(this.x * this.x + this.y * this.y); + }), + (e.prototype.normalize = function () { + var e = this.mag(); + return e > 0 ? this.div(e) : new this.constructor(0, 0); + }), + (e.prototype.dot = function (e) { + return this.x * e.x + this.y * e.y; + }), + e + ); + })(); + function o(e) { + return new r(e.x, e.y); + } + var i = (() => { + function e() { + this.p = new Map(); + } + return ( + (e.prototype.getData = function (e) { + return this.p.get(e); + }), + (e.prototype.setData = function (e, t) { + this.p.set(e, t); + }), + (e.prototype.clear = function () { + this.p.clear(); + }), + e + ); + })(); + window.onload = () => { + var e = new i(); + e.setData("a", 1), e.setData("b", 2), e.setData("c", 3); + var t = new r(5, 7), + n = new r(3, 2), + u = t.add(n); + console.log("Vec sum:", u); + }; + }, +]); From 3283cf883d98fe9e54f8dc59a8246c3f98fa809c Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:25:54 -0800 Subject: [PATCH 05/13] Fix extra backtick bug --- .../statements/match_statement/test_try_catch_statement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py b/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py index 635ca09e2..0a1928551 100644 --- a/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py +++ b/tests/unit/codegen/sdk/python/statements/match_statement/test_try_catch_statement.py @@ -63,7 +63,7 @@ def test_try_except_statement_is_wrapped_in(tmpdir) -> None: def risky(): call() try: - call()` + call() if a: call() except NameError as e: From 38b2436a67ceb0106a3b9044d28b90a3b1f19803 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:38:33 -0800 Subject: [PATCH 06/13] Tweak minified file detection --- src/codegen/sdk/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/codegen/sdk/utils.py b/src/codegen/sdk/utils.py index 2e3bcf180..fe8892cf4 100644 --- a/src/codegen/sdk/utils.py +++ b/src/codegen/sdk/utils.py @@ -291,12 +291,17 @@ def is_minified_js(content): # Decision logic - tuned threshold values is_minified = ( - (avg_line_length > 100) and # Long average line length - (semicolon_ratio > 0.5 or has_common_patterns) and # High semicolon ratio or minification patterns - (whitespace_ratio < 0.15) and # Low whitespace ratio - (not variable_names or avg_var_length < 3) # Short variable names or no vars + (avg_line_length > 250) and # Very long average line length + (semicolon_ratio > 0.8 or has_common_patterns) and # High semicolon ratio or minification patterns + (whitespace_ratio < 0.08) and # Very low whitespace ratio + (avg_var_length < 3 or not variable_names) # Extremely short variable names or no vars ) + # (avg_line_length > 100) and # Long average line length + # (semicolon_ratio > 0.5 or has_common_patterns) and # High semicolon ratio or minification patterns + # (whitespace_ratio < 0.15) and # Low whitespace ratio + # (not variable_names or avg_var_length < 3) # Short variable names or no vars + return is_minified except Exception as e: From 3eff31c5a917cdc8f1ae38a5062528bb841b7bd5 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:39:38 -0800 Subject: [PATCH 07/13] Add more files to GLOBAL_FILE_IGNORE_LIST --- src/codegen/sdk/codebase/codebase_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index f6c333e02..9af5818ce 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -68,6 +68,7 @@ "src/vs/platform/contextview/browser/contextMenuService.ts", "*/compiled/*", "*.min.js", + "*@*.js", ] From c7290a141695350d6f1da202b09dfd7b87f3ebb1 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:40:12 -0800 Subject: [PATCH 08/13] Delete test code --- src/codegen/sdk/utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/codegen/sdk/utils.py b/src/codegen/sdk/utils.py index 7fbe6e1ee..71abb4fea 100644 --- a/src/codegen/sdk/utils.py +++ b/src/codegen/sdk/utils.py @@ -297,11 +297,6 @@ def is_minified_js(content): (avg_var_length < 3 or not variable_names) # Extremely short variable names or no vars ) - # (avg_line_length > 100) and # Long average line length - # (semicolon_ratio > 0.5 or has_common_patterns) and # High semicolon ratio or minification patterns - # (whitespace_ratio < 0.15) and # Low whitespace ratio - # (not variable_names or avg_var_length < 3) # Short variable names or no vars - return is_minified except Exception as e: From 22fa3e05a87df273212760bffbc769a16c387147 Mon Sep 17 00:00:00 2001 From: EdwardJXLi <20020059+EdwardJXLi@users.noreply.github.com> Date: Mon, 24 Feb 2025 21:41:11 +0000 Subject: [PATCH 09/13] Automated pre-commit update --- src/codegen/sdk/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/codegen/sdk/utils.py b/src/codegen/sdk/utils.py index 71abb4fea..4049ae118 100644 --- a/src/codegen/sdk/utils.py +++ b/src/codegen/sdk/utils.py @@ -291,10 +291,10 @@ def is_minified_js(content): # Decision logic - tuned threshold values is_minified = ( - (avg_line_length > 250) and # Very long average line length - (semicolon_ratio > 0.8 or has_common_patterns) and # High semicolon ratio or minification patterns - (whitespace_ratio < 0.08) and # Very low whitespace ratio - (avg_var_length < 3 or not variable_names) # Extremely short variable names or no vars + (avg_line_length > 250) # Very long average line length + and (semicolon_ratio > 0.8 or has_common_patterns) # High semicolon ratio or minification patterns + and (whitespace_ratio < 0.08) # Very low whitespace ratio + and (avg_var_length < 3 or not variable_names) # Extremely short variable names or no vars ) return is_minified From 58e9da506fb2d25110b25e5eecee2bacbfd43c90 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:55:28 -0800 Subject: [PATCH 10/13] Fix test --- tests/unit/codegen/sdk/codebase/file/test_file.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unit/codegen/sdk/codebase/file/test_file.py b/tests/unit/codegen/sdk/codebase/file/test_file.py index 580fa93b1..0409e51fa 100644 --- a/tests/unit/codegen/sdk/codebase/file/test_file.py +++ b/tests/unit/codegen/sdk/codebase/file/test_file.py @@ -215,7 +215,14 @@ def test_files_in_subdirectories_case_sensitivity(tmpdir) -> None: def test_minified_file(tmpdir) -> None: - with get_codebase_session(tmpdir=tmpdir, files={"file1.min.js": "console.log(123)", "file2.js": open(f"{os.path.dirname(__file__)}/example.min.js").read()}) as codebase: + with get_codebase_session( + tmpdir=tmpdir, + files={ + "file1.min.js": "console.log(123)", + "file2.js": open(f"{os.path.dirname(__file__)}/example.min.js").read(), + }, + programming_language=ProgrammingLanguage.TYPESCRIPT, + ) as codebase: # This should match the `*.min.js` pattern file1 = codebase.ctx.get_file("file1.min.js") assert file1 is None From 12ed4874584d657be8453e82e8db2d9e7dd7e58f Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:55:38 -0800 Subject: [PATCH 11/13] Tell biome to ignore test file --- .../codegen/sdk/codebase/file/example.min.js | 102 +----------------- 1 file changed, 2 insertions(+), 100 deletions(-) diff --git a/tests/unit/codegen/sdk/codebase/file/example.min.js b/tests/unit/codegen/sdk/codebase/file/example.min.js index 11c222458..1dc9e88bb 100644 --- a/tests/unit/codegen/sdk/codebase/file/example.min.js +++ b/tests/unit/codegen/sdk/codebase/file/example.min.js @@ -1,100 +1,2 @@ -!((e) => { - var t = {}; - function n(r) { - if (t[r]) return t[r].exports; - var o = (t[r] = { i: r, l: !1, exports: {} }); - return e[r].call(o.exports, o, o.exports, n), (o.l = !0), o.exports; - } - (n.m = e), - (n.c = t), - (n.d = (e, t, r) => { - n.o(e, t) || Object.defineProperty(e, t, { enumerable: !0, get: r }); - }), - (n.r = (e) => { - "undefined" != typeof Symbol && - Symbol.toStringTag && - Object.defineProperty(e, Symbol.toStringTag, { value: "Module" }), - Object.defineProperty(e, "__esModule", { value: !0 }); - }), - (n.t = (e, t) => { - if ((1 & t && (e = n(e)), 8 & t)) return e; - if (4 & t && "object" == typeof e && e && e.__esModule) return e; - var r = Object.create(null); - if ( - (n.r(r), - Object.defineProperty(r, "default", { enumerable: !0, value: e }), - 2 & t && "string" != typeof e) - ) - for (var o in e) n.d(r, o, ((t) => e[t]).bind(null, o)); - return r; - }), - (n.n = (e) => { - var t = e && e.__esModule ? () => e.default : () => e; - return n.d(t, "a", t), t; - }), - (n.o = (e, t) => Object.prototype.hasOwnProperty.call(e, t)), - (n.p = ""), - n((n.s = 0)); -})([ - (e, t, n) => { - n.r(t); - var r = (() => { - function e(e, t) { - (this.x = e), (this.y = t); - } - return ( - (e.prototype.add = function (e) { - return new this.constructor(this.x + e.x, this.y + e.y); - }), - (e.prototype.sub = function (e) { - return new this.constructor(this.x - e.x, this.y - e.y); - }), - (e.prototype.mul = function (e) { - return new this.constructor(this.x * e, this.y * e); - }), - (e.prototype.div = function (e) { - return new this.constructor(this.x / e, this.y / e); - }), - (e.prototype.mag = function () { - return Math.sqrt(this.x * this.x + this.y * this.y); - }), - (e.prototype.normalize = function () { - var e = this.mag(); - return e > 0 ? this.div(e) : new this.constructor(0, 0); - }), - (e.prototype.dot = function (e) { - return this.x * e.x + this.y * e.y; - }), - e - ); - })(); - function o(e) { - return new r(e.x, e.y); - } - var i = (() => { - function e() { - this.p = new Map(); - } - return ( - (e.prototype.getData = function (e) { - return this.p.get(e); - }), - (e.prototype.setData = function (e, t) { - this.p.set(e, t); - }), - (e.prototype.clear = function () { - this.p.clear(); - }), - e - ); - })(); - window.onload = () => { - var e = new i(); - e.setData("a", 1), e.setData("b", 2), e.setData("c", 3); - var t = new r(5, 7), - n = new r(3, 2), - u = t.add(n); - console.log("Vec sum:", u); - }; - }, -]); +// biome-ignore lint format: test file +!function(e){var t={};function n(r){if(t[r])return t[r].exports;var o=t[r]={i:r,l:!1,exports:{}};return e[r].call(o.exports,o,o.exports,n),o.l=!0,o.exports}n.m=e,n.c=t,n.d=function(e,t,r){n.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},n.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n.t=function(e,t){if(1&t&&(e=n(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var r=Object.create(null);if(n.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)n.d(r,o,function(t){return e[t]}.bind(null,o));return r},n.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return n.d(t,"a",t),t},n.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},n.p="",n(n.s=0)}([function(e,t,n){"use strict";n.r(t);var r=function(){function e(e,t){this.x=e,this.y=t}return e.prototype.add=function(e){return new this.constructor(this.x+e.x,this.y+e.y)},e.prototype.sub=function(e){return new this.constructor(this.x-e.x,this.y-e.y)},e.prototype.mul=function(e){return new this.constructor(this.x*e,this.y*e)},e.prototype.div=function(e){return new this.constructor(this.x/e,this.y/e)},e.prototype.mag=function(){return Math.sqrt(this.x*this.x+this.y*this.y)},e.prototype.normalize=function(){var e=this.mag();return e>0?this.div(e):new this.constructor(0,0)},e.prototype.dot=function(e){return this.x*e.x+this.y*e.y},e}();function o(e){return new r(e.x,e.y)}var i=function(){function e(){this.p=new Map}return e.prototype.getData=function(e){return this.p.get(e)},e.prototype.setData=function(e,t){this.p.set(e,t)},e.prototype.clear=function(){this.p.clear()},e}();window.onload=function(){var e=new i;e.setData("a",1),e.setData("b",2),e.setData("c",3);var t=new r(5,7),n=new r(3,2),u=t.add(n);console.log("Vec sum:",u)}}]); From ba09c2017c92a0137d68d8d1171b22f5a3794070 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 13:57:04 -0800 Subject: [PATCH 12/13] Ignore semver file --- src/codegen/sdk/codebase/codebase_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index 9af5818ce..3fe9b0d80 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -66,6 +66,7 @@ ".*/tests/static/chunk-.*.js", ".*/ace/.*.js", "src/vs/platform/contextview/browser/contextMenuService.ts", + "semver.js", "*/compiled/*", "*.min.js", "*@*.js", From fcfa25d76072c4e8448f7bf5abcd52a45b5e47a9 Mon Sep 17 00:00:00 2001 From: Edward Li Date: Mon, 24 Feb 2025 14:12:19 -0800 Subject: [PATCH 13/13] Fix semver match --- src/codegen/sdk/codebase/codebase_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/sdk/codebase/codebase_context.py b/src/codegen/sdk/codebase/codebase_context.py index 3fe9b0d80..00567416d 100644 --- a/src/codegen/sdk/codebase/codebase_context.py +++ b/src/codegen/sdk/codebase/codebase_context.py @@ -66,7 +66,7 @@ ".*/tests/static/chunk-.*.js", ".*/ace/.*.js", "src/vs/platform/contextview/browser/contextMenuService.ts", - "semver.js", + "*/semver.js", "*/compiled/*", "*.min.js", "*@*.js",