Chore: make fuzzer produce minimal reproducible examples of bugs (#11700

) When the fuzzer for rules discovers a bug, it outputs source code that reproduces the bug. However, since the source code is autogenerated, it is often extremely complex and contains lots of parts that are irrelevant to the cause of the bug, making it tedious to figure out what the bug actually is. This commit adds a "code sample minimizer" to the fuzzer that tries to remove irrelevant parts of the AST so that the resulting code sample is as small as possible while still reproducing the issue found by the fuzzer. As a demonstration, the minimizer would have reduced the very large code sample from the fuzzing error in [this CI build](https://travis-ci.org/eslint/eslint/jobs/519526960) down to the following simplified code: ```js ($2 = $3) ? $4 : ($5) ```
eslint · May 11, 2019 · af81cb3 · af81cb3
1 parent 5537957
commit af81cb3
Show file tree

Hide file tree

Showing 5 changed files with 355 additions and 19 deletions.
diff --git a/package.json b/package.json
@@ -122,6 +122,7 @@
     "nyc": "^13.3.0",
     "proxyquire": "^2.0.1",
     "puppeteer": "^1.14.0",
+    "recast": "^0.17.6",
     "shelljs": "^0.8.2",
     "sinon": "^3.3.0",
     "temp": "^0.9.0",

diff --git a/tests/tools/code-sample-minimizer.js b/tests/tools/code-sample-minimizer.js
@@ -0,0 +1,73 @@
+"use strict";
+
+const { assert } = require("chai");
+const reduceBadExampleSize = require("../../tools/code-sample-minimizer");
+
+describe("reduceBadExampleSize()", () => {
+    it("extracts relevant part of deeply nested code", () => {
+        const initialCode = `
+            if (true) {
+                while (false) {
+                    for (let i = 1; i < 10; i++) {
+                        let j = foo
+                            ? bar
+                            : THIS_EXPRESSION_CAUSES_A_BUG
+                    }
+                }
+            }
+        `;
+
+        const expectedFinalCode = "THIS_EXPRESSION_CAUSES_A_BUG";
+
+        assert.strictEqual(
+            reduceBadExampleSize({
+                sourceText: initialCode,
+                predicate: code => code.includes("THIS_EXPRESSION_CAUSES_A_BUG")
+            }),
+            expectedFinalCode
+        );
+    });
+
+    it("removes irrelevant parts of AST nodes with many children", () => {
+        const initialCode = `
+            foo;
+            bar;
+            baz;
+            let x = [
+                1,
+                2,
+                ,
+                3,
+                THIS_EXPRESSION_CAUSES_A_BUG,
+                4
+            ]
+            quux;
+        `;
+
+        const expectedFinalCode = "THIS_EXPRESSION_CAUSES_A_BUG";
+
+        assert.strictEqual(
+            reduceBadExampleSize({
+                sourceText: initialCode,
+                predicate: code => code.includes("THIS_EXPRESSION_CAUSES_A_BUG")
+            }),
+            expectedFinalCode
+        );
+    });
+
+    it("removes irrelevant comments from the source code", () => {
+        const initialCode = `
+        var /* aaa */foo = bar;
+    `;
+
+        const expectedFinalCode = "var foo = bar;";
+
+        assert.strictEqual(
+            reduceBadExampleSize({
+                sourceText: initialCode,
+                predicate: code => code.includes("var") && code.includes("foo = bar")
+            }),
+            expectedFinalCode
+        );
+    });
+});
diff --git a/tests/tools/eslint-fuzzer.js b/tests/tools/eslint-fuzzer.js
@@ -44,9 +44,11 @@ describe("eslint-fuzzer", function() {
     describe("when running in crash-only mode", () => {
         describe("when a rule crashes on the given input", () => {
             it("should report the crash with a minimal config", () => {
-                fakeRule = () => ({
+                fakeRule = context => ({
                     Program() {
-                        throw CRASH_BUG;
+                        if (context.getSourceCode().text === "foo") {
+                            throw CRASH_BUG;
+                        }
                     }
                 });
 
@@ -81,9 +83,11 @@ describe("eslint-fuzzer", function() {
 
         describe("when a rule crashes on the given input", () => {
             it("should report the crash with a minimal config", () => {
-                fakeRule = () => ({
+                fakeRule = context => ({
                     Program() {
-                        throw CRASH_BUG;
+                        if (context.getSourceCode().text === "foo") {
+                            throw CRASH_BUG;
+                        }
                     }
                 });
 
@@ -103,7 +107,7 @@ describe("eslint-fuzzer", function() {
                 // Replaces programs that start with "foo" with "bar"
                 fakeRule = context => ({
                     Program(node) {
-                        if (context.getSourceCode().text.startsWith("foo")) {
+                        if (context.getSourceCode().text === `foo ${disableFixableRulesComment}`) {
                             context.report({
                                 node,
                                 message: "no foos allowed",
@@ -137,7 +141,7 @@ describe("eslint-fuzzer", function() {
                     Program(node) {
                         const sourceCode = context.getSourceCode();
 
-                        if (sourceCode.text.startsWith("foo")) {
+                        if (sourceCode.text === `foo ${disableFixableRulesComment}`) {
                             context.report({
                                 node,
                                 message: "no foos allowed",
@@ -178,7 +182,7 @@ describe("eslint-fuzzer", function() {
                     Program(node) {
                         const sourceCode = context.getSourceCode();
 
-                        if (sourceCode.text.startsWith("foo") || sourceCode.text.startsWith("bar")) {
+                        if (sourceCode.text.startsWith("foo") || sourceCode.text === intermediateCode) {
                             context.report({
                                 node,
                                 message: "no foos allowed",
@@ -229,7 +233,7 @@ describe("eslint-fuzzer", function() {
                                 message: "no foos allowed",
                                 fix: fixer => fixer.replaceText(node, "bar")
                             });
-                        } else if (sourceCode.text.startsWith("bar")) {
+                        } else if (sourceCode.text === `bar ${disableFixableRulesComment}`) {
                             throw CRASH_BUG;
                         }
                     }
@@ -245,8 +249,7 @@ describe("eslint-fuzzer", function() {
                 assert.strictEqual(results.length, 1);
                 assert.strictEqual(results[0].type, "crash");
 
-                // TODO: (not-an-aardvark) It might be more useful to output the intermediate code here.
-                assert.strictEqual(results[0].text, `foo ${disableFixableRulesComment}`);
+                assert.strictEqual(results[0].text, `bar ${disableFixableRulesComment}`);
                 assert.deepStrictEqual(results[0].config.rules, { "test-fuzzer-rule": 2 });
                 assert.strictEqual(results[0].error, CRASH_BUG.stack);
             });

diff --git a/tools/code-sample-minimizer.js b/tools/code-sample-minimizer.js
@@ -0,0 +1,206 @@
+"use strict";
+
+const evk = require("eslint-visitor-keys");
+const recast = require("recast");
+const espree = require("espree");
+const assert = require("assert");
+
+/**
+ * Determines whether an AST node could be an expression, based on the type
+ * @param {ASTNode} node The node
+ * @returns {boolean} `true` if the node could be an expression
+ */
+function isMaybeExpression(node) {
+    return node.type.endsWith("Expression") ||
+        node.type === "Identifier" ||
+        node.type === "MetaProperty" ||
+        node.type.endsWith("Literal");
+}
+
+/**
+ * Determines whether an AST node is a statement
+ * @param {ASTNode} node The node
+ * @returns {boolean} `true` if the node is a statement
+ */
+function isStatement(node) {
+    return node.type.endsWith("Statement") || node.type.endsWith("Declaration");
+}
+
+/**
+ * Given "bad" source text (e.g. an code sample that causes a rule to crash), tries to return a smaller
+ * piece of source text which is also "bad", to make it easier for a human to figure out where the
+ * problem is.
+ * @param {string} options.sourceText Initial piece of "bad" source text
+ * @param {function(string): boolean} options.predicate A predicate that returns `true` for bad source text and `false` for good source text
+ * @param {Parser} [options.parser] The parser used to parse the source text. Defaults to a modified
+ * version of espree that uses recent parser options.
+ * @param {Object} [options.visitorKeys] The visitor keys of the AST. Defaults to eslint-visitor-keys.
+ * @returns {string} Another piece of "bad" source text, which may or may not be smaller than the original source text.
+ */
+function reduceBadExampleSize({
+    sourceText,
+    predicate,
+    parser = {
+        parse: (code, options) =>
+            espree.parse(code, {
+                ...options,
+                loc: true,
+                range: true,
+                raw: true,
+                tokens: true,
+                comment: true,
+                eslintVisitorKeys: true,
+                eslintScopeManager: true,
+                ecmaVersion: 2018,
+                sourceType: "script"
+            })
+    },
+    visitorKeys = evk.KEYS
+}) {
+    let counter = 0;
+
+    /**
+     * Returns a new unique identifier
+     * @returns {string} A name for a new identifier
+     */
+    function generateNewIdentifierName() {
+        return `$${(counter++)}`;
+    }
+
+    /**
+     * Determines whether a source text sample is "bad"
+     * @param {string} updatedSourceText The sample
+     * @returns {boolean} `true` if the sample is "bad"
+     */
+    function reproducesBadCase(updatedSourceText) {
+        try {
+            parser.parse(updatedSourceText);
+        } catch (err) {
+            return false;
+        }
+
+        return predicate(updatedSourceText);
+    }
+
+    assert(reproducesBadCase(sourceText), "Original source text should reproduce issue");
+    const parseResult = recast.parse(sourceText, { parser });
+
+    /**
+     * Recursively removes descendant subtrees of the given AST node and replaces
+     * them with simplified variants to produce a simplified AST which is still considered "bad".
+     * @param {ASTNode} node An AST node to prune. May be mutated by this call, but the
+     * resulting AST will still produce "bad" source code.
+     * @returns {void}
+     */
+    function pruneIrrelevantSubtrees(node) {
+        for (const key of visitorKeys[node.type]) {
+            if (Array.isArray(node[key])) {
+                for (let index = node[key].length - 1; index >= 0; index--) {
+                    const [childNode] = node[key].splice(index, 1);
+
+                    if (!reproducesBadCase(recast.print(parseResult).code)) {
+                        node[key].splice(index, 0, childNode);
+                        if (childNode) {
+                            pruneIrrelevantSubtrees(childNode);
+                        }
+                    }
+                }
+            } else if (typeof node[key] === "object" && node[key] !== null) {
+
+                const childNode = node[key];
+
+                if (isMaybeExpression(childNode)) {
+                    node[key] = { type: "Identifier", name: generateNewIdentifierName(), range: childNode.range };
+                    if (!reproducesBadCase(recast.print(parseResult).code)) {
+                        node[key] = childNode;
+                        pruneIrrelevantSubtrees(childNode);
+                    }
+                } else if (isStatement(childNode)) {
+                    node[key] = { type: "EmptyStatement", range: childNode.range };
+                    if (!reproducesBadCase(recast.print(parseResult).code)) {
+                        node[key] = childNode;
+                        pruneIrrelevantSubtrees(childNode);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Recursively tries to extract a descendant node from the AST that is "bad" on its own
+     * @param {ASTNode} node A node which produces "bad" source code
+     * @returns {ASTNode} A descendent of `node` which is also bad
+     */
+    function extractRelevantChild(node) {
+        const childNodes = [].concat(
+            ...visitorKeys[node.type]
+                .map(key => (Array.isArray(node[key]) ? node[key] : [node[key]]))
+        );
+
+        for (const childNode of childNodes) {
+            if (!childNode) {
+                continue;
+            }
+
+            if (isMaybeExpression(childNode)) {
+                if (reproducesBadCase(recast.print(childNode).code)) {
+                    return extractRelevantChild(childNode);
+                }
+
+            } else if (isStatement(childNode)) {
+                if (reproducesBadCase(recast.print(childNode).code)) {
+                    return extractRelevantChild(childNode);
+                }
+            } else {
+                const childResult = extractRelevantChild(childNode);
+
+                if (reproducesBadCase(recast.print(childResult).code)) {
+                    return childResult;
+                }
+            }
+        }
+        return node;
+    }
+
+    /**
+     * Removes and simplifies comments from the source text
+     * @param {string} text A piece of "bad" source text
+     * @returns {string} A piece of "bad" source text with fewer and/or simpler comments.
+     */
+    function removeIrrelevantComments(text) {
+        const ast = parser.parse(text);
+
+        if (ast.comments) {
+            for (const comment of ast.comments) {
+                for (const potentialSimplification of [
+
+                    // Try deleting the comment
+                    `${text.slice(0, comment.range[0])}${text.slice(comment.range[1])}`,
+
+                    // Try replacing the comment with a space
+                    `${text.slice(0, comment.range[0])} ${text.slice(comment.range[1])}`,
+
+                    // Try deleting the contents of the comment
+                    text.slice(0, comment.range[0] + 2) + text.slice(comment.type === "Block" ? comment.range[1] - 2 : comment.range[1])
+                ]) {
+                    if (reproducesBadCase(potentialSimplification)) {
+                        return removeIrrelevantComments(potentialSimplification);
+                    }
+                }
+            }
+        }
+
+        return text;
+    }
+
+    pruneIrrelevantSubtrees(parseResult.program);
+    const relevantChild = recast.print(extractRelevantChild(parseResult.program)).code;
+
+    assert(reproducesBadCase(relevantChild), "Extracted relevant source text should reproduce issue");
+    const result = removeIrrelevantComments(relevantChild);
+
+    assert(reproducesBadCase(result), "Source text with irrelevant comments removed should reproduce issue");
+    return result;
+}
+
+module.exports = reduceBadExampleSize;