Skip to content

Commit

Permalink
Chore: make fuzzer produce minimal reproducible examples of bugs (#11700
Browse files Browse the repository at this point in the history
)

When the fuzzer for rules discovers a bug, it outputs source code that reproduces the bug. However, since the source code is autogenerated, it is often extremely complex and contains lots of parts that are irrelevant to the cause of the bug, making it tedious to figure out what the bug actually is.

This commit adds a "code sample minimizer" to the fuzzer that tries to remove irrelevant parts of the AST so that the resulting code sample is as small as possible while still reproducing the issue found by the fuzzer.

As a demonstration, the minimizer would have reduced the very large code sample from the fuzzing error in [this CI build](https://travis-ci.org/eslint/eslint/jobs/519526960) down to the following simplified code:

```js
($2 = $3) ?
$4 : ($5)
```
  • Loading branch information
not-an-aardvark committed May 11, 2019
1 parent 5537957 commit af81cb3
Show file tree
Hide file tree
Showing 5 changed files with 355 additions and 19 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
"nyc": "^13.3.0",
"proxyquire": "^2.0.1",
"puppeteer": "^1.14.0",
"recast": "^0.17.6",
"shelljs": "^0.8.2",
"sinon": "^3.3.0",
"temp": "^0.9.0",
Expand Down
73 changes: 73 additions & 0 deletions tests/tools/code-sample-minimizer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"use strict";

const { assert } = require("chai");
const reduceBadExampleSize = require("../../tools/code-sample-minimizer");

describe("reduceBadExampleSize()", () => {
it("extracts relevant part of deeply nested code", () => {
const initialCode = `
if (true) {
while (false) {
for (let i = 1; i < 10; i++) {
let j = foo
? bar
: THIS_EXPRESSION_CAUSES_A_BUG
}
}
}
`;

const expectedFinalCode = "THIS_EXPRESSION_CAUSES_A_BUG";

assert.strictEqual(
reduceBadExampleSize({
sourceText: initialCode,
predicate: code => code.includes("THIS_EXPRESSION_CAUSES_A_BUG")
}),
expectedFinalCode
);
});

it("removes irrelevant parts of AST nodes with many children", () => {
const initialCode = `
foo;
bar;
baz;
let x = [
1,
2,
,
3,
THIS_EXPRESSION_CAUSES_A_BUG,
4
]
quux;
`;

const expectedFinalCode = "THIS_EXPRESSION_CAUSES_A_BUG";

assert.strictEqual(
reduceBadExampleSize({
sourceText: initialCode,
predicate: code => code.includes("THIS_EXPRESSION_CAUSES_A_BUG")
}),
expectedFinalCode
);
});

it("removes irrelevant comments from the source code", () => {
const initialCode = `
var /* aaa */foo = bar;
`;

const expectedFinalCode = "var foo = bar;";

assert.strictEqual(
reduceBadExampleSize({
sourceText: initialCode,
predicate: code => code.includes("var") && code.includes("foo = bar")
}),
expectedFinalCode
);
});
});
23 changes: 13 additions & 10 deletions tests/tools/eslint-fuzzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ describe("eslint-fuzzer", function() {
describe("when running in crash-only mode", () => {
describe("when a rule crashes on the given input", () => {
it("should report the crash with a minimal config", () => {
fakeRule = () => ({
fakeRule = context => ({
Program() {
throw CRASH_BUG;
if (context.getSourceCode().text === "foo") {
throw CRASH_BUG;
}
}
});

Expand Down Expand Up @@ -81,9 +83,11 @@ describe("eslint-fuzzer", function() {

describe("when a rule crashes on the given input", () => {
it("should report the crash with a minimal config", () => {
fakeRule = () => ({
fakeRule = context => ({
Program() {
throw CRASH_BUG;
if (context.getSourceCode().text === "foo") {
throw CRASH_BUG;
}
}
});

Expand All @@ -103,7 +107,7 @@ describe("eslint-fuzzer", function() {
// Replaces programs that start with "foo" with "bar"
fakeRule = context => ({
Program(node) {
if (context.getSourceCode().text.startsWith("foo")) {
if (context.getSourceCode().text === `foo ${disableFixableRulesComment}`) {
context.report({
node,
message: "no foos allowed",
Expand Down Expand Up @@ -137,7 +141,7 @@ describe("eslint-fuzzer", function() {
Program(node) {
const sourceCode = context.getSourceCode();

if (sourceCode.text.startsWith("foo")) {
if (sourceCode.text === `foo ${disableFixableRulesComment}`) {
context.report({
node,
message: "no foos allowed",
Expand Down Expand Up @@ -178,7 +182,7 @@ describe("eslint-fuzzer", function() {
Program(node) {
const sourceCode = context.getSourceCode();

if (sourceCode.text.startsWith("foo") || sourceCode.text.startsWith("bar")) {
if (sourceCode.text.startsWith("foo") || sourceCode.text === intermediateCode) {
context.report({
node,
message: "no foos allowed",
Expand Down Expand Up @@ -229,7 +233,7 @@ describe("eslint-fuzzer", function() {
message: "no foos allowed",
fix: fixer => fixer.replaceText(node, "bar")
});
} else if (sourceCode.text.startsWith("bar")) {
} else if (sourceCode.text === `bar ${disableFixableRulesComment}`) {
throw CRASH_BUG;
}
}
Expand All @@ -245,8 +249,7 @@ describe("eslint-fuzzer", function() {
assert.strictEqual(results.length, 1);
assert.strictEqual(results[0].type, "crash");

// TODO: (not-an-aardvark) It might be more useful to output the intermediate code here.
assert.strictEqual(results[0].text, `foo ${disableFixableRulesComment}`);
assert.strictEqual(results[0].text, `bar ${disableFixableRulesComment}`);
assert.deepStrictEqual(results[0].config.rules, { "test-fuzzer-rule": 2 });
assert.strictEqual(results[0].error, CRASH_BUG.stack);
});
Expand Down
206 changes: 206 additions & 0 deletions tools/code-sample-minimizer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"use strict";

const evk = require("eslint-visitor-keys");
const recast = require("recast");
const espree = require("espree");
const assert = require("assert");

/**
* Determines whether an AST node could be an expression, based on the type
* @param {ASTNode} node The node
* @returns {boolean} `true` if the node could be an expression
*/
function isMaybeExpression(node) {
return node.type.endsWith("Expression") ||
node.type === "Identifier" ||
node.type === "MetaProperty" ||
node.type.endsWith("Literal");
}

/**
* Determines whether an AST node is a statement
* @param {ASTNode} node The node
* @returns {boolean} `true` if the node is a statement
*/
function isStatement(node) {
return node.type.endsWith("Statement") || node.type.endsWith("Declaration");
}

/**
* Given "bad" source text (e.g. an code sample that causes a rule to crash), tries to return a smaller
* piece of source text which is also "bad", to make it easier for a human to figure out where the
* problem is.
* @param {string} options.sourceText Initial piece of "bad" source text
* @param {function(string): boolean} options.predicate A predicate that returns `true` for bad source text and `false` for good source text
* @param {Parser} [options.parser] The parser used to parse the source text. Defaults to a modified
* version of espree that uses recent parser options.
* @param {Object} [options.visitorKeys] The visitor keys of the AST. Defaults to eslint-visitor-keys.
* @returns {string} Another piece of "bad" source text, which may or may not be smaller than the original source text.
*/
function reduceBadExampleSize({
sourceText,
predicate,
parser = {
parse: (code, options) =>
espree.parse(code, {
...options,
loc: true,
range: true,
raw: true,
tokens: true,
comment: true,
eslintVisitorKeys: true,
eslintScopeManager: true,
ecmaVersion: 2018,
sourceType: "script"
})
},
visitorKeys = evk.KEYS
}) {
let counter = 0;

/**
* Returns a new unique identifier
* @returns {string} A name for a new identifier
*/
function generateNewIdentifierName() {
return `$${(counter++)}`;
}

/**
* Determines whether a source text sample is "bad"
* @param {string} updatedSourceText The sample
* @returns {boolean} `true` if the sample is "bad"
*/
function reproducesBadCase(updatedSourceText) {
try {
parser.parse(updatedSourceText);
} catch (err) {
return false;
}

return predicate(updatedSourceText);
}

assert(reproducesBadCase(sourceText), "Original source text should reproduce issue");
const parseResult = recast.parse(sourceText, { parser });

/**
* Recursively removes descendant subtrees of the given AST node and replaces
* them with simplified variants to produce a simplified AST which is still considered "bad".
* @param {ASTNode} node An AST node to prune. May be mutated by this call, but the
* resulting AST will still produce "bad" source code.
* @returns {void}
*/
function pruneIrrelevantSubtrees(node) {
for (const key of visitorKeys[node.type]) {
if (Array.isArray(node[key])) {
for (let index = node[key].length - 1; index >= 0; index--) {
const [childNode] = node[key].splice(index, 1);

if (!reproducesBadCase(recast.print(parseResult).code)) {
node[key].splice(index, 0, childNode);
if (childNode) {
pruneIrrelevantSubtrees(childNode);
}
}
}
} else if (typeof node[key] === "object" && node[key] !== null) {

const childNode = node[key];

if (isMaybeExpression(childNode)) {
node[key] = { type: "Identifier", name: generateNewIdentifierName(), range: childNode.range };
if (!reproducesBadCase(recast.print(parseResult).code)) {
node[key] = childNode;
pruneIrrelevantSubtrees(childNode);
}
} else if (isStatement(childNode)) {
node[key] = { type: "EmptyStatement", range: childNode.range };
if (!reproducesBadCase(recast.print(parseResult).code)) {
node[key] = childNode;
pruneIrrelevantSubtrees(childNode);
}
}
}
}
}

/**
* Recursively tries to extract a descendant node from the AST that is "bad" on its own
* @param {ASTNode} node A node which produces "bad" source code
* @returns {ASTNode} A descendent of `node` which is also bad
*/
function extractRelevantChild(node) {
const childNodes = [].concat(
...visitorKeys[node.type]
.map(key => (Array.isArray(node[key]) ? node[key] : [node[key]]))
);

for (const childNode of childNodes) {
if (!childNode) {
continue;
}

if (isMaybeExpression(childNode)) {
if (reproducesBadCase(recast.print(childNode).code)) {
return extractRelevantChild(childNode);
}

} else if (isStatement(childNode)) {
if (reproducesBadCase(recast.print(childNode).code)) {
return extractRelevantChild(childNode);
}
} else {
const childResult = extractRelevantChild(childNode);

if (reproducesBadCase(recast.print(childResult).code)) {
return childResult;
}
}
}
return node;
}

/**
* Removes and simplifies comments from the source text
* @param {string} text A piece of "bad" source text
* @returns {string} A piece of "bad" source text with fewer and/or simpler comments.
*/
function removeIrrelevantComments(text) {
const ast = parser.parse(text);

if (ast.comments) {
for (const comment of ast.comments) {
for (const potentialSimplification of [

// Try deleting the comment
`${text.slice(0, comment.range[0])}${text.slice(comment.range[1])}`,

// Try replacing the comment with a space
`${text.slice(0, comment.range[0])} ${text.slice(comment.range[1])}`,

// Try deleting the contents of the comment
text.slice(0, comment.range[0] + 2) + text.slice(comment.type === "Block" ? comment.range[1] - 2 : comment.range[1])
]) {
if (reproducesBadCase(potentialSimplification)) {
return removeIrrelevantComments(potentialSimplification);
}
}
}
}

return text;
}

pruneIrrelevantSubtrees(parseResult.program);
const relevantChild = recast.print(extractRelevantChild(parseResult.program)).code;

assert(reproducesBadCase(relevantChild), "Extracted relevant source text should reproduce issue");
const result = removeIrrelevantComments(relevantChild);

assert(reproducesBadCase(result), "Source text with irrelevant comments removed should reproduce issue");
return result;
}

module.exports = reduceBadExampleSize;
Loading

0 comments on commit af81cb3

Please sign in to comment.