Skip to content

Commit

Permalink
Add literal matching, make default
Browse files Browse the repository at this point in the history
  • Loading branch information
danielstjules committed Mar 20, 2017
1 parent 1f4bc4a commit 27aec53
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 28 deletions.
17 changes: 11 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,14 @@ You have the freedom to specify a threshold determining the smallest subset of
nodes to analyze. This will identify code with a similar structure, based
on the AST node types, e.g. BlockStatement, VariableDeclaration,
ObjectExpression, etc. By default, it searches nodes with matching identifiers
for copy-paste oriented detection, but this can be disabled.
and literals for copy-paste oriented detection, but this can be disabled.
For context, identifiers include the names of variables, methods, properties,
etc, while literals are strings, numbers, etc.

The tool accepts a list of paths to parse and prints any found matches. Any
directories among the paths are walked recursively, and only `.js` and `.jsx`
files are analyzed. Any `node_modules` and `bower_components` dirs are also
files are analyzed. You can explicitly pass file paths that include a different
extension as well. Any `node_modules` and `bower_components` dirs are also
ignored.

![screenshot](http://danielstjules.com/github/jsinspect-example.png)
Expand All @@ -59,7 +62,7 @@ Usage: jsinspect [options] <paths ...>
Detect copy-pasted and structurally similar JavaScript code
Example use: jsinspect -I --ignore "test" ./path/to/src
Example use: jsinspect -I -L --ignore "test" ./path/to/src
Options:
Expand All @@ -71,6 +74,7 @@ Options:
-c, --config path to config file (default: .jsinspectrc)
-r, --reporter [default|json|pmd] specify the reporter to use
-I, --no-identifiers do not match identifiers
-L, --no-literals do not match literals
-C, --no-color disable colors
--ignore <pattern> ignore paths matching a regex
--truncate <number> length to truncate lines (default: 100, off: 0)
Expand All @@ -83,6 +87,7 @@ be used in place of the defaults listed above. For example:
{
"threshold": 30,
"identifiers": true,
"literals": true,
"ignore": "Test.js|Spec.js",
"reporter": "json",
"truncate": 100,
Expand All @@ -97,9 +102,9 @@ test/spec dir.
jsinspect -t 50 --ignore "test" ./path/to/src
```

From there, feel free to try decreasing the threshold and ignoring identifiers
using the `-I` flag. A threshold of 30 may lead you to discover new areas of
interest for refactoring or cleanup.
From there, feel free to try decreasing the threshold, ignoring identifiers
using the `-I` flag and ignoring literals with `-L`. A lower threshold may lead
you to discover new areas of interest for refactoring or cleanup.

## Integration

Expand Down
6 changes: 4 additions & 2 deletions bin/jsinspect
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ var reporters = require('../lib/reporters');

var summary = `
Detect copy-pasted and structurally similar JavaScript code
Example use: jsinspect -I --ignore "test" ./path/to/src
Example use: jsinspect -I -L --ignore "test" ./path/to/src
`;

program
Expand All @@ -26,6 +26,7 @@ program
.option('-r, --reporter [default|json|pmd]',
'specify the reporter to use')
.option('-I, --no-identifiers', 'do not match identifiers')
.option('-L, --no-literals', 'do not match literals')
.option('-C, --no-color', 'disable colors')
.option('--ignore <pattern>', 'ignore paths matching a regex')
.option('--truncate <number>',
Expand All @@ -46,7 +47,7 @@ if (fs.existsSync(rcPath) && fs.lstatSync(rcPath).isFile()) {
process.exit(3);
}

['threshold', 'identifiers', 'ignore', 'minInstances',
['threshold', 'identifiers', 'literals', 'ignore', 'minInstances',
'reporter', 'truncate'].forEach((option) => {
if (program[option] === undefined && (option in rc)) {
program[option] = rc[option];
Expand Down Expand Up @@ -86,6 +87,7 @@ if (!paths.length) {
var inspector = new Inspector(paths, {
threshold: program.threshold,
identifiers: program.identifiers,
literals: program.literals,
minInstances: program.minInstances
});

Expand Down
58 changes: 40 additions & 18 deletions lib/inspector.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ class Inspector extends EventEmitter {
/**
* Creates a new Inspector, which extends EventEmitter. filePaths is expected
* to be an array of string paths. Also accepts an options object with any
* combination of the following: threshold, identifiers and minInstances.
* Threshold indicates the minimum number of nodes to analyze. Identifiers
* indicates whether or not the nodes in a match should also have matching
* identifiers, and minInstances specifies the min number of instances for a
* match. An instance of Inspector emits the following events: start, match
* and end.
* combination of the following: threshold, identifiers literals, and
* minInstances. Threshold indicates the minimum number of nodes to analyze.
* Identifiers indicates whether or not the nodes in a match should also have
* matching identifiers, and literals whether or not literal values should
* match. minInstances specifies the min number of instances for a match.
* An instance of Inspector emits the following events: start, match and end.
*
* @constructor
* @extends EventEmitter
Expand All @@ -30,8 +30,9 @@ class Inspector extends EventEmitter {
this._filePaths = filePaths || [];
this._threshold = opts.threshold || 30;
this._identifiers = opts.identifiers;
this._literals = opts.literals;
this._minInstances = opts.minInstances || 2;
this._map = {};
this._map = Object.create(null);
this._fileContents = {};
this._traversals = {};
this.numFiles = this._filePaths.length;
Expand Down Expand Up @@ -144,7 +145,10 @@ class Inspector extends EventEmitter {
// groups will be of type Node[][][]
groups = [this._map[key].slice(0)];
if (this._identifiers) {
groups = this._groupByMatchingIds(groups);
groups = this._groupByMatchingIdentifiers(groups);
}
if (this._literals) {
groups = this._groupByMatchingLiterals(groups);
}

for (i = 0; i < groups.length; i++) {
Expand All @@ -167,7 +171,7 @@ class Inspector extends EventEmitter {
* @param {Node[][][]} groups
* @returns {Node[][][]}
*/
_groupByMatchingIds(groups) {
_groupByMatchingIdentifiers(groups) {
return this._group(groups, (nodes) => {
return nodes
.filter(node => node.name)
Expand All @@ -176,6 +180,24 @@ class Inspector extends EventEmitter {
});
}

/**
* Iterates over the multi-dimensional array of nodes, and returns a new
* array grouping them based on matching literals.
*
* @private
*
* @param {Node[][][]} groups
* @returns {Node[][][]}
*/
_groupByMatchingLiterals(groups) {
return this._group(groups, (nodes) => {
return nodes
.filter(node => node.type.includes('Literal'))
.map(node => node.value)
.join(':');
});
}

/**
* Expands each instance of a match to the largest common sequence of nodes
* with the same type, and optionally identifiers. Each array of nodes is
Expand All @@ -202,16 +224,16 @@ class Inspector extends EventEmitter {
return nodeArrays.some(array => array.indexOf(node) !== -1)
});
};
var isComplete = (nodes) => {
return (!nodeUtils.typesMatch(nodes) || alreadyIncluded(nodes)) ||
(this._identifiers && !nodeUtils.identifiersMatch(nodes)) ||
(this._literals && !nodeUtils.literalsMatch(nodes));
};

var nodes;
while (true) {
positions = positions.map(incr);
nodes = positions.map(getNode);
if (!nodeUtils.typesMatch(nodes) || alreadyIncluded(nodes)) {
return;
} else if (this._identifiers && !nodeUtils.identifiersMatch(nodes)) {
return;
}
var nodes = positions.map(getNode);
if (isComplete(nodes)) return;
nodeArrays.forEach((array, i) => array.push(nodes[i]));
}
}
Expand Down Expand Up @@ -296,7 +318,7 @@ class Inspector extends EventEmitter {
*/
_group(groups, fn) {
var res = [];
var map = {};
var map = Object.create(null);
var id, i, j;

for (i = 0; i < groups.length; i++) {
Expand All @@ -313,7 +335,7 @@ class Inspector extends EventEmitter {
res.push(map[id]);
}

map = {};
map = Object.create(null);
}

return res;
Expand Down
17 changes: 15 additions & 2 deletions lib/nodeUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,21 @@ class NodeUtils {
* @returns {boolean}
*/
static identifiersMatch(nodes) {
return nodes[0] && nodes[0].name && nodes.every(node => {
return node.name === nodes[0].name;
return nodes[0] && nodes.every(node => {
return node && node.name === nodes[0].name;
});
}

/**
* Returns whether or not all nodes have the same literal value.
*
* @param {Node[]} nodes
* @returns {boolean}
*/
static literalsMatch(nodes) {
return nodes[0] && nodes.every(node => {
return node && (!node.type.includes('Literal') ||
node.value === nodes[0].value);
});
}
}
Expand Down

0 comments on commit 27aec53

Please sign in to comment.