Permalink
Browse files

Fix patcher for multiple source modifications

The old method assumed that patches would be applied in source order — an
unnecessary restriction that this change removes.

Also fixes a bug in replacing property initializers, by mangling the AST to mark
property-name identifiers. Adds some test cases to verify that the patcher works.
  • Loading branch information...
Ryan Patterson
Ryan Patterson committed Feb 23, 2012
1 parent fbc46f6 commit 1f97effa22d5c07c20760a004fec927628162a39
Showing with 128 additions and 25 deletions.
  1. +64 −24 lib/jsgrep.js
  2. +1 −1 narcissus
  3. +63 −0 specs/jspatch.spec.js
View
@@ -22,14 +22,10 @@ function Matcher(ast, options) {
this.ast = ast;
this.options = options || {};
this.boundVars = this.options.boundVars;
+ this.top = this.options.top || this;
this.value = ast.value;
- // jsgrepDelta -> the offset (due to source modifications) of the parsed AST's
- // locations.
- // XXX: this relies on modifications happening *in source order*
- if (typeof this.ast.tokenizer.jsgrepDelta === 'undefined') {
- this.ast.tokenizer.jsgrepDelta = 0;
- }
+ Matcher.mangleAST(this.ast);
}
exports.Matcher = Matcher;
@@ -59,6 +55,7 @@ Matcher.compilePattern = function(pattern, filename, lineno) {
*/
Matcher.compilePatternNoCache = function(pattern, filename, lineno) {
var patternAst = Narcissus.parser.parse(pattern, filename, lineno);
+ Matcher.mangleAST(patternAst);
if (patternAst.children.length == 1) {
// Discard the script node
@@ -87,10 +84,7 @@ Matcher.identifierIsMetavar = function(identifier) {
* @param {Narcissus.parser.Node} target
*/
Matcher.getSourceForNode = function(target) {
- var tokenizer = target.tokenizer;
- var start = target.start + target.tokenizer.jsgrepDelta;
- var end = target.end + target.tokenizer.jsgrepDelta;
- return target.tokenizer.source.substring(start, end);
+ return target.tokenizer.source.substring(target.start, target.end + 1);
};
/**
@@ -116,6 +110,22 @@ Matcher.getPatternFromPatch = function(patch) {
return pattern.join('\n');
};
+/**
+ * Modify the AST of the node to be used with jsgrep.
+ *
+ * XXX(rpatterson): This is an ugly hack, necessary because if we are in a
+ * property initializer, the identifier that we see isn't really an identifier,
+ * it's a property name, so we need to label it as such.
+ */
+Matcher.mangleAST = function(ast) {
+ const tokens = Narcissus.definitions.tokenIds;
+ forEachNode(ast, function(node) {
+ if (node.type === tokens.PROPERTY_INIT) {
+ node.children[0].propertyName = true;
+ }
+ });
+};
+
/**
* Attempt to find the given pattern in the AST rooted at this Matcher. For each
* match of the pattern, the callback will be called. If the callback does not
@@ -127,6 +137,9 @@ Matcher.prototype.find = function(pattern, callback, options) {
var ret = [ ];
var self = this;
forEachNode(this.ast, function(node) {
+ if (node.removed) {
+ return;
+ }
if (self._match(node, pattern, callback, options || { })) {
ret.push(node);
}
@@ -291,7 +304,7 @@ Matcher.prototype.getPatchedCode = function(patch, filename, line) {
} else {
// Need to add pLexer tokens to nodeSource, update nLexer
var replacement = pLexer.source.substring(
- pLexer.token.start, pLexer.token.end);
+ pLexer.token.start, pLexer.token.end + 1);
if (t == tokens.IDENTIFIER &&
Matcher.identifierIsMetavar(pLexer.token.value) &&
@@ -319,7 +332,7 @@ Matcher.prototype.getPatchedCode = function(patch, filename, line) {
if (mode == REMOVE) {
var removedCount = nLexer.token.end - removeStart;
nodeSource = nodeSource.substring(0, removeStart) +
- nodeSource.substring(nLexer.token.end);
+ nodeSource.substring(nLexer.token.end + 1);
nLexer.source = nodeSource;
nLexer.cursor -= removedCount;
}
@@ -357,19 +370,42 @@ function tokenizerMatchAst(lexer, v) {
}
Matcher.prototype._replaceWithString = function(target, replacement) {
- if (!target.tokenizer.jsgrepDelta) {
- target.tokenizer.jsgrepDelta = 0;
- }
-
- var start = target.start + target.tokenizer.jsgrepDelta;
- var end = target.end + target.tokenizer.jsgrepDelta;
+ var start = target.start;
+ var end = target.end;
+ var origSource = target.tokenizer.source;
target.tokenizer.source =
target.tokenizer.source.substring(0, start) +
replacement +
- target.tokenizer.source.substring(end);
- target.tokenizer.jsgrepDelta += replacement.length - (end - start);
- target = target.tokenizer.source.substring(start, end);
+ target.tokenizer.source.substring(end + 1);
+
+ var delta = replacement.length - (end - start + 1);
+
+ forEachNode(this.top.ast, function(node) {
+ if (node.removed) {
+ return;
+ }
+ if (node.end < start) {
+ // Node occurs entirely before replaced region
+ } else if (end < node.start) {
+ // Node occurs entirely after replaced region
+ node.start += delta;
+ node.end += delta;
+ } else if (start <= node.start && node.end <= end) {
+ // Node occurs entirely within replaced region
+ node.start = 0;
+ node.end = 0;
+ node.removed = true;
+ } else if (node.start <= start && end <= node.end) {
+ // Node encapsulates the replaced region
+ node.end += delta;
+ } else {
+ throw new Error('Replaced `' + origSource.substring(start, end) +
+ '` at (' + start + ', ' + end + ') but got confused on `' +
+ origSource.substring(node.start, node.end) +
+ '` at (' + node.start + ', ' + node.end + ')');
+ }
+ });
};
Matcher.prototype._match = function(node, pattern, callback, matchOptions) {
@@ -380,14 +416,15 @@ Matcher.prototype._match = function(node, pattern, callback, matchOptions) {
if (astIsEqual(node, patternAst, matchOptions)) {
if (callback) {
var v = {};
- v.node = new Matcher(node, { boundVars: v });
+ v.node = new Matcher(node, { boundVars: v, top: this.top });
for (var i in matchOptions.variables) {
if (matchOptions.variables[i] === matchOptions.variables.ellipses) {
continue;
} else if (matchOptions.variables.hasOwnProperty(i)) {
v[i] = new Matcher(matchOptions.variables[i], {
name: i,
- parent: v.node
+ parent: v.node,
+ top: this.top
});
}
}
@@ -629,7 +666,9 @@ var astIsEqual = exports.astIsEqual = function(node, pattern, config) {
case tokens.STRING:
case tokens.THIS:
case tokens.TRUE:
- return node.value == pattern.value;
+ // propertyName shenanigans, see Matcher.mangleAST
+ return node.value == pattern.value &&
+ node.propertyName === pattern.propertyName;
break;
// 0-child statements
@@ -931,6 +970,7 @@ var forEachNode = exports.forEachNode = function(node, callback) {
case tokens.BREAK:
case tokens.CONTINUE:
case tokens.DEBUGGER:
+ case tokens.ELLIPSIS:
break;
// Unary expressions
Submodule narcissus updated 2 files
+1 −1 lib/jslex.js
+32 −22 lib/jsparse.js
View
@@ -0,0 +1,63 @@
+var Matcher = require('../lib/jsgrep.js').Matcher;
+var Narcissus = require('../narcissus');
+var _ = require('underscore');
+var fs = require('fs');
+var path = require('path');
+
+describe('Matcher.getPatchedCode', function() {
+ const fileName = path.resolve(path.join(__dirname, '../tests/jquery.js'));
+ var source = fs.readFileSync(fileName);
+
+ var tests = [
+ {
+ name: 'simple replacement',
+ source: '1',
+ patch: [
+ "-1",
+ "+2"
+ ].join('\n'),
+ result: '2'
+ }, {
+ name: 'two replacements',
+ source: '1, 1',
+ patch: [
+ "-1",
+ "+2"
+ ].join('\n'),
+ result: '2, 2'
+ }, {
+ name: 'remove var',
+ source: 'var a = b;',
+ patch: [
+ "-var a = b;"
+ ].join('\n'),
+ result: ''
+ }, {
+ name: 'match token type',
+ source: '({ "key": value })',
+ patch: [
+ "-key",
+ "+newKey"
+ ].join('\n'),
+ result: '({ "key": value })'
+ }
+ ];
+
+ function runTest(test) {
+ it (test.name, function() {
+ var sourceAst = Narcissus.parser.parse(test.source, 'test source');
+ var matcher = new Matcher(sourceAst);
+ var pattern = Matcher.getPatternFromPatch(test.patch, 'test patch');
+
+ matcher.find(pattern, function(v) {
+ v.node.applyPatch(test.patch, 'test patch');
+ });
+
+ expect(matcher.ast.tokenizer.source).toBe(test.result);
+ });
+ }
+
+ for (var i = 0; i < tests.length; i++) {
+ runTest(tests[i]);
+ }
+});

0 comments on commit 1f97eff

Please sign in to comment.