Skip to content

Commit

Permalink
feat(nlcst-pattern-match): add MatchResult type
Browse files Browse the repository at this point in the history
  • Loading branch information
azu committed Oct 8, 2017
1 parent 1e97ca6 commit 73c1434
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 21 deletions.
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,37 @@ import Parser from "parse-english";
const matcher = PatternMatcher({
parser: new Parser()
});
const pattern = matcher.tag`This is a ${noun()}`;
const text = "This is a pen";
const pattern = matcher.tag`This is a ${noun()}`;
const results = matcher.match(text, pattern);
/*
[
{
index: 10,
text: "pen",
nodes: []
nodeList: []
}
]
*/
```

Make named capture

```js
// Example `dict` object built around a RegExp named capture group.
// https://developers.google.com/web/updates/2017/07/upcoming-regexp-features
const dict = {
  // Captures the word that follows "This is " under the group name `noun`.
  pattern: /This is (?<noun>\w+)/,
  // `$<noun>` is the String.prototype.replace syntax for inserting the named group.
  replace: 'This is a $<noun>',
  // Template literals interpolate with `${...}`; writing `$<noun>` here would be
  // emitted verbatim instead of the captured word.
  message: ({ noun }) => {
    return `${noun} is not noun`;
  },
  // Passes only when `new Tag(noun).type` is "noun".
  test: ({ noun }) => {
    return new Tag(noun).type === "noun";
  }
};

```


## Changelog

Expand Down
42 changes: 26 additions & 16 deletions packages/nlcst-pattern-match/src/nlcst-pattern-match.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ function matchNode(actualNode: Node, expectedNode: Node): boolean {
});
}

function match(parent: Sentence, expectedNode: Sentence) {
function match(text: string, parent: Sentence, expectedNode: Sentence): MatchResult[] {
if (!isSentence(parent)) {
throw new Error(`Expected sentence node: ${JSON.stringify(parent)}`);
}
Expand All @@ -69,7 +69,7 @@ function match(parent: Sentence, expectedNode: Sentence) {
const expectedChildren = expectedNode.children;
const tokenCount = expectedChildren.length;
const matchTokens: Node[] = [];
const results: { position: Position | undefined; nodeList: Node[] }[] = [];
const results: MatchResult[] = [];
let currentTokenPosition = 0;
let index = 0;
for (index = 0; index < children.length; index++) {
Expand Down Expand Up @@ -120,15 +120,19 @@ function match(parent: Sentence, expectedNode: Sentence) {
matchTokens.length = 0;
const firstNode = tokens[0];
const lastNode = tokens[tokens.length - 1];
if (!firstNode.position) {
throw new Error(`The node has not position: ${firstNode}`);
}
if (!lastNode.position) {
throw new Error(`The node has not position: ${firstNode}`);
}
results.push({
position:
firstNode.position && lastNode.position
? {
start: firstNode.position.start,
end: lastNode.position.end,
index: firstNode.index
}
: undefined,
text: text.slice(firstNode.position.start.offset, lastNode.position.end.offset),
position: {
start: firstNode.position.start,
end: lastNode.position.end,
index: firstNode.position.start.offset
},
nodeList: tokens
});
}
Expand All @@ -149,26 +153,32 @@ export interface PatternNode extends Node {
pattern: RegExp;
}

/**
 * One match produced by `match()` for a pattern inside a sentence.
 */
export interface MatchResult {
/**
 * The matched substring of the input text: sliced from the start offset of
 * the first matched node to the end offset of the last matched node.
 */
text: string;
/**
 * Span of the match: `start` of the first matched node, `end` of the last
 * matched node, plus `index` set to the first node's start offset.
 * NOTE(review): the visible `match()` throws when either node lacks a
 * position, so it appears to always populate this field — confirm before
 * relying on it being defined.
 */
position: Position | undefined;
/** The matched nodes, in order, from the first to the last matched token. */
nodeList: Node[];
}

export class PatternMatcher {
private parser: { parse: ((text: string) => Root) };

constructor(args: PatternMatcherArgs) {
this.parser = args.parser;
}

match(text: string, pattern: Sentence) {
match(text: string, pattern: Sentence): MatchResult[] {
if (typeof text !== "string") {
throw new Error(
"Invalid Arguments: match(text: string, pattern: Sentence)\n" +
"matcher.match(text, matcher.tag`pattern`)"
"matcher.match(text, matcher.tag`pattern`)"
);
}
let allResults: { position: Position | undefined; nodeList: Node[] }[] = [];
let allResults: MatchResult[] = [];
const AST = this.parser.parse(text);
walk(AST, {
enter: function (node: Node) {
enter: function(node: Node) {
if (isSentence(node)) {
const results = match(node, pattern);
const results = match(text, node, pattern);
allResults = allResults.concat(results);
this.skip();
}
Expand Down Expand Up @@ -237,7 +247,7 @@ export class PatternMatcher {
});
const AST = this.parser.parse(allString);
walk(AST, {
enter: function (node: Node, parent: Parent) {
enter: function(node: Node, parent: Parent) {
replaceHolders
.filter(replaceHolder => {
return node.position!.start.offset === replaceHolder.start;
Expand Down
124 changes: 121 additions & 3 deletions packages/nlcst-pattern-match/test/nlcst-pattern-match-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as assert from "assert";
import { isWord } from "nlcst-types";
import { EnglishParser } from "nlcst-parse-english";

const toString = require('nlcst-to-string');
const toString = require("nlcst-to-string");
// const inspect = require('unist-util-inspect');

describe("nlcst-pattern-match", () => {
Expand Down Expand Up @@ -119,7 +119,122 @@ ${JSON.stringify(actual)}
});

describe("#match", () => {
it("match and replace", () => {
it("should return MatchResult[]", () => {
const englishParser = new EnglishParser();
const patternMatcher = new PatternMatcher({
parser: englishParser
});
const pattern = patternMatcher.tag`Bob ${{
type: "*",
data: {
pos: /^VB/ // verb
}
}} it.`;
const text = "Bob does it.";
const results = patternMatcher.match(text, pattern);
assert.equal(results.length, 1, "results should have 1");
const [result] = results;
assert.deepEqual(result.position, {
index: 0,
end: {
column: 13,
line: 1,
offset: 12
},
start: {
column: 1,
line: 1,
offset: 0
}
});
assert.deepEqual(
result.nodeList,
[
{
type: "WordNode",
children: [
{
type: "TextNode",
value: "Bob",
position: {
start: { line: 1, column: 1, offset: 0 },
end: { line: 1, column: 4, offset: 3 }
}
}
],
position: {
start: { line: 1, column: 1, offset: 0 },
end: { line: 1, column: 4, offset: 3 }
},
data: { pos: "NNP" }
},
{
type: "WhiteSpaceNode",
value: " ",
position: {
start: { line: 1, column: 4, offset: 3 },
end: { line: 1, column: 5, offset: 4 }
}
},
{
type: "WordNode",
children: [
{
type: "TextNode",
value: "does",
position: {
start: { line: 1, column: 5, offset: 4 },
end: { line: 1, column: 9, offset: 8 }
}
}
],
position: {
start: { line: 1, column: 5, offset: 4 },
end: { line: 1, column: 9, offset: 8 }
},
data: { pos: "VBZ" }
},
{
type: "WhiteSpaceNode",
value: " ",
position: {
start: { line: 1, column: 9, offset: 8 },
end: { line: 1, column: 10, offset: 9 }
}
},
{
type: "WordNode",
children: [
{
type: "TextNode",
value: "it",
position: {
start: { line: 1, column: 10, offset: 9 },
end: { line: 1, column: 12, offset: 11 }
}
}
],
position: {
start: { line: 1, column: 10, offset: 9 },
end: { line: 1, column: 12, offset: 11 }
},
data: { pos: "PRP" }
},
{
type: "PunctuationNode",
value: ".",
position: {
start: { line: 1, column: 12, offset: 11 },
end: { line: 1, column: 13, offset: 12 }
},
data: { pos: "." }
}
],
`\n${JSON.stringify(result.nodeList)}\n`
);
assert.strictEqual(result.text, "Bob does it.");
});
it("match data and pattern", () => {
const englishParser = new EnglishParser();
const patternMatcher = new PatternMatcher({
parser: englishParser
Expand All @@ -135,7 +250,10 @@ ${JSON.stringify(actual)}
}}.`;
const text = "Click Delete if you want to delete the entire document.";
const results = patternMatcher.match(text, pattern);
assert.strictEqual(toString(results[0].nodeList), "Click Delete if you want to delete the entire document.");
assert.strictEqual(
toString(results[0].nodeList),
"Click Delete if you want to delete the entire document."
);
});
it("match regexp", () => {
const englishParser = new EnglishParser();
Expand Down

0 comments on commit 73c1434

Please sign in to comment.