feat(nlcst-pattern-match): add MatchResult type

azu · Oct 8, 2017 · 73c1434 · 73c1434
1 parent 1e97ca6
commit 73c1434
Show file tree

Hide file tree

Showing 3 changed files with 166 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -27,20 +27,37 @@ import Parser from "parse-english";
 const matcher = PatternMatcher({
     parser: new Parser()
 });
-const pattern = matcher.tag`This is a ${noun()}`;
 const text = "This is a pen";
+const pattern = matcher.tag`This is a ${noun()}`;
 const results = matcher.match(text, pattern);
 /*
     [
         {
             index: 10,
             text: "pen",
-            nodes: []
+            nodeList: []
         }
     ]
  */
 ```
 
+Example using named capture groups:
+
+```js
+const dict = {
+    // https://developers.google.com/web/updates/2017/07/upcoming-regexp-features
+    pattern: /This is (?<noun>\w+)/,
+    replace: 'This is a $<noun>',
+    message: ({ noun }) => {
+        return `$<noun> is not noun`;
+    },
+    test: ({ noun }) => {
+        return new Tag(noun).type === "noun"
+    }
+};
+
+```
+
 
 ## Changelog
 

diff --git a/packages/nlcst-pattern-match/src/nlcst-pattern-match.ts b/packages/nlcst-pattern-match/src/nlcst-pattern-match.ts
@@ -58,7 +58,7 @@ function matchNode(actualNode: Node, expectedNode: Node): boolean {
     });
 }
 
-function match(parent: Sentence, expectedNode: Sentence) {
+function match(text: string, parent: Sentence, expectedNode: Sentence): MatchResult[] {
     if (!isSentence(parent)) {
         throw new Error(`Expected sentence node: ${JSON.stringify(parent)}`);
     }
@@ -69,7 +69,7 @@ function match(parent: Sentence, expectedNode: Sentence) {
     const expectedChildren = expectedNode.children;
     const tokenCount = expectedChildren.length;
     const matchTokens: Node[] = [];
-    const results: { position: Position | undefined; nodeList: Node[] }[] = [];
+    const results: MatchResult[] = [];
     let currentTokenPosition = 0;
     let index = 0;
     for (index = 0; index < children.length; index++) {
@@ -120,15 +120,19 @@ function match(parent: Sentence, expectedNode: Sentence) {
             matchTokens.length = 0;
             const firstNode = tokens[0];
             const lastNode = tokens[tokens.length - 1];
+            if (!firstNode.position) { // offsets are required below to slice the matched text
+                throw new Error(`The node has not position: ${JSON.stringify(firstNode)}`);
+            }
+            if (!lastNode.position) { // report the node that actually lacks a position
+                throw new Error(`The node has not position: ${JSON.stringify(lastNode)}`);
+            }
             results.push({
-                position:
-                    firstNode.position && lastNode.position
-                        ? {
-                            start: firstNode.position.start,
-                            end: lastNode.position.end,
-                            index: firstNode.index
-                        }
-                        : undefined,
+                text: text.slice(firstNode.position.start.offset, lastNode.position.end.offset),
+                position: {
+                    start: firstNode.position.start,
+                    end: lastNode.position.end,
+                    index: firstNode.position.start.offset
+                },
                 nodeList: tokens
             });
         }
@@ -149,26 +153,32 @@ export interface PatternNode extends Node {
     pattern: RegExp;
 }
 
+export interface MatchResult { // one pattern match, as pushed onto the results by match()
+    text: string; // slice of the input text from the first node's start offset to the last node's end offset
+    position: Position | undefined; // first node's start, last node's end; index is the first node's start offset
+    nodeList: Node[]; // the nodes that make up this match
+}
+
 export class PatternMatcher {
     private parser: { parse: ((text: string) => Root) };
 
     constructor(args: PatternMatcherArgs) {
         this.parser = args.parser;
     }
 
-    match(text: string, pattern: Sentence) {
+    match(text: string, pattern: Sentence): MatchResult[] {
         if (typeof text !== "string") {
             throw new Error(
                 "Invalid Arguments: match(text: string, pattern: Sentence)\n" +
-                "matcher.match(text, matcher.tag`pattern`)"
+                    "matcher.match(text, matcher.tag`pattern`)"
             );
         }
-        let allResults: { position: Position | undefined; nodeList: Node[] }[] = [];
+        let allResults: MatchResult[] = [];
         const AST = this.parser.parse(text);
         walk(AST, {
-            enter: function (node: Node) {
+            enter: function(node: Node) {
                 if (isSentence(node)) {
-                    const results = match(node, pattern);
+                    const results = match(text, node, pattern);
                     allResults = allResults.concat(results);
                     this.skip();
                 }
@@ -237,7 +247,7 @@ export class PatternMatcher {
         });
         const AST = this.parser.parse(allString);
         walk(AST, {
-            enter: function (node: Node, parent: Parent) {
+            enter: function(node: Node, parent: Parent) {
                 replaceHolders
                     .filter(replaceHolder => {
                         return node.position!.start.offset === replaceHolder.start;

diff --git a/packages/nlcst-pattern-match/test/nlcst-pattern-match-test.ts b/packages/nlcst-pattern-match/test/nlcst-pattern-match-test.ts
@@ -4,7 +4,7 @@ import * as assert from "assert";
 import { isWord } from "nlcst-types";
 import { EnglishParser } from "nlcst-parse-english";
 
-const toString = require('nlcst-to-string');
+const toString = require("nlcst-to-string");
 // const inspect = require('unist-util-inspect');
 
 describe("nlcst-pattern-match", () => {
@@ -119,7 +119,122 @@ ${JSON.stringify(actual)}
     });
 
     describe("#match", () => {
-        it("match and replace", () => {
+        it("should return MatchResult[]", () => {
+            const englishParser = new EnglishParser();
+            const patternMatcher = new PatternMatcher({
+                parser: englishParser
+            });
+            const pattern = patternMatcher.tag`Bob ${{
+                type: "*",
+                data: {
+                    pos: /^VB/ // verb
+                }
+            }} it.`;
+            const text = "Bob does it.";
+            const results = patternMatcher.match(text, pattern);
+            assert.equal(results.length, 1, "results should have 1");
+            const [result] = results;
+            assert.deepEqual(result.position, {
+                index: 0,
+                end: {
+                    column: 13,
+                    line: 1,
+                    offset: 12
+                },
+                start: {
+                    column: 1,
+                    line: 1,
+                    offset: 0
+                }
+            });
+            assert.deepEqual(
+                result.nodeList,
+                [
+                    {
+                        type: "WordNode",
+                        children: [
+                            {
+                                type: "TextNode",
+                                value: "Bob",
+                                position: {
+                                    start: { line: 1, column: 1, offset: 0 },
+                                    end: { line: 1, column: 4, offset: 3 }
+                                }
+                            }
+                        ],
+                        position: {
+                            start: { line: 1, column: 1, offset: 0 },
+                            end: { line: 1, column: 4, offset: 3 }
+                        },
+                        data: { pos: "NNP" }
+                    },
+                    {
+                        type: "WhiteSpaceNode",
+                        value: " ",
+                        position: {
+                            start: { line: 1, column: 4, offset: 3 },
+                            end: { line: 1, column: 5, offset: 4 }
+                        }
+                    },
+                    {
+                        type: "WordNode",
+                        children: [
+                            {
+                                type: "TextNode",
+                                value: "does",
+                                position: {
+                                    start: { line: 1, column: 5, offset: 4 },
+                                    end: { line: 1, column: 9, offset: 8 }
+                                }
+                            }
+                        ],
+                        position: {
+                            start: { line: 1, column: 5, offset: 4 },
+                            end: { line: 1, column: 9, offset: 8 }
+                        },
+                        data: { pos: "VBZ" }
+                    },
+                    {
+                        type: "WhiteSpaceNode",
+                        value: " ",
+                        position: {
+                            start: { line: 1, column: 9, offset: 8 },
+                            end: { line: 1, column: 10, offset: 9 }
+                        }
+                    },
+                    {
+                        type: "WordNode",
+                        children: [
+                            {
+                                type: "TextNode",
+                                value: "it",
+                                position: {
+                                    start: { line: 1, column: 10, offset: 9 },
+                                    end: { line: 1, column: 12, offset: 11 }
+                                }
+                            }
+                        ],
+                        position: {
+                            start: { line: 1, column: 10, offset: 9 },
+                            end: { line: 1, column: 12, offset: 11 }
+                        },
+                        data: { pos: "PRP" }
+                    },
+                    {
+                        type: "PunctuationNode",
+                        value: ".",
+                        position: {
+                            start: { line: 1, column: 12, offset: 11 },
+                            end: { line: 1, column: 13, offset: 12 }
+                        },
+                        data: { pos: "." }
+                    }
+                ],
+                `\n${JSON.stringify(result.nodeList)}\n`
+            );
+            assert.strictEqual(result.text, "Bob does it.");
+        });
+        it("match data and pattern", () => {
             const englishParser = new EnglishParser();
             const patternMatcher = new PatternMatcher({
                 parser: englishParser
@@ -135,7 +250,10 @@ ${JSON.stringify(actual)}
             }}.`;
             const text = "Click Delete if you want to delete the entire document.";
             const results = patternMatcher.match(text, pattern);
-            assert.strictEqual(toString(results[0].nodeList), "Click Delete if you want to delete the entire document.");
+            assert.strictEqual(
+                toString(results[0].nodeList),
+                "Click Delete if you want to delete the entire document."
+            );
         });
         it("match regexp", () => {
             const englishParser = new EnglishParser();