Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TokenRegexp: don't search overlapped tokens
Overlapped tokens are unnecessary when searching, so they are skipped.
- Loading branch information
Showing 6 changed files with 180 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
test/command/suite/select/filter/index/regexp/long.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| table_create Memos TABLE_NO_KEY | ||
| [[0,0.0,0.0],true] | ||
| column_create Memos content COLUMN_SCALAR Text | ||
| [[0,0.0,0.0],true] | ||
| table_create RegexpTokens TABLE_PAT_KEY ShortText --default_tokenizer TokenRegexp | ||
| [[0,0.0,0.0],true] | ||
| column_create RegexpTokens memos_content COLUMN_INDEX|WITH_POSITION Memos content | ||
| [[0,0.0,0.0],true] | ||
| load --table Memos | ||
| [ | ||
| {"content": "Groonga"}, | ||
| {"content": "Mroonga"}, | ||
| {"content": "Rroonga and Ruby"} | ||
| ] | ||
| [[0,0.0,0.0],3] | ||
| select Memos --filter 'content @~ "roonga"' | ||
| [ | ||
| [ | ||
| 0, | ||
| 0.0, | ||
| 0.0 | ||
| ], | ||
| [ | ||
| [ | ||
| [ | ||
| 3 | ||
| ], | ||
| [ | ||
| [ | ||
| "_id", | ||
| "UInt32" | ||
| ], | ||
| [ | ||
| "content", | ||
| "Text" | ||
| ] | ||
| ], | ||
| [ | ||
| 1, | ||
| "Groonga" | ||
| ], | ||
| [ | ||
| 2, | ||
| "Mroonga" | ||
| ], | ||
| [ | ||
| 3, | ||
| "Rroonga and Ruby" | ||
| ] | ||
| ] | ||
| ] | ||
| ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| table_create Memos TABLE_NO_KEY | ||
| column_create Memos content COLUMN_SCALAR Text | ||
| | ||
| table_create RegexpTokens TABLE_PAT_KEY ShortText \ | ||
| --default_tokenizer TokenRegexp | ||
| column_create RegexpTokens memos_content COLUMN_INDEX|WITH_POSITION \ | ||
| Memos content | ||
| | ||
| load --table Memos | ||
| [ | ||
| {"content": "Groonga"}, | ||
| {"content": "Mroonga"}, | ||
| {"content": "Rroonga and Ruby"} | ||
| ] | ||
| | ||
| select Memos --filter 'content @~ "roonga"' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| table_create Lexicon TABLE_PAT_KEY ShortText --default_tokenizer TokenRegexp | ||
| [[0,0.0,0.0],true] | ||
| table_tokenize Lexicon "abcdefghijk" --mode ADD | ||
| [ | ||
| [ | ||
| 0, | ||
| 0.0, | ||
| 0.0 | ||
| ], | ||
| [ | ||
| { | ||
| "value": "", | ||
| "position": 0 | ||
| }, | ||
| { | ||
| "value": "ab", | ||
| "position": 1 | ||
| }, | ||
| { | ||
| "value": "bc", | ||
| "position": 2 | ||
| }, | ||
| { | ||
| "value": "cd", | ||
| "position": 3 | ||
| }, | ||
| { | ||
| "value": "de", | ||
| "position": 4 | ||
| }, | ||
| { | ||
| "value": "ef", | ||
| "position": 5 | ||
| }, | ||
| { | ||
| "value": "fg", | ||
| "position": 6 | ||
| }, | ||
| { | ||
| "value": "gh", | ||
| "position": 7 | ||
| }, | ||
| { | ||
| "value": "hi", | ||
| "position": 8 | ||
| }, | ||
| { | ||
| "value": "ij", | ||
| "position": 9 | ||
| }, | ||
| { | ||
| "value": "jk", | ||
| "position": 10 | ||
| }, | ||
| { | ||
| "value": "k", | ||
| "position": 11 | ||
| }, | ||
| { | ||
| "value": "", | ||
| "position": 12 | ||
| } | ||
| ] | ||
| ] | ||
| table_tokenize Lexicon "abcdefghijk" --mode GET | ||
| [ | ||
| [ | ||
| 0, | ||
| 0.0, | ||
| 0.0 | ||
| ], | ||
| [ | ||
| { | ||
| "value": "ab", | ||
| "position": 0 | ||
| }, | ||
| { | ||
| "value": "cd", | ||
| "position": 2 | ||
| }, | ||
| { | ||
| "value": "ef", | ||
| "position": 4 | ||
| }, | ||
| { | ||
| "value": "gh", | ||
| "position": 6 | ||
| }, | ||
| { | ||
| "value": "ij", | ||
| "position": 8 | ||
| }, | ||
| { | ||
| "value": "jk", | ||
| "position": 9 | ||
| } | ||
| ] | ||
| ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| table_create Lexicon TABLE_PAT_KEY ShortText \ | ||
| --default_tokenizer TokenRegexp | ||
| table_tokenize Lexicon "abcdefghijk" --mode ADD | ||
| | ||
| table_tokenize Lexicon "abcdefghijk" --mode GET |