Navigation Menu

Skip to content

Commit

Permalink
TokenMecab: fix a bug where loose_reading with GET returns an extra token
Browse files (browse the repository at this point in the history)
  • Loading branch information
kou committed Sep 12, 2018
1 parent 239d588 commit bd46eba
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
3 changes: 2 additions & 1 deletion plugins/tokenizers/mecab.c
Expand Up @@ -1020,7 +1020,8 @@ mecab_init(grn_ctx *ctx, grn_tokenizer_query *query)

   if (tokenizer->options->loose_reading &&
       grn_tokenizer_query_get_mode(ctx, tokenizer->query) == GRN_TOKEN_GET) {
-    while (mecab_next_default_format_consume_token(ctx, tokenizer, NULL) > 0) {
+    while (tokenizer->next < tokenizer->end &&
+           mecab_next_default_format_consume_token(ctx, tokenizer, NULL) > 0) {
       /* Do nothing */
     }
     tokenizer->loose.ing = GRN_TRUE;
Expand Down
@@ -0,0 +1,47 @@
table_create Menus TABLE_NO_KEY
[[0,0.0,0.0],true]
column_create Menus name COLUMN_SCALAR Text
[[0,0.0,0.0],true]
table_create Terms TABLE_PAT_KEY ShortText --normalize NormalizerNFKC100 --default_tokenizer 'TokenMecab("loose_reading", true)'
[[0,0.0,0.0],true]
column_create Terms index COLUMN_INDEX|WITH_POSITION Menus name
[[0,0.0,0.0],true]
load --table Menus
[
{"name": "焼肉定食"},
{"name": "やきにく定食"}
]
[[0,0.0,0.0],2]
select Menus --match_columns name --query "焼き肉"
[
[
0,
0.0,
0.0
],
[
[
[
2
],
[
[
"_id",
"UInt32"
],
[
"name",
"Text"
]
],
[
1,
"焼肉定食"
],
[
2,
"やきにく定食"
]
]
]
]
@@ -0,0 +1,15 @@
table_create Menus TABLE_NO_KEY
column_create Menus name COLUMN_SCALAR Text

table_create Terms TABLE_PAT_KEY ShortText \
--normalize NormalizerNFKC100 \
--default_tokenizer 'TokenMecab("loose_reading", true)'
column_create Terms index COLUMN_INDEX|WITH_POSITION Menus name

load --table Menus
[
{"name": "焼肉定食"},
{"name": "やきにく定食"}
]

select Menus --match_columns name --query "焼き肉"

0 comments on commit bd46eba

Please sign in to comment.