Navigation Menu

Skip to content

Commit

Permalink
tokenize: add valid normalizer check
Browse files — browse the repository at this point in the history
  • Loading branch information
kou committed Jun 5, 2014
1 parent d9183e7 commit d0873c7
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 5 deletions.
15 changes: 14 additions & 1 deletion lib/proc.c
Expand Up @@ -3203,11 +3203,24 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
if (!normalizer) {
grn_obj_unlink(ctx, tokenizer);
ERR(GRN_INVALID_ARGUMENT,
"[tokenize] unknown normalizer: <%.*s>",
"[tokenize] nonexistent normalizer: <%.*s>",
(int)GRN_TEXT_LEN(normalizer_name),
GRN_TEXT_VALUE(normalizer_name));
return NULL;
}

if (!is_normalizer(ctx, normalizer)) {
grn_obj inspected;
grn_obj_unlink(ctx, tokenizer);
GRN_TEXT_INIT(&inspected, 0);
grn_inspect(ctx, &inspected, normalizer);
ERR(GRN_INVALID_ARGUMENT,
"[tokenize] not normalizer: %.*s",
(int)GRN_TEXT_LEN(&inspected),
GRN_TEXT_VALUE(&inspected));
GRN_OBJ_FIN(ctx, &inspected);
return NULL;
}
}

lexicon = grn_hash_create(ctx, NULL, GRN_TABLE_MAX_KEY_SIZE, 0,
Expand Down
15 changes: 15 additions & 0 deletions test/command/suite/tokenize/invalid/normalizer/invalid.expected
@@ -0,0 +1,15 @@
tokenize TokenBigram "aBcDe 123" TokenDelimit
[
[
[
-22,
0.0,
0.0
],
"[tokenize] not normalizer: #<proc:tokenizer TokenDelimit arguments:[$1, $2, $3]>"
],
[

]
]
#|e| [tokenize] not normalizer: #<proc:tokenizer TokenDelimit arguments:[$1, $2, $3]>
@@ -0,0 +1 @@
tokenize TokenBigram "aBcDe 123" TokenDelimit
@@ -0,0 +1,15 @@
tokenize TokenBigram "aBcDe 123" NormalizerNonexistent
[
[
[
-22,
0.0,
0.0
],
"[tokenize] nonexistent normalizer: <NormalizerNonexistent>"
],
[

]
]
#|e| [tokenize] nonexistent normalizer: <NormalizerNonexistent>
@@ -0,0 +1 @@
tokenize TokenBigram "aBcDe 123" NormalizerNonexistent

This file was deleted.

This file was deleted.

0 comments on commit d0873c7

Please sign in to comment.