Navigation Menu

Skip to content

Commit

Permalink
tokenize: add valid tokenizer check
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Jun 5, 2014
1 parent d0873c7 commit 7d11c2f
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 5 deletions.
29 changes: 28 additions & 1 deletion lib/proc.c
Expand Up @@ -2958,6 +2958,20 @@ is_normalizer(grn_ctx *ctx, grn_obj *object)
return GRN_TRUE;
}

/*
 * Reports whether `object` is a tokenizer procedure.
 *
 * An object qualifies only when it is a GRN_PROC whose proc type is
 * GRN_PROC_TOKENIZER; the second check is skipped for non-proc objects.
 */
static grn_bool
is_tokenizer(grn_ctx *ctx, grn_obj *object)
{
  grn_bool is_proc = (object->header.type == GRN_PROC);
  return (is_proc &&
          grn_proc_get_type(ctx, object) == GRN_PROC_TOKENIZER) ?
    GRN_TRUE : GRN_FALSE;
}

static const char *
char_type_name(grn_char_type type)
{
Expand Down Expand Up @@ -3190,12 +3204,25 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
GRN_TEXT_LEN(tokenizer_name));
if (!tokenizer) {
ERR(GRN_INVALID_ARGUMENT,
"[tokenize] unknown tokenizer: <%.*s>",
"[tokenize] nonexistent tokenizer: <%.*s>",
(int)GRN_TEXT_LEN(tokenizer_name),
GRN_TEXT_VALUE(tokenizer_name));
return NULL;
}

if (!is_tokenizer(ctx, tokenizer)) {
grn_obj inspected;
GRN_TEXT_INIT(&inspected, 0);
grn_inspect(ctx, &inspected, tokenizer);
ERR(GRN_INVALID_ARGUMENT,
"[tokenize] not tokenizer: %.*s",
(int)GRN_TEXT_LEN(&inspected),
GRN_TEXT_VALUE(&inspected));
GRN_OBJ_FIN(ctx, &inspected);
grn_obj_unlink(ctx, tokenizer);
return NULL;
}

if (GRN_TEXT_LEN(normalizer_name) > 0) {
normalizer = grn_ctx_get(ctx,
GRN_TEXT_VALUE(normalizer_name),
Expand Down
15 changes: 15 additions & 0 deletions test/command/suite/tokenize/invalid/tokenizer/invalid.expected
@@ -0,0 +1,15 @@
tokenize NormalizerAuto "aBcDe 123"
[
[
[
-22,
0.0,
0.0
],
"[tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>"
],
[

]
]
#|e| [tokenize] not tokenizer: #<proc:normalizer NormalizerAuto arguments:[$1]>
1 change: 1 addition & 0 deletions test/command/suite/tokenize/invalid/tokenizer/invalid.test
@@ -0,0 +1 @@
tokenize NormalizerAuto "aBcDe 123"
@@ -0,0 +1,3 @@
tokenize TokenNonexistent "aBcDe 123"
[[[-22,0.0,0.0],"[tokenize] nonexistent tokenizer: <TokenNonexistent>"],[]]
#|e| [tokenize] nonexistent tokenizer: <TokenNonexistent>
@@ -0,0 +1 @@
tokenize TokenNonexistent "aBcDe 123"

This file was deleted.

1 change: 0 additions & 1 deletion test/command/suite/tokenize/invalid/tokenizer/unknown.test

This file was deleted.

0 comments on commit 7d11c2f

Please sign in to comment.