Navigation Menu

Skip to content

Commit

Permalink
mecab: fix a bug that -Owakati is ignored
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Oct 17, 2015
1 parent a91f283 commit ea62d52
Showing 1 changed file with 66 additions and 57 deletions.
123 changes: 66 additions & 57 deletions plugins/tokenizers/mecab.c
Expand Up @@ -271,6 +271,57 @@ chunked_tokenize_utf8(grn_ctx *ctx,
}
}

/*
  Creates a new mecab_t configured with "-Owakati" and, when Groonga is
  built with its bundled MeCab, with "--rcfile" pointing at the bundled
  rc file.

  On failure an error is reported on ctx through GRN_PLUGIN_ERROR() and
  NULL is returned. The error lists every argument that was passed to
  mecab_new() followed by mecab_global_error_message().

  The caller owns the returned mecab_t.
*/
static mecab_t *
mecab_create(grn_ctx *ctx)
{
  mecab_t *mecab;
  int argc = 0;
  const char *argv[4];

  /* The first slot is a placeholder name; the options follow it.
     NOTE(review): presumably mecab_new() skips argv[0] like a normal
     program argv -- confirm against the MeCab API. */
  argv[argc++] = "Groonga";
  argv[argc++] = "-Owakati";
#ifdef GRN_WITH_BUNDLED_MECAB
  argv[argc++] = "--rcfile";
# ifdef WIN32
  {
    /* static: argv keeps pointing at this buffer after the enclosing
       block ends, so it must not have automatic storage. */
    static char windows_mecab_rc_file[PATH_MAX];

    grn_strcpy(windows_mecab_rc_file,
               PATH_MAX,
               grn_plugin_windows_base_dir());
    grn_strcat(windows_mecab_rc_file,
               PATH_MAX,
               "/");
    grn_strcat(windows_mecab_rc_file,
               PATH_MAX,
               GRN_BUNDLED_MECAB_RELATIVE_RC_PATH);
    argv[argc++] = windows_mecab_rc_file;
  }
# else /* WIN32 */
  argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH;
# endif /* WIN32 */
#endif /* GRN_WITH_BUNDLED_MECAB */
  mecab = mecab_new(argc, (char **)argv);

  if (!mecab) {
    /* One complete call per configuration: preprocessor directives are
       kept out of the argument list, and every argv entry that was
       passed to mecab_new() is reported. */
#ifdef GRN_WITH_BUNDLED_MECAB
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer][mecab] "
                     "failed to create mecab_t: mecab_new("
                     "\"%s\", \"%s\", \"%s\", \"%s\""
                     "): %s",
                     argv[0], argv[1], argv[2], argv[3],
                     mecab_global_error_message());
#else /* GRN_WITH_BUNDLED_MECAB */
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer][mecab] "
                     "failed to create mecab_t: mecab_new("
                     "\"%s\", \"%s\""
                     "): %s",
                     argv[0], argv[1],
                     mecab_global_error_message());
#endif /* GRN_WITH_BUNDLED_MECAB */
  }

  return mecab;
}

/*
This function is called for a full text search query or a document to be
indexed, so both short and long strings may be given.
Expand All @@ -294,46 +345,8 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
if (!sole_mecab) {
grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
if (!sole_mecab) {
int argc = 0;
const char *argv[3];
argv[argc++] = "-Owakati";
#ifdef GRN_WITH_BUNDLED_MECAB
argv[argc++] = "--rcfile";
# ifdef WIN32
{
static char windows_mecab_rc_file[PATH_MAX];

grn_strcpy(windows_mecab_rc_file,
PATH_MAX,
grn_plugin_windows_base_dir());
grn_strcat(windows_mecab_rc_file,
PATH_MAX,
"/");
grn_strcat(windows_mecab_rc_file,
PATH_MAX,
GRN_BUNDLED_MECAB_RELATIVE_RC_PATH);
argv[argc++] = windows_mecab_rc_file;
}
# else /* WIN32 */
argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH;
# endif /* WIN32 */
#endif /* GRN_WITH_BUNDLED_MECAB */
sole_mecab = mecab_new(argc, (char **)argv);
if (!sole_mecab) {
GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
"[tokenizer][mecab] "
"mecab_new("
"\"%s\""
#ifdef GRN_WITH_BUNDLED_MECAB
", \"%s\", \"%s\""
#endif /* GRN_WITH_BUNDLED_MECAB */
") failed on mecab_init(): %s",
argv[0],
#ifdef GRN_WITH_BUNDLED_MECAB
argv[1], argv[2],
#endif /* GRN_WITH_BUNDLED_MECAB */
mecab_global_error_message());
} else {
sole_mecab = mecab_create(ctx);
if (sole_mecab) {
sole_mecab_encoding = get_mecab_encoding(sole_mecab);
}
}
Expand Down Expand Up @@ -512,28 +525,24 @@ check_mecab_dictionary_encoding(grn_ctx *ctx)
{
#ifdef HAVE_MECAB_DICTIONARY_INFO_T
mecab_t *mecab;
grn_encoding encoding;
grn_bool have_same_encoding_dictionary;

mecab = mecab_new2("-Owakati");
if (mecab) {
grn_encoding encoding;
grn_bool have_same_encoding_dictionary;
mecab = mecab_create(ctx);
if (!mecab) {
return;
}

encoding = GRN_CTX_GET_ENCODING(ctx);
have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab));
mecab_destroy(mecab);
encoding = GRN_CTX_GET_ENCODING(ctx);
have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab));
mecab_destroy(mecab);

if (!have_same_encoding_dictionary) {
GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
"[tokenizer][mecab] "
"MeCab has no dictionary that uses the context encoding"
": <%s>",
grn_encoding_to_string(encoding));
}
} else {
if (!have_same_encoding_dictionary) {
GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
"[tokenizer][mecab] "
"mecab_new2 failed in check_mecab_dictionary_encoding: %s",
mecab_global_error_message());
"MeCab has no dictionary that uses the context encoding"
": <%s>",
grn_encoding_to_string(encoding));
}
#endif
}
Expand Down

0 comments on commit ea62d52

Please sign in to comment.