diff --git a/lib/db.c b/lib/db.c index 0335c56035..39c40b2680 100644 --- a/lib/db.c +++ b/lib/db.c @@ -5695,15 +5695,15 @@ grn_obj_set_value_column_var_size_vector(grn_ctx *ctx, grn_obj *obj, grn_id id, case GRN_BULK : { unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; if (v && s && - (token = grn_token_open(ctx, lexicon, v, s, - GRN_TOKEN_ADD, token_flags))) { - while (token->status == GRN_TOKEN_DOING) { - grn_id tid = grn_token_next(ctx, token); + (token_cursor = grn_token_cursor_open(ctx, lexicon, v, s, + GRN_TOKEN_ADD, token_flags))) { + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_id tid = grn_token_cursor_next(ctx, token_cursor); grn_uvector_add_element(ctx, &uvector, tid, 0); } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } rc = grn_ja_put(ctx, (grn_ja *)obj, id, GRN_BULK_HEAD(&uvector), GRN_BULK_VSIZE(&uvector), @@ -9859,10 +9859,10 @@ grn_table_tokenize(grn_ctx *ctx, grn_obj *table, const char *str, unsigned int str_len, grn_obj *buf, grn_bool addp) { - grn_token *token = NULL; + grn_token_cursor *token_cursor = NULL; grn_token_mode mode = addp ? GRN_TOKEN_ADD : GRN_TOKEN_GET; GRN_API_ENTER; - if (!(token = grn_token_open(ctx, table, str, str_len, mode, 0))) { + if (!(token_cursor = grn_token_cursor_open(ctx, table, str, str_len, mode, 0))) { goto exit; } if (buf) { @@ -9872,15 +9872,15 @@ grn_table_tokenize(grn_ctx *ctx, grn_obj *table, goto exit; } } - while (token->status != GRN_TOKEN_DONE && token->status != GRN_TOKEN_DONE_SKIP) { + while (token_cursor->status != GRN_TOKEN_DONE && token_cursor->status != GRN_TOKEN_DONE_SKIP) { grn_id tid; - if ((tid = grn_token_next(ctx, token))) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { GRN_RECORD_PUT(ctx, buf, tid); } } exit : - if (token) { - grn_token_close(ctx, token); + if (token_cursor) { + grn_token_cursor_close(ctx, token_cursor); } GRN_API_RETURN(buf); } diff --git a/lib/ii.c b/lib/ii.c index 4f83a79819..5dc203865a 100644 --- a/lib/ii.c +++ b/lib/ii.c @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009-2012 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -4614,26 +4614,26 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr { grn_hash *h; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; grn_rc r, rc = GRN_SUCCESS; grn_vgram_buf *sbuf = NULL; if (!rid) { return GRN_INVALID_ARGUMENT; } - if (!(token = grn_token_open(ctx, lexicon, value, value_len, - GRN_TOKEN_ADD, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, + GRN_TOKEN_ADD, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } if (vgram) { sbuf = grn_vgram_buf_open(value_len); } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_add failed !"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } - while (!token->status) { - (tid = grn_token_next(ctx, token)); + while (!token_cursor->status) { + (tid = grn_token_cursor_next(ctx, token_cursor)); if (tid) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { @@ -4642,14 +4642,14 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, 0)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, 0)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_updspec_add on index_add failed!"); goto exit; } if (sbuf) { grn_vgram_buf_add(sbuf, tid); } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); // todo : support vgram // if (sbuf) { grn_vgram_update(vgram, rid, sbuf, (grn_set *)h); } GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { @@ -4661,7 +4661,7 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr return rc; exit: grn_hash_close(ctx, h); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } @@ -4672,34 +4672,34 @@ index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr { grn_hash *h; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; if (!rid) { return GRN_INVALID_ARGUMENT; } - if (!(token = grn_token_open(ctx, lexicon, value, value_len, - GRN_TOKEN_DEL, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, + GRN_TOKEN_DEL, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_del failed !"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, 0))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on index_del failed !"); grn_hash_close(ctx, h); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { if (*tp) { grn_ii_delete_one(ctx, ii, *tp, *u, NULL); @@ -4738,7 +4738,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i int j; grn_value *v; unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_rc rc = GRN_SUCCESS; grn_hash *old, *new; grn_id tid, *tp; @@ -4756,32 +4756,32 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i goto exit; } for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) { - if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, - GRN_TOKEN_ADD, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, + GRN_TOKEN_ADD, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } if (!GRN_HASH_SIZE(new)) { @@ -4800,26 +4800,26 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i goto exit; } for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) { - if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, - GRN_TOKEN_DEL, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, + GRN_TOKEN_DEL, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; @@ -4827,7 +4827,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } } else { @@ -4872,7 +4872,7 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, int j; grn_id tid; grn_section *v; - grn_token *token; + grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_hash *h = (grn_hash *)out; grn_obj *lexicon = ii->lexicon; @@ -4881,10 +4881,10 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) { unsigned int token_flags = 0; if (v->length && - (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, - mode, token_flags))) { - while (!token->status) { - if ((tid = grn_token_next(ctx, token))) { + (token_cursor = grn_token_cursor_open(ctx, lexicon, head + v->offset, v->length, + mode, token_flags))) { + while (!token_cursor->status) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (posting) { GRN_RECORD_PUT(ctx, posting, tid); } if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; @@ -4892,18 +4892,18 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } - if (grn_ii_updspec_add(ctx, *u, token->pos, v->weight)) { + if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } } @@ -5363,12 +5363,14 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, uint32_t size; grn_rc rc = GRN_END_OF_DATA; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; - grn_token *token = grn_token_open(ctx, lexicon, string, string_len, - GRN_TOKEN_GET, token_flags); + grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon, + string, string_len, + GRN_TOKEN_GET, + token_flags); *only_skip_token = GRN_FALSE; - if (!token) { return GRN_NO_MEMORY_AVAILABLE; } + if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_UNSPLIT) { - if ((ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, token->orig_blen, 0, EX_BOTH))) { + if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH))) { tis[(*n)++] = ti; rc = GRN_SUCCESS; } @@ -5389,26 +5391,26 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, ef = EX_NONE; break; } - tid = grn_token_next(ctx, token); - if (token->force_prefix) { ef |= EX_PREFIX; } - switch (token->status) { + tid = grn_token_cursor_next(ctx, token_cursor); + if (token_cursor->force_prefix) { ef |= EX_PREFIX; } + switch (token_cursor->status) { case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, ef & EX_SUFFIX); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX); break; case GRN_TOKEN_DONE : - ti = token_info_open(ctx, lexicon, ii, (const char *)token->curr, - token->curr_size, 0, ef); + ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, + token_cursor->curr_size, 0, ef); /* key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, token->curr, token->curr_size, token->pos, ef); - ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, - token->orig_blen, token->pos, ef); + ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, + token_cursor->orig_blen, token_cursor->pos, ef); */ break; case GRN_TOKEN_NOT_FOUND : - ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, - token->orig_blen, 0, ef); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, + token_cursor->orig_blen, 0, ef); break; case GRN_TOKEN_DONE_SKIP : *only_skip_token = GRN_TRUE; @@ -5418,24 +5420,24 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, } if (!ti) { goto exit ; } tis[(*n)++] = ti; - while (token->status == GRN_TOKEN_DOING) { - tid = grn_token_next(ctx, token); - switch (token->status) { + while (token_cursor->status == GRN_TOKEN_DOING) { + tid = grn_token_cursor_next(ctx, token_cursor); + switch (token_cursor->status) { case GRN_TOKEN_DONE_SKIP : continue; case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, EX_NONE); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE); break; case GRN_TOKEN_DONE : if (tid) { key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, key, size, token->pos, ef & EX_PREFIX); + ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX); break; } /* else fallthru */ default : - ti = token_info_open(ctx, lexicon, ii, (char *)token->curr, - token->curr_size, token->pos, ef & EX_PREFIX); + ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr, + token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX); break; } if (!ti) { goto exit; } @@ -5444,7 +5446,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, rc = GRN_SUCCESS; } exit : - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); return rc; } @@ -5653,35 +5655,36 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, grn_id tid, *tp, max_size; grn_rc rc = GRN_SUCCESS; grn_hash *h; - grn_token *token; + grn_token_cursor *token_cursor; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; grn_obj *lexicon = ii->lexicon; if (!lexicon || !ii || !string || !string_len || !s || !optarg) { return GRN_INVALID_ARGUMENT; } if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) { return GRN_NO_MEMORY_AVAILABLE; } - if (!(token = grn_token_open(ctx, lexicon, string, string_len, - GRN_TOKEN_GET, token_flags))) { + if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len, + GRN_TOKEN_GET, token_flags))) { grn_hash_close(ctx, h); return GRN_NO_MEMORY_AVAILABLE; } if (!(max_size = optarg->max_size)) { max_size = 1048576; } - while (token->status != GRN_TOKEN_DONE && token->status != GRN_TOKEN_DONE_SKIP) { - if ((tid = grn_token_next(ctx, token))) { + while (token_cursor->status != GRN_TOKEN_DONE && + token_cursor->status != GRN_TOKEN_DONE_SKIP) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&w1, NULL)) { (*w1)++; } } - if (tid && token->curr_size) { + if (tid && token_cursor->curr_size) { if (optarg->max_interval == GRN_OP_UNSPLIT) { - grn_table_search(ctx, lexicon, token->curr, token->curr_size, + grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR); } if (optarg->max_interval == GRN_OP_PARTIAL) { - grn_table_search(ctx, lexicon, token->curr, token->curr_size, + grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR); } } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); { grn_hash_cursor *c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0, 0, -1, 0); if (!c) { @@ -6774,7 +6777,7 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, } if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) { unsigned int token_flags = 0; - grn_token *token; + grn_token_cursor *token_cursor; grn_id *buffer = ii_buffer->block_buf; uint32_t block_pos = ii_buffer->block_pos; buffer[block_pos++] = rid + II_BUFFER_RID_FLAG; @@ -6784,12 +6787,13 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, if (weight) { buffer[block_pos++] = weight + II_BUFFER_WEIGHT_FLAG; } - if ((token = grn_token_open(ctx, tmp_lexicon, value, - value_len, GRN_TOKEN_ADD, token_flags))) { + if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon, + value, value_len, + GRN_TOKEN_ADD, token_flags))) { uint32_t pos; - for (pos = 0; !token->status; pos++) { + for (pos = 0; !token_cursor->status; pos++) { grn_id tid; - if ((tid = grn_token_next(ctx, token))) { + if ((tid = grn_token_cursor_next(ctx, token_cursor))) { ii_buffer_counter *counter; counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid); if (!counter) { return; } @@ -6828,7 +6832,7 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, counter->nposts++; } } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } ii_buffer->block_pos = block_pos; } diff --git a/lib/proc.c b/lib/proc.c index d4c5bd3986..cbfa0c19f3 100644 --- a/lib/proc.c +++ b/lib/proc.c @@ -3391,17 +3391,18 @@ static void tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, grn_token_mode mode, unsigned int flags, grn_obj *tokens) { - grn_token *token; + grn_token_cursor *token_cursor; - token = grn_token_open(ctx, (grn_obj *)lexicon, - GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), - mode, flags); - if (!token) { + token_cursor = + grn_token_cursor_open(ctx, (grn_obj *)lexicon, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + mode, flags); + if (!token_cursor) { return; } - while (token->status == GRN_TOKEN_DOING) { - grn_id token_id = grn_token_next(ctx, token); + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_id token_id = grn_token_cursor_next(ctx, token_cursor); tokenize_token *current_token; if (token_id == GRN_ID_NIL) { continue; @@ -3409,9 +3410,9 @@ tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, grn_token_mode mode, grn_bulk_space(ctx, tokens, sizeof(tokenize_token)); current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1; current_token->id = token_id; - current_token->position = token->pos; + current_token->position = token_cursor->pos; } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } static grn_obj * @@ -3465,15 +3466,16 @@ proc_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) GRN_OBJ_FIN(ctx, &tokens); } else if (MODE_NAME_EQUAL("GET")) { { - grn_token *token; - token = grn_token_open(ctx, (grn_obj *)lexicon, - GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), - GRN_TOKEN_ADD, flags); - if (token) { - while (token->status == GRN_TOKEN_DOING) { - grn_token_next(ctx, token); + grn_token_cursor *token_cursor; + token_cursor = + grn_token_cursor_open(ctx, (grn_obj *)lexicon, + GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), + GRN_TOKEN_ADD, flags); + if (token_cursor) { + while (token_cursor->status == GRN_TOKEN_DOING) { + grn_token_cursor_next(ctx, token_cursor); } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } } diff --git a/lib/token.c b/lib/token.c index 25eca07f3e..f0a0ff7fd0 100644 --- a/lib/token.c +++ b/lib/token.c @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2009-2012 Brazil + Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -496,9 +496,10 @@ grn_token_fin(void) } static void -grn_token_open_initialize_token_filters(grn_ctx *ctx, grn_token *token) +grn_token_cursor_open_initialize_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor) { - grn_obj *token_filters = token->token_filters; + grn_obj *token_filters = token_cursor->token_filters; unsigned int i, n_token_filters; grn_obj mode; @@ -509,22 +510,23 @@ grn_token_open_initialize_token_filters(grn_ctx *ctx, grn_token *token) } if (n_token_filters == 0) { - token->token_filter_ctxs = NULL; + token_cursor->token_filter_ctxs = NULL; return; } - token->token_filter_ctxs = GRN_MALLOC(sizeof(grn_proc_ctx) * n_token_filters); - if (!token->token_filter_ctxs) { + token_cursor->token_filter_ctxs = + GRN_MALLOC(sizeof(grn_proc_ctx) * n_token_filters); + if (!token_cursor->token_filter_ctxs) { ERR(GRN_NO_MEMORY_AVAILABLE, - "[token][open] failed to allocate token filter contexts"); + "[token-cursor][open] failed to allocate token filter contexts"); return; } GRN_UINT32_INIT(&mode, 0); - GRN_UINT32_SET(ctx, &mode, token->mode); + GRN_UINT32_SET(ctx, &mode, token_cursor->mode); for (i = 0; i < n_token_filters; i++) { grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); - grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i]; + grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i]; int n_args = 0; grn_obj *args[2]; @@ -535,7 +537,7 @@ grn_token_open_initialize_token_filters(grn_ctx *ctx, grn_token *token) token_filter_ctx->currh = NULL; token_filter_ctx->phase = PROC_INIT; - args[n_args++] = token->table; + args[n_args++] = token_cursor->table; args[n_args++] = &mode; ((grn_proc *)token_filter)->funcs[PROC_INIT](ctx, n_args, args, @@ -544,12 +546,12 @@ grn_token_open_initialize_token_filters(grn_ctx *ctx, grn_token *token) GRN_OBJ_FIN(ctx, &mode); } - -grn_token * -grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, - grn_token_mode mode, unsigned int flags) +grn_token_cursor * +grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, + const char *str, size_t str_len, + grn_token_mode mode, unsigned int flags) { - grn_token *token; + grn_token_cursor *token_cursor; grn_encoding encoding; grn_obj *tokenizer; grn_obj *normalizer; @@ -559,20 +561,20 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, &normalizer, &token_filters)) { return NULL; } - if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; } - token->table = table; - token->mode = mode; - token->encoding = encoding; - token->tokenizer = tokenizer; - token->token_filters = token_filters; - token->orig = (const unsigned char *)str; - token->orig_blen = str_len; - token->curr = NULL; - token->nstr = NULL; - token->curr_size = 0; - token->pos = -1; - token->status = GRN_TOKEN_DOING; - token->force_prefix = 0; + if (!(token_cursor = GRN_MALLOC(sizeof(grn_token_cursor)))) { return NULL; } + token_cursor->table = table; + token_cursor->mode = mode; + token_cursor->encoding = encoding; + token_cursor->tokenizer = tokenizer; + token_cursor->token_filters = token_filters; + token_cursor->orig = (const unsigned char *)str; + token_cursor->orig_blen = str_len; + token_cursor->curr = NULL; + token_cursor->nstr = NULL; + token_cursor->curr_size = 0; + token_cursor->pos = -1; + token_cursor->status = GRN_TOKEN_DOING; + token_cursor->force_prefix = 0; if (tokenizer) { grn_obj str_, flags_, mode_; GRN_TEXT_INIT(&str_, GRN_OBJ_DO_SHALLOW_COPY); @@ -581,49 +583,52 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, GRN_UINT32_SET(ctx, &flags_, flags); GRN_UINT32_INIT(&mode_, 0); GRN_UINT32_SET(ctx, &mode_, mode); - token->pctx.caller = NULL; - token->pctx.user_data.ptr = NULL; - token->pctx.proc = (grn_proc *)tokenizer; - token->pctx.hooks = NULL; - token->pctx.currh = NULL; - token->pctx.phase = PROC_INIT; + token_cursor->pctx.caller = NULL; + token_cursor->pctx.user_data.ptr = NULL; + token_cursor->pctx.proc = (grn_proc *)tokenizer; + token_cursor->pctx.hooks = NULL; + token_cursor->pctx.currh = NULL; + token_cursor->pctx.phase = PROC_INIT; grn_ctx_push(ctx, &mode_); grn_ctx_push(ctx, &str_); grn_ctx_push(ctx, &flags_); - ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token->pctx.user_data); + ((grn_proc *)tokenizer)->funcs[PROC_INIT](ctx, 1, &table, &token_cursor->pctx.user_data); grn_obj_close(ctx, &flags_); grn_obj_close(ctx, &str_); grn_obj_close(ctx, &mode_); } else { int nflags = 0; - token->nstr = grn_string_open_(ctx, str, str_len, - normalizer, nflags, token->encoding); - if (token->nstr) { + token_cursor->nstr = grn_string_open_(ctx, str, str_len, + normalizer, + nflags, + token_cursor->encoding); + if (token_cursor->nstr) { const char *normalized; - grn_string_get_normalized(ctx, token->nstr, - &normalized, &(token->curr_size), NULL); - token->curr = (const unsigned char *)normalized; + grn_string_get_normalized(ctx, token_cursor->nstr, + &normalized, &(token_cursor->curr_size), NULL); + token_cursor->curr = (const unsigned char *)normalized; } else { - ERR(GRN_TOKENIZER_ERROR, "grn_string_open failed at grn_token_open"); + ERR(GRN_TOKENIZER_ERROR, + "[token-cursor][open] failed to grn_string_open()"); } } - grn_token_open_initialize_token_filters(ctx, token); + grn_token_cursor_open_initialize_token_filters(ctx, token_cursor); if (ctx->rc) { - grn_token_close(ctx, token); - token = NULL; + grn_token_cursor_close(ctx, token_cursor); + token_cursor = NULL; } - return token; + return token_cursor; } static int -grn_token_next_apply_token_filters(grn_ctx *ctx, - grn_token *token, - grn_obj *current_token, - grn_obj *status) +grn_token_cursor_next_apply_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor, + grn_obj *current_token, + grn_obj *status) { - grn_obj *token_filters = token->token_filters; + grn_obj *token_filters = token_cursor->token_filters; unsigned int i, n_token_filters; if (token_filters) { @@ -633,7 +638,7 @@ grn_token_next_apply_token_filters(grn_ctx *ctx, } for (i = 0; i < n_token_filters; i++) { grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); - grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i]; + grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i]; int n_args = 0; grn_obj *args[2]; @@ -655,83 +660,85 @@ grn_token_next_apply_token_filters(grn_ctx *ctx, current_token = grn_ctx_pop(ctx); } - token->curr = (const unsigned char *)GRN_TEXT_VALUE(current_token); - token->curr_size = GRN_TEXT_LEN(current_token); + token_cursor->curr = (const unsigned char *)GRN_TEXT_VALUE(current_token); + token_cursor->curr_size = GRN_TEXT_LEN(current_token); return GRN_INT32_VALUE(status); } grn_id -grn_token_next(grn_ctx *ctx, grn_token *token) +grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) { int status; grn_id tid = GRN_ID_NIL; - grn_obj *table = token->table; - grn_obj *tokenizer = token->tokenizer; - while (token->status != GRN_TOKEN_DONE) { + grn_obj *table = token_cursor->table; + grn_obj *tokenizer = token_cursor->tokenizer; + while (token_cursor->status != GRN_TOKEN_DONE) { if (tokenizer) { grn_obj *curr_, *stat_; - ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token->pctx.user_data); + ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table, &token_cursor->pctx.user_data); stat_ = grn_ctx_pop(ctx); curr_ = grn_ctx_pop(ctx); - status = grn_token_next_apply_token_filters(ctx, token, curr_, stat_); - token->status = ((status & GRN_TOKENIZER_TOKEN_LAST) || - (token->mode == GRN_TOKEN_GET && - (status & GRN_TOKENIZER_TOKEN_REACH_END))) + status = grn_token_cursor_next_apply_token_filters(ctx, token_cursor, + curr_, stat_); + token_cursor->status = + ((status & GRN_TOKENIZER_TOKEN_LAST) || + (token_cursor->mode == GRN_TOKEN_GET && + (status & GRN_TOKENIZER_TOKEN_REACH_END))) ? GRN_TOKEN_DONE : GRN_TOKEN_DOING; - token->force_prefix = 0; + token_cursor->force_prefix = 0; #define SKIP_FLAGS \ (GRN_TOKENIZER_TOKEN_SKIP | GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) if (status & SKIP_FLAGS) { if (status & GRN_TOKENIZER_TOKEN_SKIP) { - token->pos++; + token_cursor->pos++; } - if (token->status == GRN_TOKEN_DONE && tid == GRN_ID_NIL) { - token->status = GRN_TOKEN_DONE_SKIP; + if (token_cursor->status == GRN_TOKEN_DONE && tid == GRN_ID_NIL) { + token_cursor->status = GRN_TOKEN_DONE_SKIP; break; } else { continue; } } #undef SKIP_FLAGS - if (token->curr_size == 0) { + if (token_cursor->curr_size == 0) { char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE]; int tokenizer_name_length; tokenizer_name_length = - grn_obj_name(ctx, token->tokenizer, + grn_obj_name(ctx, token_cursor->tokenizer, tokenizer_name, GRN_TABLE_MAX_KEY_SIZE); GRN_LOG(ctx, GRN_WARN, "[token_next] ignore an empty token: <%.*s>: <%.*s>", tokenizer_name_length, tokenizer_name, - token->orig_blen, token->orig); + token_cursor->orig_blen, token_cursor->orig); continue; } - if (token->curr_size > GRN_TABLE_MAX_KEY_SIZE) { + if (token_cursor->curr_size > GRN_TABLE_MAX_KEY_SIZE) { GRN_LOG(ctx, GRN_WARN, "[token_next] ignore too long token. " "Token must be less than or equal to %d: <%d>(<%.*s>)", GRN_TABLE_MAX_KEY_SIZE, - token->curr_size, - token->curr_size, token->curr); + token_cursor->curr_size, + token_cursor->curr_size, token_cursor->curr); continue; } if (status & GRN_TOKENIZER_TOKEN_UNMATURED) { if (status & GRN_TOKENIZER_TOKEN_OVERLAP) { - if (token->mode == GRN_TOKEN_GET) { token->pos++; continue; } + if (token_cursor->mode == GRN_TOKEN_GET) { token_cursor->pos++; continue; } } else { - if (status & GRN_TOKENIZER_TOKEN_LAST) { token->force_prefix = 1; } + if (status & GRN_TOKENIZER_TOKEN_LAST) { token_cursor->force_prefix = 1; } } } } else { - token->status = GRN_TOKEN_DONE; + token_cursor->status = GRN_TOKEN_DONE; } - if (token->mode == GRN_TOKEN_ADD) { + if (token_cursor->mode == GRN_TOKEN_ADD) { switch (table->header.type) { case GRN_TABLE_PAT_KEY : if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_pat_add(ctx, (grn_pat *)table, token->curr, token->curr_size, + tid = grn_pat_add(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_pat *)table)->io); } @@ -740,7 +747,7 @@ grn_token_next(grn_ctx *ctx, grn_token *token) if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_dat_add(ctx, (grn_dat *)table, token->curr, token->curr_size, + tid = grn_dat_add(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_dat *)table)->io); } @@ -749,14 +756,14 @@ grn_token_next(grn_ctx *ctx, grn_token *token) if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) { tid = GRN_ID_NIL; } else { - tid = grn_hash_add(ctx, (grn_hash *)table, token->curr, token->curr_size, + tid = grn_hash_add(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size, NULL, NULL); grn_io_unlock(((grn_hash *)table)->io); } break; case GRN_TABLE_NO_KEY : - if (token->curr_size == sizeof(grn_id)) { - tid = *((grn_id *)token->curr); + if (token_cursor->curr_size == sizeof(grn_id)) { + tid = *((grn_id *)token_cursor->curr); } else { tid = GRN_ID_NIL; } @@ -765,36 +772,37 @@ grn_token_next(grn_ctx *ctx, grn_token *token) } else { switch (table->header.type) { case GRN_TABLE_PAT_KEY : - tid = grn_pat_get(ctx, (grn_pat *)table, token->curr, token->curr_size, NULL); + tid = grn_pat_get(ctx, (grn_pat *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_DAT_KEY : - tid = grn_dat_get(ctx, (grn_dat *)table, token->curr, token->curr_size, NULL); + tid = grn_dat_get(ctx, (grn_dat *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_HASH_KEY : - tid = grn_hash_get(ctx, (grn_hash *)table, token->curr, token->curr_size, NULL); + tid = grn_hash_get(ctx, (grn_hash *)table, token_cursor->curr, token_cursor->curr_size, NULL); break; case GRN_TABLE_NO_KEY : - if (token->curr_size == sizeof(grn_id)) { - tid = *((grn_id *)token->curr); + if (token_cursor->curr_size == sizeof(grn_id)) { + tid = *((grn_id *)token_cursor->curr); } else { tid = GRN_ID_NIL; } break; } } - if (tid == GRN_ID_NIL && token->status != GRN_TOKEN_DONE) { - token->status = GRN_TOKEN_NOT_FOUND; + if (tid == GRN_ID_NIL && token_cursor->status != GRN_TOKEN_DONE) { + token_cursor->status = GRN_TOKEN_NOT_FOUND; } - token->pos++; + token_cursor->pos++; break; } return tid; } static void -grn_token_close_token_filters(grn_ctx *ctx, grn_token *token) +grn_token_cursor_close_token_filters(grn_ctx *ctx, + grn_token_cursor *token_cursor) { - grn_obj *token_filters = token->token_filters; + grn_obj *token_filters = token_cursor->token_filters; unsigned int i, n_token_filters; if (token_filters) { @@ -804,32 +812,32 @@ grn_token_close_token_filters(grn_ctx *ctx, grn_token *token) } for (i = 0; i < n_token_filters; i++) { grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); - grn_proc_ctx *token_filter_ctx = &token->token_filter_ctxs[i]; + grn_proc_ctx *token_filter_ctx = &token_cursor->token_filter_ctxs[i]; ((grn_proc *)token_filter)->funcs[PROC_FIN](ctx, 1, - &token->table, + &token_cursor->table, &token_filter_ctx->user_data); } - if (token->token_filter_ctxs) { - GRN_FREE(token->token_filter_ctxs); + if (token_cursor->token_filter_ctxs) { + GRN_FREE(token_cursor->token_filter_ctxs); } } grn_rc -grn_token_close(grn_ctx *ctx, grn_token *token) +grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor) { - if (token) { - if (token->tokenizer) { - ((grn_proc *)token->tokenizer)->funcs[PROC_FIN](ctx, 1, &token->table, - &token->pctx.user_data); + if (token_cursor) { + if (token_cursor->tokenizer) { + ((grn_proc *)token_cursor->tokenizer)->funcs[PROC_FIN](ctx, 1, &token_cursor->table, + &token_cursor->pctx.user_data); } - grn_token_close_token_filters(ctx, token); - if (token->nstr) { - grn_obj_close(ctx, token->nstr); + grn_token_cursor_close_token_filters(ctx, token_cursor); + if (token_cursor->nstr) { + grn_obj_close(ctx, token_cursor->nstr); } - GRN_FREE(token); + GRN_FREE(token_cursor); return GRN_SUCCESS; } else { return GRN_INVALID_ARGUMENT; diff --git a/lib/token.h b/lib/token.h index 938597e09a..89a2eb0870 100644 --- a/lib/token.h +++ b/lib/token.h @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2 -*- */ -/* Copyright(C) 2009 Brazil +/* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -64,7 +64,7 @@ typedef struct { grn_proc_ctx *token_filter_ctxs; uint32_t variant; grn_obj *nstr; -} grn_token; +} grn_token_cursor; extern grn_obj *grn_token_uvector; @@ -73,12 +73,13 @@ grn_rc grn_token_fin(void); #define GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER (0x01L<<0) -GRN_API grn_token *grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, - size_t str_len, grn_token_mode mode, - unsigned int flags); +GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, + const char *str, size_t str_len, + grn_token_mode mode, + unsigned int flags); -GRN_API grn_id grn_token_next(grn_ctx *ctx, grn_token *ng); -GRN_API grn_rc grn_token_close(grn_ctx *ctx, grn_token *ng); +GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor); +GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor); grn_rc grn_db_init_mecab_tokenizer(grn_ctx *ctx); grn_rc grn_db_init_builtin_tokenizers(grn_ctx *ctx); diff --git a/plugins/suggest/suggest.c b/plugins/suggest/suggest.c index ea7b6adbf6..3e291b96d9 100644 --- a/plugins/suggest/suggest.c +++ b/plugins/suggest/suggest.c @@ -1,5 +1,5 @@ /* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ -/* Copyright(C) 2010-2013 Brazil +/* Copyright(C) 2010-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -888,14 +888,15 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner) int keylen = grn_table_get_key(ctx, learner->items, learner->post_item_id, keybuf, GRN_TABLE_MAX_KEY_SIZE); unsigned int token_flags = 0; - grn_token *token = grn_token_open(ctx, learner->items, keybuf, keylen, - GRN_TOKEN_ADD, token_flags); - if (token) { + grn_token_cursor *token_cursor = + grn_token_cursor_open(ctx, learner->items, keybuf, keylen, + GRN_TOKEN_ADD, token_flags); + if (token_cursor) { grn_id tid; grn_obj *pre_item = &(learner->pre_item); grn_obj *post_item = learner->post_item; grn_hash *token_ids = NULL; - while ((tid = grn_token_next(ctx, token)) && tid != learner->post_item_id) { + while ((tid = grn_token_cursor_next(ctx, token_cursor)) && tid != learner->post_item_id) { uint64_t key; int added; grn_id pair_id; @@ -924,7 +925,7 @@ learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner) if (token_ids) { grn_hash_close(ctx, token_ids); } - grn_token_close(ctx, token); + grn_token_cursor_close(ctx, token_cursor); } }