Navigation Menu

Skip to content

Commit

Permalink
Implement no NUL-terminated support strstr()
Browse files Browse the repository at this point in the history
It should be implemented by more fast algorithm such as Boyer-Moor
algorithm. The algorithm is used in snip.c.
  • Loading branch information
kou committed Jan 12, 2015
1 parent 34d228a commit b1e3db9
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 52 deletions.
60 changes: 42 additions & 18 deletions lib/expr.c
Expand Up @@ -2665,6 +2665,30 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
} \
} while (0)

static grn_bool
string_is_contained(grn_ctx *ctx,
const char *text, unsigned int text_len,
const char *sub_text, unsigned int sub_text_len)
{
/* TODO: Use more fast algorithm such as Boyer-Moore algorithm that
* is used in snip.c. */
const char *text_end = text + text_len;
unsigned int sub_text_current = 0;

for (; text < text_end; text++) {
if (text[0] == sub_text[sub_text_current]) {
sub_text_current++;
if (sub_text_current == sub_text_len) {
return GRN_TRUE;
}
} else {
sub_text_current = 0;
}
}

return GRN_FALSE;
}

static grn_bool
pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
const char *x, unsigned int x_len,
Expand All @@ -2675,15 +2699,22 @@ pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
grn_obj *norm_y;
const char *norm_x_raw;
const char *norm_y_raw;
unsigned int norm_x_raw_length_in_bytes;
unsigned int norm_y_raw_length_in_bytes;
grn_bool matched = GRN_FALSE;

normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
norm_x = grn_string_open(ctx, x, x_len, normalizer, 0);
norm_y = grn_string_open(ctx, y, y_len, normalizer, 0);
grn_string_get_normalized(ctx, norm_x, &norm_x_raw, NULL, NULL);
grn_string_get_normalized(ctx, norm_y, &norm_y_raw, NULL, NULL);
/* normalized str doesn't contain '\0'. */
matched = (strstr(norm_x_raw, norm_y_raw) != NULL);
grn_string_get_normalized(ctx, norm_x,
&norm_x_raw, &norm_x_raw_length_in_bytes,
NULL);
grn_string_get_normalized(ctx, norm_y,
&norm_y_raw, &norm_y_raw_length_in_bytes,
NULL);
matched = string_is_contained(ctx,
norm_x_raw, norm_x_raw_length_in_bytes,
norm_y_raw, norm_y_raw_length_in_bytes);

grn_obj_close(ctx, norm_x);
grn_obj_close(ctx, norm_y);
Expand Down Expand Up @@ -2711,22 +2742,15 @@ pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table,
if (normalizer) {
grn_obj *norm_y;
const char *norm_y_raw;
unsigned int norm_y_raw_length_in_bytes;
norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
normalizer, 0);
grn_string_get_normalized(ctx, norm_y, &norm_y_raw, NULL, NULL);

if (x_key_len == GRN_TABLE_MAX_KEY_SIZE) {
grn_obj x_key_buffer;
GRN_TEXT_INIT(&x_key_buffer, 0);
GRN_TEXT_PUT(ctx, &x_key_buffer, x_key, x_key_len);
GRN_TEXT_PUTC(ctx, &x_key_buffer, '\0');
matched = (strstr(GRN_TEXT_VALUE(&x_key_buffer), norm_y_raw) != NULL);
GRN_OBJ_FIN(ctx, &x_key_buffer);
} else {
x_key[x_key_len] = '\0';
matched = (strstr(x_key, norm_y_raw) != NULL);
}

grn_string_get_normalized(ctx, norm_y,
&norm_y_raw, &norm_y_raw_length_in_bytes,
NULL);
matched = string_is_contained(ctx,
x_key, x_key_len,
norm_y_raw, norm_y_raw_length_in_bytes);
grn_obj_close(ctx, norm_y);
} else {
matched = pseudo_query_scan_raw_text_raw_text(ctx,
Expand Down
35 changes: 2 additions & 33 deletions test/command/suite/select/query/match/no_index.expected
Expand Up @@ -9,36 +9,5 @@ load --table Users
{"name": "Carlos"}
]
[[0,0.0,0.0],3]
select Users --query name:@a
[
[
0,
0.0,
0.0
],
[
[
[
2
],
[
[
"_id",
"UInt32"
],
[
"name",
"ShortText"
]
],
[
1,
"Alice"
],
[
3,
"Carlos"
]
]
]
]
select Users --query name:@aRl
[[0,0.0,0.0],[[[1],[["_id","UInt32"],["name","ShortText"]],[3,"Carlos"]]]]
2 changes: 1 addition & 1 deletion test/command/suite/select/query/match/no_index.test
Expand Up @@ -8,4 +8,4 @@ load --table Users
{"name": "Carlos"}
]

select Users --query name:@a
select Users --query name:@aRl

0 comments on commit b1e3db9

Please sign in to comment.