Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

near search: add min_interval #1520

Merged
merged 52 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7e21e25
Add min_interval for syntaxx
HashidaTKS Feb 10, 2023
b4b9a08
Fix parsing bug
HashidaTKS Feb 10, 2023
100b970
Assign select_optarg->min_interval
HashidaTKS Feb 10, 2023
f920f36
Modify scan_info_data.rb for adding min_interval
HashidaTKS Feb 10, 2023
9e975dc
Add min_interval initialization in selector_in_values
HashidaTKS Feb 10, 2023
b33d4fb
Fix missing implementation
HashidaTKS Feb 11, 2023
ac642aa
Fix the place of adding min_interval
HashidaTKS Feb 12, 2023
0409d1a
Fix a typo
HashidaTKS Feb 13, 2023
033aa9c
Add min_interval logic
HashidaTKS Feb 15, 2023
50996d6
Add min_interval logic
HashidaTKS Feb 17, 2023
cc418d5
Modify default of min_interval
HashidaTKS Feb 20, 2023
d46a7b1
Fix if condition for comparison
HashidaTKS Feb 20, 2023
481ebcc
Add tests
HashidaTKS Feb 20, 2023
5e0ee74
Add min_interval check
HashidaTKS Feb 21, 2023
a51e423
*ONP: skip if "interval == 0"
HashidaTKS Feb 21, 2023
50edc47
Add tests for query
HashidaTKS Feb 21, 2023
42162ca
Add tests for filter
HashidaTKS Feb 21, 2023
63ba3ec
Remove temporary changes
HashidaTKS Feb 21, 2023
7abbe06
Fix a bug that some max_element_interval check is
HashidaTKS Feb 21, 2023
57fccbe
Fix a typo (overrap -> overlap)
HashidaTKS Feb 22, 2023
b50a988
Rename tests path
HashidaTKS Feb 22, 2023
c8a4676
Specify default min_interval
HashidaTKS Feb 22, 2023
4d44c4e
Return with two lines
HashidaTKS Feb 22, 2023
e819b54
Modify "or" to "and"
HashidaTKS Feb 22, 2023
da46302
Align indents
HashidaTKS Feb 22, 2023
a950597
Align indent
HashidaTKS Feb 22, 2023
26eecae
Fix test cases in order to use different values for max and min
HashidaTKS Feb 22, 2023
0782c50
Add invalid min_interval input check
HashidaTKS Feb 22, 2023
b0307f0
Align position
HashidaTKS Feb 22, 2023
bc063c6
Refacter grn_ecmascript.c
HashidaTKS Feb 24, 2023
59f5027
Remove extra space
HashidaTKS Feb 24, 2023
0e41c57
Simplify grn_ii_select_data_find_phrase_product
HashidaTKS Feb 24, 2023
872065d
Fix tests
HashidaTKS Feb 24, 2023
c07da90
Add tests for multiple near search cases in filter
HashidaTKS Feb 24, 2023
51e3fae
Add tests for multiple near search cases in query
HashidaTKS Feb 24, 2023
0d0131c
Reduce variable scope
HashidaTKS Feb 24, 2023
9125cbf
Keep backward commpatiblity
HashidaTKS Feb 24, 2023
39421f6
Assign correct value
HashidaTKS Feb 24, 2023
0458ed6
Align function position
HashidaTKS Feb 24, 2023
14d5f8e
Rename GRN_SELECT_DEFAULT_NEAR_MIN_INTERVAL
HashidaTKS Feb 24, 2023
c45b988
Add tests for using with additional_last_interval
HashidaTKS Feb 27, 2023
6989d4e
Apply the feedback
HashidaTKS Feb 27, 2023
b1f253c
Revert needless change
HashidaTKS Feb 27, 2023
408c90b
Modify while(0) to while(false)
HashidaTKS Feb 27, 2023
9c3f95c
Make min_interval in _grn_select_optarg int
HashidaTKS Feb 27, 2023
1f0fb82
Update lib/table_selector.c
HashidaTKS Feb 27, 2023
be41036
Align search_options.min_interval
HashidaTKS Feb 27, 2023
a0cf08c
parentheses
HashidaTKS Feb 27, 2023
68d90c3
Fix matching test cases
HashidaTKS Feb 27, 2023
0acb91c
Add a missing space between while and (false)
kou Feb 27, 2023
c60b464
Revert needless style change
kou Feb 27, 2023
ce669e8
Use the same order
kou Feb 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions include/groonga/groonga.h
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ struct _grn_search_optarg {
float weight_float;
grn_obj *query_options;
grn_obj *max_element_intervals;
int *min_interval;
};

GRN_API grn_rc grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query,
Expand Down
65 changes: 63 additions & 2 deletions lib/expr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1801,6 +1801,7 @@ grn_scan_info_free(grn_ctx *ctx,
(si)->max_interval = DEFAULT_MAX_INTERVAL;\
(si)->additional_last_interval = DEFAULT_ADDITIONAL_LAST_INTERVAL;\
GRN_INT32_INIT(&(si)->max_element_intervals, GRN_OBJ_VECTOR);\
(si)->min_interval = GRN_II_DEFAULT_NEAR_MIN_INTERVAL;\
(si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\
(si)->quorum_threshold = DEFAULT_QUORUM_THRESHOLD;\
(si)->start = (st);\
Expand Down Expand Up @@ -2305,6 +2306,18 @@ grn_scan_info_set_max_element_intervals(grn_ctx *ctx,
grn_uvector_copy(ctx, max_element_intervals, &(si->max_element_intervals));
}

/*
 * Returns the min_interval value currently stored in the given scan info.
 *
 * si: scan info to read from; dereferenced without a NULL check.
 */
int
grn_scan_info_get_min_interval(scan_info *si)
{
  const int current_min_interval = si->min_interval;
  return current_min_interval;
}

/*
 * Stores min_interval into the given scan info, replacing the previous value.
 *
 * si:           scan info to update; dereferenced without a NULL check.
 * min_interval: new value for si->min_interval; stored as-is, no validation.
 */
void
grn_scan_info_set_min_interval(scan_info *si, int min_interval)
{
  (si)->min_interval = min_interval;
}

int
grn_scan_info_get_similarity_threshold(scan_info *si)
{
Expand Down Expand Up @@ -2737,11 +2750,15 @@ scan_info_build_match(grn_ctx *ctx, scan_info *si, float weight)
*p == si->args[2] &&
(*p)->header.domain == GRN_DB_INT32) {
si->max_interval = GRN_INT32_VALUE(*p);
} else if (si->nargs == 4 &&
} else if (si->nargs >= 4 &&
*p == si->args[3] &&
(*p)->header.domain == GRN_DB_INT32 &&
grn_obj_is_uvector(ctx, *p)) {
grn_uvector_copy(ctx, *p, &(si->max_element_intervals));
} else if (si->nargs == 5 &&
*p == si->args[4] &&
(*p)->header.domain == GRN_DB_INT32) {
si->min_interval = GRN_INT32_VALUE(*p);
} else {
si->query = *p;
}
Expand All @@ -2759,11 +2776,15 @@ scan_info_build_match(grn_ctx *ctx, scan_info *si, float weight)
*p == si->args[3] &&
(*p)->header.domain == GRN_DB_INT32) {
si->additional_last_interval = GRN_INT32_VALUE(*p);
} else if (si->nargs == 5 &&
} else if (si->nargs >= 5 &&
*p == si->args[4] &&
(*p)->header.domain == GRN_DB_INT32 &&
grn_obj_is_uvector(ctx, *p)) {
grn_uvector_copy(ctx, *p, &(si->max_element_intervals));
} else if (si->nargs == 6 &&
*p == si->args[5] &&
(*p)->header.domain == GRN_DB_INT32) {
si->min_interval = GRN_INT32_VALUE(*p);
} else {
si->query = *p;
}
Expand Down Expand Up @@ -3823,6 +3844,7 @@ typedef struct {
grn_obj max_interval_stack;
grn_obj additional_last_interval_stack;
grn_obj max_element_intervals_stack;
grn_obj min_interval_stack;
grn_obj similarity_threshold_stack;
grn_obj quorum_threshold_stack;
grn_obj weight_stack;
Expand Down Expand Up @@ -3874,6 +3896,7 @@ typedef struct {
int32_t max_interval;
int32_t additional_last_interval;
grn_obj *max_element_intervals;
int32_t min_interval;
/* near is defined by Visual C++. */
#ifdef near
# undef near
Expand All @@ -3894,6 +3917,7 @@ parse_near_options(efs_info *q,
parse_query_op_data *data)
{
const char *end;
data->options.near.min_interval = GRN_II_DEFAULT_NEAR_MIN_INTERVAL;
data->options.near.max_interval = grn_atoi(start, q->str_end, &end);
if (start == end) {
data->options.near.max_interval = DEFAULT_MAX_INTERVAL;
Expand Down Expand Up @@ -3947,6 +3971,18 @@ parse_near_options(efs_info *q,
}
}

if (end < q->str_end && end[0] == ',') {
const char *min_interval_start = end + 1;
data->options.near.min_interval =
grn_atoi(min_interval_start,
q->str_end,
&end);
HashidaTKS marked this conversation as resolved.
Show resolved Hide resolved
if (min_interval_start == end) {
data->options.near.min_interval = GRN_II_DEFAULT_NEAR_MIN_INTERVAL;
return end;
}
}
HashidaTKS marked this conversation as resolved.
Show resolved Hide resolved

return end;
}

Expand Down Expand Up @@ -4230,6 +4266,13 @@ parse_query_accept_string(grn_ctx *ctx, efs_info *efsi,
1);
grn_expr_take_obj(ctx, efsi->e, max_element_intervals);
n_args++;
int min_interval = grn_int32_value_at(&efsi->min_interval_stack, -1);
grn_expr_append_const_int32(efsi->ctx,
efsi->e,
min_interval,
GRN_OP_PUSH,
1);
n_args++;
}
if (fpclassify(weight) == FP_ZERO) {
grn_expr_append_op(efsi->ctx, efsi->e, mode, n_args);
Expand Down Expand Up @@ -4266,6 +4309,13 @@ parse_query_accept_string(grn_ctx *ctx, efs_info *efsi,
1);
grn_expr_take_obj(ctx, efsi->e, max_element_intervals);
n_args++;
int min_interval = grn_int32_value_at(&efsi->min_interval_stack, -1);
grn_expr_append_const_int32(efsi->ctx,
efsi->e,
min_interval,
GRN_OP_PUSH,
1);
n_args++;
}
if (fpclassify(weight) == FP_ZERO) {
grn_expr_append_op(efsi->ctx, efsi->e, mode, n_args);
Expand Down Expand Up @@ -4628,6 +4678,9 @@ parse_query(grn_ctx *ctx, efs_info *q)
GRN_PTR_PUT(ctx,
&q->max_element_intervals_stack,
data.options.near.max_element_intervals);
GRN_INT32_PUT(ctx,
&q->min_interval_stack,
data.options.near.min_interval);
break;
case GRN_OP_NEAR_PHRASE :
case GRN_OP_ORDERED_NEAR_PHRASE :
Expand All @@ -4642,6 +4695,9 @@ parse_query(grn_ctx *ctx, efs_info *q)
GRN_PTR_PUT(ctx,
&q->max_element_intervals_stack,
data.options.near.max_element_intervals);
GRN_INT32_PUT(ctx,
&q->min_interval_stack,
data.options.near.min_interval);
break;
case GRN_OP_SIMILAR :
GRN_INT32_PUT(ctx,
Expand Down Expand Up @@ -5238,6 +5294,9 @@ parse_script(grn_ctx *ctx, efs_info *q)
GRN_PTR_PUT(ctx,
&q->max_element_intervals_stack,
data.options.near.max_element_intervals);
GRN_INT32_PUT(ctx,
&q->min_interval_stack,
data.options.near.min_interval);
PARSE(token);
q->cur = next_start;
}
Expand Down Expand Up @@ -5681,6 +5740,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr,
GRN_INT32_INIT(&efsi.max_interval_stack, GRN_OBJ_VECTOR);
GRN_INT32_INIT(&efsi.additional_last_interval_stack, GRN_OBJ_VECTOR);
GRN_PTR_INIT(&efsi.max_element_intervals_stack, GRN_OBJ_VECTOR, GRN_ID_NIL);
GRN_INT32_INIT(&efsi.min_interval_stack, GRN_OBJ_VECTOR);
GRN_INT32_INIT(&efsi.similarity_threshold_stack, GRN_OBJ_VECTOR);
GRN_INT32_INIT(&efsi.quorum_threshold_stack, GRN_OBJ_VECTOR);
GRN_FLOAT32_INIT(&efsi.weight_stack, GRN_OBJ_VECTOR);
Expand Down Expand Up @@ -5747,6 +5807,7 @@ grn_expr_parse(grn_ctx *ctx, grn_obj *expr,
GRN_OBJ_FIN(ctx, &efsi.max_interval_stack);
GRN_OBJ_FIN(ctx, &efsi.additional_last_interval_stack);
GRN_OBJ_FIN(ctx, &efsi.max_element_intervals_stack);
GRN_OBJ_FIN(ctx, &efsi.min_interval_stack);
GRN_OBJ_FIN(ctx, &efsi.similarity_threshold_stack);
GRN_OBJ_FIN(ctx, &efsi.quorum_threshold_stack);
GRN_OBJ_FIN(ctx, &efsi.weight_stack);
Expand Down