Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
column_create: add more validations
1: Full text search index for vector column must have the WITH_SECTION flag.

2: Full text search index for vector column must not be a multi-column
index. With the following commands, index_column_source_records()
returns no source records:

    plugin_register functions/index_column

    table_create Docs TABLE_HASH_KEY ShortText
    column_create Docs sentences1 COLUMN_VECTOR Text
    column_create Docs sentences2 COLUMN_VECTOR Text

    table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
    column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION|WITH_SECTION Docs sentences1,sentences2

    load --table Docs
    [
    {"_key": "x", "sentences1": ["-", "-", "-"], "sentences2": ["-", "-", "-"]}
    ]

    load --table Docs
    [
    {"_key": "x", "sentences1": []}
    ]

    select Words \
      --limit -1 \
      --sort_keys _key \
      --output_columns '_key, index_column_source_records("docs_sentences")'

index_column_source_records() output:

    [
      [
        0,
        0.0,
        0.0
      ],
      [
        [
          [
            1
          ],
          [
            [
              "_key",
              "ShortText"
            ],
            [
              "index_column_source_records",
              null
            ]
          ],
          [
            "-",
            [

            ]
          ]
        ]
      ]
    ]

The second load unexpectedly removes the posting lists for
Docs.sentences2. We can remove the 2nd validation once updating a
record through a multi-column index is supported.
  • Loading branch information
kou committed Jun 25, 2018
1 parent a2815fc commit 08e2456
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 128 deletions.
64 changes: 44 additions & 20 deletions lib/db.c
Expand Up @@ -8804,6 +8804,7 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
grn_obj *lexicon_domain = NULL; grn_obj *lexicon_domain = NULL;
grn_bool lexicon_domain_is_table; grn_bool lexicon_domain_is_table;
grn_bool lexicon_have_tokenizer; grn_bool lexicon_have_tokenizer;
grn_bool is_full_text_search_index;
grn_id *source_ids; grn_id *source_ids;
int i, n_source_ids; int i, n_source_ids;


Expand All @@ -8824,29 +8825,13 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL);
lexicon_have_tokenizer = (tokenizer != NULL); lexicon_have_tokenizer = (tokenizer != NULL);
} }
is_full_text_search_index =
(grn_obj_is_index_column(ctx, obj) &&
(obj->header.flags & GRN_OBJ_WITH_POSITION) &&
lexicon_have_tokenizer);


source_ids = (grn_id *)GRN_BULK_HEAD(value); source_ids = (grn_id *)GRN_BULK_HEAD(value);
n_source_ids = GRN_BULK_VSIZE(value) / sizeof(grn_id); n_source_ids = GRN_BULK_VSIZE(value) / sizeof(grn_id);
if (grn_obj_is_index_column(ctx, obj) && n_source_ids == 1) {
grn_obj *source;

source = grn_ctx_at(ctx, source_ids[0]);
if (grn_obj_is_vector_column(ctx, source) &&
(obj->header.flags & GRN_OBJ_WITH_POSITION) &&
lexicon_have_tokenizer &&
!(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
char index_name[GRN_TABLE_MAX_KEY_SIZE];
int index_name_size;
index_name_size = grn_obj_name(ctx, obj,
index_name, GRN_TABLE_MAX_KEY_SIZE);
ERR(GRN_INVALID_ARGUMENT,
"grn_obj_set_info(): GRN_INFO_SOURCE: "
"full text index for vector column "
"must be created with WITH_SECTION flag: <%.*s>",
index_name_size, index_name);
goto exit;
}
}


if (n_source_ids > 1 && !(obj->header.flags & GRN_OBJ_WITH_SECTION)) { if (n_source_ids > 1 && !(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
char index_name[GRN_TABLE_MAX_KEY_SIZE]; char index_name[GRN_TABLE_MAX_KEY_SIZE];
Expand All @@ -8860,6 +8845,45 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
goto exit; goto exit;
} }


if (is_full_text_search_index) {
grn_bool have_vector_source_column = GRN_FALSE;

for (i = 0; i < n_source_ids; i++) {
grn_obj *source;

source = grn_ctx_at(ctx, source_ids[i]);
if (!grn_obj_is_vector_column(ctx, source)) {
continue;
}

have_vector_source_column = GRN_TRUE;
if (!(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
char index_name[GRN_TABLE_MAX_KEY_SIZE];
int index_name_size;
index_name_size = grn_obj_name(ctx, obj,
index_name, GRN_TABLE_MAX_KEY_SIZE);
ERR(GRN_INVALID_ARGUMENT,
"grn_obj_set_info(): GRN_INFO_SOURCE: "
"full text index for vector column "
"must be created with WITH_SECTION flag: <%.*s>",
index_name_size, index_name);
goto exit;
}
}

if (have_vector_source_column && n_source_ids > 1) {
char index_name[GRN_TABLE_MAX_KEY_SIZE];
int index_name_size;
index_name_size = grn_obj_name(ctx, obj,
index_name, GRN_TABLE_MAX_KEY_SIZE);
ERR(GRN_INVALID_ARGUMENT,
"grn_obj_set_info(): GRN_INFO_SOURCE: "
"multi column full text index with vector column isn't supported yet: "
"<%.*s>",
index_name_size, index_name);
goto exit;
}
}


for (i = 0; i < n_source_ids; i++) { for (i = 0; i < n_source_ids; i++) {
grn_id source_id = source_ids[i]; grn_id source_id = source_ids[i];
Expand Down
@@ -0,0 +1,23 @@
plugin_register functions/index_column
[[0,0.0,0.0],true]
table_create Docs TABLE_NO_KEY
[[0,0.0,0.0],true]
column_create Docs title COLUMN_SCALAR ShortText
[[0,0.0,0.0],true]
column_create Docs sentences COLUMN_VECTOR Text
[[0,0.0,0.0],true]
table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
[[0,0.0,0.0],true]
column_create Words docs_content COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences
[
[
[
-22,
0.0,
0.0
],
"grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content>"
],
false
]
#|e| grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content>
@@ -0,0 +1,9 @@
plugin_register functions/index_column

table_create Docs TABLE_NO_KEY
column_create Docs title COLUMN_SCALAR ShortText
column_create Docs sentences COLUMN_VECTOR Text

table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
column_create Words docs_content \
COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences
@@ -0,0 +1,10 @@
plugin_register functions/index_column
[[0,0.0,0.0],true]
table_create Docs TABLE_NO_KEY
[[0,0.0,0.0],true]
column_create Docs sentences COLUMN_VECTOR Text
[[0,0.0,0.0],true]
table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
[[0,0.0,0.0],true]
column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences
[[0,0.0,0.0],true]
@@ -0,0 +1,7 @@
plugin_register functions/index_column

table_create Docs TABLE_NO_KEY
column_create Docs sentences COLUMN_VECTOR Text

table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences
Expand Up @@ -4,11 +4,6 @@ table_create Docs TABLE_NO_KEY
[[0,0.0,0.0],true] [[0,0.0,0.0],true]
column_create Docs sentences COLUMN_VECTOR Text column_create Docs sentences COLUMN_VECTOR Text
[[0,0.0,0.0],true] [[0,0.0,0.0],true]
load --table Docs
[
{"sentences": ["-", "-", "-"]}
]
[[0,0.0,0.0],1]
table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
[[0,0.0,0.0],true] [[0,0.0,0.0],true]
column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
Expand All @@ -24,28 +19,3 @@ column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
false false
] ]
#|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences> #|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences>
select Words --limit -1 --sort_keys _key --output_columns '_key, index_column_source_records("docs_sentences")'
[
[
0,
0.0,
0.0
],
[
[
[
0
],
[
[
"_key",
"ShortText"
],
[
"index_column_source_records",
null
]
]
]
]
]
Expand Up @@ -3,15 +3,5 @@ plugin_register functions/index_column
table_create Docs TABLE_NO_KEY table_create Docs TABLE_NO_KEY
column_create Docs sentences COLUMN_VECTOR Text column_create Docs sentences COLUMN_VECTOR Text


load --table Docs
[
{"sentences": ["-", "-", "-"]}
]

table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences

select Words \
--limit -1 \
--sort_keys _key \
--output_columns '_key, index_column_source_records("docs_sentences")'

This file was deleted.

This file was deleted.

0 comments on commit 08e2456

Please sign in to comment.