Navigation Menu

Skip to content

Commit

Permalink
ja: add zlib compression support for vector data
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Feb 19, 2018
1 parent 9c9120b commit 36f5d7f
Show file tree
Hide file tree
Showing 5 changed files with 322 additions and 3 deletions.
147 changes: 144 additions & 3 deletions lib/store.c
Expand Up @@ -1768,6 +1768,147 @@ grn_ja_put_zlib(grn_ctx *ctx, grn_ja *ja, grn_id id,
GRN_FREE(zvalue);
return rc;
}

/*
 * Stores a vector value, given as header + optional body + footer bulks,
 * into `ja` under `id`, compressing the concatenated bytes with zlib.
 *
 * When the total size is below COMPRESS_THRESHOLD_BYTE the value is not
 * compressed here; the call is delegated to grn_ja_putv_packed().
 *
 * `body` may be NULL, in which case it contributes no bytes.
 *
 * Returns the result of grn_ja_putv_compressed() on success. On any zlib
 * or allocation failure the error is reported through
 * grn_ja_compress_error() and ctx->rc is returned.
 */
grn_inline static grn_rc
grn_ja_putv_zlib(grn_ctx *ctx,
                 grn_ja *ja,
                 grn_id id,
                 grn_obj *header,
                 grn_obj *body,
                 grn_obj *footer,
                 int flags)
{
  grn_rc rc;
  const size_t header_size = GRN_BULK_VSIZE(header);
  const size_t body_size = body ? GRN_BULK_VSIZE(body) : 0;
  const size_t footer_size = GRN_BULK_VSIZE(footer);
  const size_t size = header_size + body_size + footer_size;
  z_stream zstream;
  Bytef *zvalue = NULL;
  const char *error_message = NULL;
  int zwindow_bits = 15;
  int zmem_level = 8;
  int zrc;

  if (size < COMPRESS_THRESHOLD_BYTE) {
    return grn_ja_putv_packed(ctx, ja, id, header, body, footer, flags);
  }

  /* zlib requires zalloc, zfree and opaque to be set before deflateInit2(). */
  zstream.zalloc = Z_NULL;
  zstream.zfree = Z_NULL;
  zstream.opaque = Z_NULL;
  zrc = deflateInit2(&zstream,
                     Z_DEFAULT_COMPRESSION,
                     Z_DEFLATED,
                     zwindow_bits,
                     zmem_level,
                     Z_DEFAULT_STRATEGY);
  if (zrc != Z_OK) {
    grn_ja_compress_error(ctx,
                          ja,
                          id,
                          GRN_ZLIB_ERROR,
                          "[zlib] failed to initialize compressor",
                          grn_zrc_to_string(zrc));
    return ctx->rc;
  }

  /* Size the output for the worst case so that the whole stream fits in
   * one buffer and no output-side looping is needed. */
  zstream.avail_out = deflateBound(&zstream, size);
  zvalue = GRN_MALLOC(zstream.avail_out);
  zstream.next_out = zvalue;
  if (!zvalue) {
    deflateEnd(&zstream);
    grn_ja_compress_error(ctx,
                          ja,
                          id,
                          GRN_ZLIB_ERROR,
                          "[zlib] failed to allocate compress buffer",
                          NULL);
    return ctx->rc;
  }

  /* Each part is fed only when it is non-empty: deflate() may report
   * Z_BUF_ERROR when it is called with no input and nothing to flush. */
  if (header_size > 0) {
    zstream.next_in = GRN_BULK_HEAD(header);
    zstream.avail_in = header_size;
    zrc = deflate(&zstream, Z_NO_FLUSH);
    if (zrc != Z_OK) {
      error_message = "[zlib] failed to compress header";
      goto compress_failed;
    }
  }

  if (body_size > 0) {
    zstream.next_in = GRN_BULK_HEAD(body);
    zstream.avail_in = body_size;
    zrc = deflate(&zstream, Z_NO_FLUSH);
    if (zrc != Z_OK) {
      error_message = "[zlib] failed to compress body";
      goto compress_failed;
    }
  }

  if (footer_size > 0) {
    zstream.next_in = GRN_BULK_HEAD(footer);
    zstream.avail_in = footer_size;
    zrc = deflate(&zstream, Z_NO_FLUSH);
    if (zrc != Z_OK) {
      error_message = "[zlib] failed to compress footer";
      goto compress_failed;
    }
  }

  /* Flush everything that is still pending and terminate the stream. */
  zrc = deflate(&zstream, Z_FINISH);
  if (zrc != Z_STREAM_END) {
    error_message = "[zlib] failed to finish compression";
    goto compress_failed;
  }

  rc = grn_ja_putv_compressed(ctx,
                              ja,
                              id,
                              zvalue,
                              zstream.total_out,
                              size,
                              flags);

  GRN_FREE(zvalue);
  zrc = deflateEnd(&zstream);
  if (zrc != Z_OK) {
    grn_ja_compress_error(ctx,
                          ja,
                          id,
                          GRN_ZLIB_ERROR,
                          "[zlib] failed to free compressor",
                          grn_zrc_to_string(zrc));
    return ctx->rc;
  }

  return rc;

compress_failed:
  /* Shared failure path for every deflate() call above: release the
   * output buffer and the compressor, then report using the zlib status
   * that triggered the jump. */
  GRN_FREE(zvalue);
  deflateEnd(&zstream);
  grn_ja_compress_error(ctx,
                        ja,
                        id,
                        GRN_ZLIB_ERROR,
                        error_message,
                        grn_zrc_to_string(zrc));
  return ctx->rc;
}
#endif /* GRN_WITH_ZLIB */

#ifdef GRN_WITH_LZ4
Expand Down Expand Up @@ -2208,9 +2349,9 @@ grn_ja_putv(grn_ctx *ctx, grn_ja *ja, grn_id id, grn_obj *vector, int flags)

switch (ja->header->flags & GRN_OBJ_COMPRESS_MASK) {
#ifdef GRN_WITH_ZLIB
/* case GRN_OBJ_COMPRESS_ZLIB : */
/* rc = grn_ja_putv_zlib(ctx, ja, id, &header, body, &footer, flags); */
/* break; */
case GRN_OBJ_COMPRESS_ZLIB :
rc = grn_ja_putv_zlib(ctx, ja, id, &header, body, &footer, flags);
break;
#endif /* GRN_WITH_ZLIB */
#ifdef GRN_WITH_LZ4
case GRN_OBJ_COMPRESS_LZ4 :
Expand Down
67 changes: 67 additions & 0 deletions test/command/suite/select/output/zlib/vector/compressed.expected
@@ -0,0 +1,67 @@
table_create Entries TABLE_PAT_KEY ShortText
[[0,0.0,0.0],true]
column_create Entries contents COLUMN_VECTOR|COMPRESS_ZLIB Text
[[0,0.0,0.0],true]
load --table Entries
[
{
"_key": "Groonga",
"contents": [
"(256 bytes or more required for compression.)",
"Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."
]
},
{
"_key": "Mroonga",
"contents": [
"(256 bytes or more required for compression.)",
"Mroonga is a storage engine for MySQL. It provides fast fulltext search feature for all languages including Chinese, Japanese and Korean to all MySQL users. Mroonga was called Groonga storage engine. Mroonga is Tritonn successor."
]
}
]
[[0,0.0,0.0],2]
select Entries
[
[
0,
0.0,
0.0
],
[
[
[
2
],
[
[
"_id",
"UInt32"
],
[
"_key",
"ShortText"
],
[
"contents",
"Text"
]
],
[
1,
"Groonga",
[
"(256 bytes or more required for compression.)",
"Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."
]
],
[
2,
"Mroonga",
[
"(256 bytes or more required for compression.)",
"Mroonga is a storage engine for MySQL. It provides fast fulltext search feature for all languages including Chinese, Japanese and Korean to all MySQL users. Mroonga was called Groonga storage engine. Mroonga is Tritonn successor."
]
]
]
]
]
22 changes: 22 additions & 0 deletions test/command/suite/select/output/zlib/vector/compressed.test
@@ -0,0 +1,22 @@
table_create Entries TABLE_PAT_KEY ShortText
column_create Entries contents COLUMN_VECTOR|COMPRESS_ZLIB Text

load --table Entries
[
{
"_key": "Groonga",
"contents": [
"(256 bytes or more required for compression.)",
"Groonga is a fast and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Groonga allows updates without read locks. These characteristics result in superior performance on real-time applications."
]
},
{
"_key": "Mroonga",
"contents": [
"(256 bytes or more required for compression.)",
"Mroonga is a storage engine for MySQL. It provides fast fulltext search feature for all languages including Chinese, Japanese and Korean to all MySQL users. Mroonga was called Groonga storage engine. Mroonga is Tritonn successor."
]
}
]

select Entries
67 changes: 67 additions & 0 deletions test/command/suite/select/output/zlib/vector/packed.expected
@@ -0,0 +1,67 @@
table_create Entries TABLE_PAT_KEY ShortText
[[0,0.0,0.0],true]
column_create Entries contents COLUMN_VECTOR|COMPRESS_ZLIB Text
[[0,0.0,0.0],true]
load --table Entries
[
{
"_key": "Groonga",
"contents": [
"I found Groonga.",
"It's a fast fulltext search engine!"
]
},
{
"_key": "Mroonga",
"contents": [
"I found Mroonga.",
"It's a MySQL storage engine to use Groonga!"
]
}
]
[[0,0.0,0.0],2]
select Entries
[
[
0,
0.0,
0.0
],
[
[
[
2
],
[
[
"_id",
"UInt32"
],
[
"_key",
"ShortText"
],
[
"contents",
"Text"
]
],
[
1,
"Groonga",
[
"I found Groonga.",
"It's a fast fulltext search engine!"
]
],
[
2,
"Mroonga",
[
"I found Mroonga.",
"It's a MySQL storage engine to use Groonga!"
]
]
]
]
]
22 changes: 22 additions & 0 deletions test/command/suite/select/output/zlib/vector/packed.test
@@ -0,0 +1,22 @@
table_create Entries TABLE_PAT_KEY ShortText
column_create Entries contents COLUMN_VECTOR|COMPRESS_ZLIB Text

load --table Entries
[
{
"_key": "Groonga",
"contents": [
"I found Groonga.",
"It's a fast fulltext search engine!"
]
},
{
"_key": "Mroonga",
"contents": [
"I found Mroonga.",
"It's a MySQL storage engine to use Groonga!"
]
}
]

select Entries

0 comments on commit 36f5d7f

Please sign in to comment.