Navigation Menu

Skip to content

Commit

Permalink
hash: increase max key size
Browse files Browse the repository at this point in the history
(0x1000)  4096 ->
(0xffff) 65535
  • Loading branch information
kou committed Apr 4, 2015
1 parent 294912c commit 1e870f3
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 59 deletions.
4 changes: 2 additions & 2 deletions lib/db.c
Expand Up @@ -7744,7 +7744,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
switch (DB_OBJ(obj)->header.type) {
case GRN_TABLE_HASH_KEY :
((grn_hash *)obj)->tokenizer = value;
((grn_hash *)obj)->header->tokenizer = grn_obj_id(ctx, value);
((grn_hash *)obj)->header.common->tokenizer = grn_obj_id(ctx, value);
rc = GRN_SUCCESS;
break;
case GRN_TABLE_PAT_KEY :
Expand All @@ -7765,7 +7765,7 @@ grn_obj_set_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *value)
switch (DB_OBJ(obj)->header.type) {
case GRN_TABLE_HASH_KEY :
((grn_hash *)obj)->normalizer = value;
((grn_hash *)obj)->header->normalizer = grn_obj_id(ctx, value);
((grn_hash *)obj)->header.common->normalizer = grn_obj_id(ctx, value);
rc = GRN_SUCCESS;
break;
case GRN_TABLE_PAT_KEY :
Expand Down
66 changes: 45 additions & 21 deletions lib/grn_hash.h
@@ -1,5 +1,5 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009-2012 Brazil
/* Copyright(C) 2009-2015 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
Expand Down Expand Up @@ -187,7 +187,15 @@ GRN_API grn_id grn_table_queue_tail(grn_table_queue *queue);
/**** grn_hash ****/

#define GRN_HASH_TINY (0x01<<6)
#define GRN_HASH_MAX_KEY_SIZE GRN_TABLE_MAX_KEY_SIZE
#define GRN_HASH_MAX_KEY_SIZE_NORMAL GRN_TABLE_MAX_KEY_SIZE
#define GRN_HASH_MAX_KEY_SIZE_LARGE (0xffff)

#define GRN_HASH_IS_LARGE_KEY(hash)\
((hash)->key_size > GRN_HASH_MAX_KEY_SIZE_NORMAL)

typedef struct _grn_hash_header_common grn_hash_header_common;
typedef struct _grn_hash_header_normal grn_hash_header_normal;
typedef struct _grn_hash_header_large grn_hash_header_large;

struct _grn_hash {
grn_db_obj obj;
Expand All @@ -205,7 +213,11 @@ struct _grn_hash {

/* For grn_io_hash. */
grn_io *io;
struct grn_hash_header *header;
union {
grn_hash_header_common *common;
grn_hash_header_normal *normal;
grn_hash_header_large *large;
} header;
uint32_t *lock;
// uint32_t nref;
// unsigned int max_n_subrecs;
Expand All @@ -230,24 +242,36 @@ struct _grn_hash {
grn_tiny_bitmap bitmap;
};

/* Header of grn_io_hash. */
struct grn_hash_header {
uint32_t flags;
grn_encoding encoding;
uint32_t key_size;
uint32_t value_size;
grn_id tokenizer;
uint32_t curr_rec;
int32_t curr_key;
uint32_t idx_offset;
uint32_t entry_size;
uint32_t max_offset;
uint32_t n_entries;
uint32_t n_garbages;
uint32_t lock;
grn_id normalizer;
uint32_t reserved[15];
grn_id garbages[GRN_HASH_MAX_KEY_SIZE];
#define GRN_HASH_HEADER_COMMON_FIELDS\
uint32_t flags;\
grn_encoding encoding;\
uint32_t key_size;\
uint32_t value_size;\
grn_id tokenizer;\
uint32_t curr_rec;\
int32_t curr_key;\
uint32_t idx_offset;\
uint32_t entry_size;\
uint32_t max_offset;\
uint32_t n_entries;\
uint32_t n_garbages;\
uint32_t lock;\
grn_id normalizer;\
uint32_t reserved[15]

struct _grn_hash_header_common {
GRN_HASH_HEADER_COMMON_FIELDS;
};

struct _grn_hash_header_normal {
GRN_HASH_HEADER_COMMON_FIELDS;
grn_id garbages[GRN_HASH_MAX_KEY_SIZE_NORMAL];
grn_table_queue queue;
};

struct _grn_hash_header_large {
GRN_HASH_HEADER_COMMON_FIELDS;
grn_id garbages[GRN_HASH_MAX_KEY_SIZE_LARGE];
grn_table_queue queue;
};

Expand Down
88 changes: 61 additions & 27 deletions lib/hash.c
@@ -1,6 +1,6 @@
/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2009-2012 Brazil
Copyright(C) 2009-2015 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
Expand Down Expand Up @@ -1167,7 +1167,11 @@ grn_array_unblock(grn_ctx *ctx, grn_array *array)
/* grn_hash : hash table */

#define GRN_HASH_MAX_SEGMENT 0x400
#define GRN_HASH_HEADER_SIZE 0x9000
#define GRN_HASH_HEADER_SIZE_NORMAL 0x9000
#define GRN_HASH_HEADER_SIZE_LARGE\
(GRN_HASH_HEADER_SIZE_NORMAL +\
(sizeof(grn_id) *\
(GRN_HASH_MAX_KEY_SIZE_LARGE - GRN_HASH_MAX_KEY_SIZE_NORMAL)))
#define GRN_HASH_SEGMENT_SIZE 0x400000
#define W_OF_KEY_IN_A_SEGMENT 22
#define IDX_MASK_IN_A_SEGMENT 0xfffff
Expand Down Expand Up @@ -1298,7 +1302,7 @@ inline static grn_id *
grn_hash_idx_at(grn_ctx *ctx, grn_hash *hash, grn_id id)
{
if (grn_hash_is_io_hash(hash)) {
id = (id & *hash->max_offset) + hash->header->idx_offset;
id = (id & *hash->max_offset) + hash->header.common->idx_offset;
return grn_io_hash_idx_at(ctx, hash, id);
} else {
return hash->index + (id & *hash->max_offset);
Expand Down Expand Up @@ -1380,15 +1384,18 @@ grn_io_hash_entry_put_key(grn_ctx *ctx, grn_hash *hash,
key_offset = entry->key.offset;
} else {
uint32_t segment_id;
grn_hash_header_common *header;

header = hash->header.common;
if (key_size >= GRN_HASH_SEGMENT_SIZE) {
return GRN_INVALID_ARGUMENT;
}
key_offset = hash->header->curr_key;
key_offset = header->curr_key;
segment_id = (key_offset + key_size) >> W_OF_KEY_IN_A_SEGMENT;
if ((key_offset >> W_OF_KEY_IN_A_SEGMENT) != segment_id) {
key_offset = hash->header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT;
key_offset = header->curr_key = segment_id << W_OF_KEY_IN_A_SEGMENT;
}
hash->header->curr_key += key_size;
header->curr_key += key_size;
entry->key.offset = key_offset;
}

Expand Down Expand Up @@ -1537,7 +1544,8 @@ grn_io_hash_calculate_entry_size(uint32_t key_size, uint32_t value_size,
}

static grn_io *
grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size)
grn_io_hash_create_io(grn_ctx *ctx, const char *path,
uint32_t header_size, uint32_t entry_size)
{
uint32_t w_of_element = 0;
grn_io_array_spec array_spec[4];
Expand All @@ -1555,7 +1563,7 @@ grn_io_hash_create_io(grn_ctx *ctx, const char *path, uint32_t entry_size)
array_spec[GRN_HASH_INDEX_SEGMENT].max_n_segments = 1U << (30 - (22 - 2));
array_spec[GRN_HASH_BITMAP_SEGMENT].w_of_element = 0;
array_spec[GRN_HASH_BITMAP_SEGMENT].max_n_segments = 1U << (30 - (22 + 3));
return grn_io_create_with_array(ctx, path, GRN_HASH_HEADER_SIZE,
return grn_io_create_with_array(ctx, path, header_size,
GRN_HASH_SEGMENT_SIZE,
grn_io_auto, 4, array_spec);
}
Expand All @@ -1566,12 +1574,17 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
grn_encoding encoding, uint32_t init_size)
{
grn_io *io;
struct grn_hash_header *header;
uint32_t entry_size, max_offset;
grn_hash_header_common *header;
uint32_t header_size, entry_size, max_offset;

if (key_size <= GRN_HASH_MAX_KEY_SIZE_NORMAL) {
header_size = GRN_HASH_HEADER_SIZE_NORMAL;
} else {
header_size = GRN_HASH_HEADER_SIZE_LARGE;
}
entry_size = grn_io_hash_calculate_entry_size(key_size, value_size, flags);

io = grn_io_hash_create_io(ctx, path, entry_size);
io = grn_io_hash_create_io(ctx, path, header_size, entry_size);
if (!io) {
return GRN_NO_MEMORY_AVAILABLE;
}
Expand All @@ -1587,6 +1600,8 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
encoding = ctx->encoding;
}

hash->key_size = key_size;

header = grn_io_header(io);
header->flags = flags;
header->encoding = encoding;
Expand All @@ -1610,19 +1625,26 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
header->normalizer = GRN_ID_NIL;
}
GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
grn_table_queue_init(ctx, &header->queue);
{
grn_table_queue *queue;
if (GRN_HASH_IS_LARGE_KEY(hash)) {
queue = &(((grn_hash_header_large *)(header))->queue);
} else {
queue = &(((grn_hash_header_normal *)(header))->queue);
}
grn_table_queue_init(ctx, queue);
}

hash->obj.header.flags = header->flags;
hash->ctx = ctx;
hash->key_size = key_size;
hash->encoding = encoding;
hash->value_size = value_size;
hash->entry_size = entry_size;
hash->n_garbages = &header->n_garbages;
hash->n_entries = &header->n_entries;
hash->max_offset = &header->max_offset;
hash->io = io;
hash->header = header;
hash->header.common = header;
hash->lock = &header->lock;
hash->tokenizer = NULL;
return GRN_SUCCESS;
Expand Down Expand Up @@ -1711,7 +1733,7 @@ grn_hash_create(grn_ctx *ctx, const char *path, uint32_t key_size, uint32_t valu
if (!ctx) {
return NULL;
}
if (key_size > GRN_HASH_MAX_KEY_SIZE) {
if (key_size > GRN_HASH_MAX_KEY_SIZE_LARGE) {
return NULL;
}
hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash));
Expand All @@ -1732,7 +1754,7 @@ grn_hash_open(grn_ctx *ctx, const char *path)
if (ctx) {
grn_io * const io = grn_io_open(ctx, path, grn_io_auto);
if (io) {
struct grn_hash_header * const header = grn_io_header(io);
grn_hash_header_common * const header = grn_io_header(io);
if (grn_io_get_type(io) == GRN_TABLE_HASH_KEY) {
grn_hash * const hash = (grn_hash *)GRN_MALLOC(sizeof(grn_hash));
if (hash) {
Expand All @@ -1747,7 +1769,7 @@ grn_hash_open(grn_ctx *ctx, const char *path)
hash->n_entries = &header->n_entries;
hash->max_offset = &header->max_offset;
hash->io = io;
hash->header = header;
hash->header.common = header;
hash->lock = &header->lock;
hash->tokenizer = grn_ctx_at(ctx, header->tokenizer);
if (header->flags & GRN_OBJ_KEY_NORMALIZE) {
Expand Down Expand Up @@ -1911,7 +1933,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries)

if (grn_hash_is_io_hash(hash)) {
uint32_t i;
src_offset = hash->header->idx_offset;
src_offset = hash->header.common->idx_offset;
dest_offset = MAX_INDEX_SIZE - src_offset;
for (i = 0; i < new_index_size; i += (IDX_MASK_IN_A_SEGMENT + 1)) {
/*
Expand Down Expand Up @@ -1979,7 +2001,7 @@ grn_hash_reset(grn_ctx *ctx, grn_hash *hash, uint32_t expected_n_entries)
}

if (grn_hash_is_io_hash(hash)) {
hash->header->idx_offset = dest_offset;
hash->header.common->idx_offset = dest_offset;
} else {
grn_id * const old_index = hash->index;
hash->index = new_index;
Expand Down Expand Up @@ -2038,15 +2060,22 @@ grn_io_hash_add(grn_ctx *ctx, grn_hash *hash, uint32_t hash_value,
{
grn_id entry_id;
grn_hash_entry *entry;
struct grn_hash_header * const header = hash->header;
grn_hash_header_common * const header = hash->header.common;
grn_id *garbages;

entry_id = header->garbages[key_size - 1];
if (GRN_HASH_IS_LARGE_KEY(hash)) {
garbages = hash->header.large->garbages;
} else {
garbages = hash->header.normal->garbages;
}

entry_id = garbages[key_size - 1];
if (entry_id) {
entry = grn_io_hash_entry_at(ctx, hash, entry_id, GRN_TABLE_ADD);
if (!entry) {
return GRN_ID_NIL;
}
header->garbages[key_size - 1] = *(grn_id *)entry;
garbages[key_size - 1] = *(grn_id *)entry;
if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) {
/* keep entry->io_entry's hash_value, flag, key_size and key. */
memset(entry->io_entry.value, 0, header->value_size);
Expand Down Expand Up @@ -2436,9 +2465,14 @@ grn_hash_set_value(grn_ctx *ctx, grn_hash *hash, grn_id id,
*ep = GARBAGE;\
if (grn_hash_is_io_hash(hash)) {\
uint32_t size = key_size - 1;\
struct grn_hash_header *hh = hash->header;\
ee->key = hh->garbages[size];\
hh->garbages[size] = e;\
grn_id *garbages;\
if (GRN_HASH_IS_LARGE_KEY(hash)) {\
garbages = hash->header.large->garbages;\
} else {\
garbages = hash->header.normal->garbages;\
}\
ee->key = garbages[size];\
garbages[size] = e;\
grn_io_array_bit_off(ctx, hash->io, GRN_HASH_BITMAP_SEGMENT, e);\
} else {\
ee->key = hash->garbages;\
Expand Down Expand Up @@ -2537,7 +2571,7 @@ grn_hash_cursor_close(grn_ctx *ctx, grn_hash_cursor *c)
}

#define HASH_CURR_MAX(hash) \
((grn_hash_is_io_hash(hash)) ? (hash)->header->curr_rec : (hash)->a.max)
((grn_hash_is_io_hash(hash)) ? (hash)->header.common->curr_rec : (hash)->a.max)

grn_hash_cursor *
grn_hash_cursor_open(grn_ctx *ctx, grn_hash *hash,
Expand Down Expand Up @@ -3061,7 +3095,7 @@ void
grn_hash_check(grn_ctx *ctx, grn_hash *hash)
{
char buf[8];
struct grn_hash_header *h = hash->header;
grn_hash_header_common *h = hash->header.common;
GRN_OUTPUT_ARRAY_OPEN("RESULT", 1);
GRN_OUTPUT_MAP_OPEN("SUMMARY", 25);
GRN_OUTPUT_CSTR("flags");
Expand Down
2 changes: 1 addition & 1 deletion lib/proc.c
Expand Up @@ -2659,7 +2659,7 @@ dump_plugins(grn_ctx *ctx, grn_obj *outbuf)
return;
}

processed_paths = grn_hash_create(ctx, NULL, GRN_HASH_MAX_KEY_SIZE, 0,
processed_paths = grn_hash_create(ctx, NULL, GRN_TABLE_MAX_KEY_SIZE, 0,
GRN_OBJ_TABLE_HASH_KEY |
GRN_OBJ_KEY_VAR_SIZE);
if (!processed_paths) {
Expand Down
4 changes: 2 additions & 2 deletions test/unit/core/test-hash-cursor.c
@@ -1,6 +1,6 @@
/* -*- c-basic-offset: 2; coding: utf-8 -*- */
/*
Copyright (C) 2008-2012 Kouhei Sutou <kou@clear-code.com>
Copyright (C) 2008-2015 Kouhei Sutou <kou@clear-code.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
Expand Down Expand Up @@ -53,7 +53,7 @@ cut_setup(void)
keys_and_values = NULL;

grn_test_hash_factory_set_flags(factory, GRN_OBJ_KEY_VAR_SIZE);
grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE);
grn_test_hash_factory_set_key_size(factory, GRN_HASH_MAX_KEY_SIZE_NORMAL);

sample_value = NULL;
}
Expand Down

0 comments on commit 1e870f3

Please sign in to comment.