Skip to content

Commit

Permalink
Convert rtree from (void *) to (uint8_t) storage.
Browse files Browse the repository at this point in the history
Reduce rtree memory usage by storing booleans (1 byte each) rather than
pointers.  The rtree code is only used to record whether jemalloc manages
a chunk of memory, so there's no need to store pointers in the rtree.

Increase rtree node size to 64 KiB in order to reduce tree depth from 13
to 3 on 64-bit systems.  The conversion to more compact leaf nodes was
enough by itself to make the rtree depth 1 on 32-bit systems.  Because
root nodes are made smaller than the specified node size whenever
possible, the node size change has no impact on 32-bit systems (assuming
default chunk size).
  • Loading branch information
Jason Evans committed Jan 3, 2014
1 parent b980cc7 commit b954bc5
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 55 deletions.
2 changes: 1 addition & 1 deletion include/jemalloc/internal/jemalloc_internal.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ ivsalloc(const void *ptr, bool demote)
{

/* Return 0 if ptr is not within a chunk managed by jemalloc. */
if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
return (0);

return (isalloc(ptr, demote));
Expand Down
38 changes: 20 additions & 18 deletions include/jemalloc/internal/rtree.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ typedef struct rtree_s rtree_t;
* Size of each radix tree node (must be a power of 2). This impacts tree
* depth.
*/
#if (LG_SIZEOF_PTR == 2)
# define RTREE_NODESIZE (1U << 14)
#else
# define RTREE_NODESIZE CACHELINE
#endif
#define RTREE_NODESIZE (1U << 16)

typedef void *(rtree_alloc_t)(size_t);
typedef void (rtree_dalloc_t)(void *);
Expand Down Expand Up @@ -52,19 +48,19 @@ void rtree_postfork_child(rtree_t *rtree);

#ifndef JEMALLOC_ENABLE_INLINE
#ifdef JEMALLOC_DEBUG
void *rtree_get_locked(rtree_t *rtree, uintptr_t key);
uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
#endif
void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
#define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \
JEMALLOC_INLINE uint8_t \
f(rtree_t *rtree, uintptr_t key) \
{ \
void *ret; \
uint8_t ret; \
uintptr_t subkey; \
unsigned i, lshift, height, bits; \
void **node, **child; \
Expand All @@ -79,7 +75,7 @@ f(rtree_t *rtree, uintptr_t key) \
child = (void**)node[subkey]; \
if (child == NULL) { \
RTREE_UNLOCK(&rtree->mutex); \
return (NULL); \
return (0); \
} \
} \
\
Expand All @@ -90,7 +86,10 @@ f(rtree_t *rtree, uintptr_t key) \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
bits); \
ret = node[subkey]; \
{ \
uint8_t *leaf = (uint8_t *)node; \
ret = leaf[subkey]; \
} \
RTREE_UNLOCK(&rtree->mutex); \
\
RTREE_GET_VALIDATE \
Expand Down Expand Up @@ -129,7 +128,7 @@ RTREE_GET_GENERATE(rtree_get)
#undef RTREE_GET_VALIDATE

JEMALLOC_INLINE bool
rtree_set(rtree_t *rtree, uintptr_t key, void *val)
rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
{
uintptr_t subkey;
unsigned i, lshift, height, bits;
Expand All @@ -144,22 +143,25 @@ rtree_set(rtree_t *rtree, uintptr_t key, void *val)
bits);
child = (void**)node[subkey];
if (child == NULL) {
child = (void**)rtree->alloc(sizeof(void *) <<
rtree->level2bits[i+1]);
size_t size = ((i + 1 < height - 1) ? sizeof(void *)
: (sizeof(uint8_t))) << rtree->level2bits[i+1];
child = (void**)rtree->alloc(size);
if (child == NULL) {
malloc_mutex_unlock(&rtree->mutex);
return (true);
}
memset(child, 0, sizeof(void *) <<
rtree->level2bits[i+1]);
memset(child, 0, size);
node[subkey] = child;
}
}

/* node is a leaf, so it contains values rather than node pointers. */
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
node[subkey] = val;
{
uint8_t *leaf = (uint8_t *)node;
leaf[subkey] = val;
}
malloc_mutex_unlock(&rtree->mutex);

return (false);
Expand Down
4 changes: 2 additions & 2 deletions src/chunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
label_return:
if (ret != NULL) {
if (config_ivsalloc && base == false) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) {
chunk_dealloc(ret, size, true);
return (NULL);
}
Expand Down Expand Up @@ -321,7 +321,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap)
assert((size & chunksize_mask) == 0);

if (config_ivsalloc)
rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
rtree_set(chunks_rtree, (uintptr_t)chunk, 0);
if (config_stats || config_prof) {
malloc_mutex_lock(&chunks_mtx);
assert(stats_chunks.curchunks >= (size / chunksize));
Expand Down
41 changes: 25 additions & 16 deletions src/rtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,20 @@ rtree_t *
rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
{
rtree_t *ret;
unsigned bits_per_level, height, i;
unsigned bits_per_level, bits_in_leaf, height, i;

assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));

bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
height = bits / bits_per_level;
if (height * bits_per_level != bits)
height++;
assert(height * bits_per_level >= bits);
bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1;
if (bits > bits_in_leaf) {
height = 1 + (bits - bits_in_leaf) / bits_per_level;
if ((height-1) * bits_per_level + bits_in_leaf != bits)
height++;
} else {
height = 1;
}
assert((height-1) * bits_per_level + bits_in_leaf >= bits);

ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
(sizeof(unsigned) * height));
Expand All @@ -25,23 +30,27 @@ rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
ret->alloc = alloc;
ret->dalloc = dalloc;
if (malloc_mutex_init(&ret->mutex)) {
/* Leak the rtree. */
if (dalloc != NULL)
dalloc(ret);
return (NULL);
}
ret->height = height;
if (bits_per_level * height > bits)
ret->level2bits[0] = bits % bits_per_level;
else
ret->level2bits[0] = bits_per_level;
for (i = 1; i < height; i++)
ret->level2bits[i] = bits_per_level;
if (height > 1) {
if ((height-1) * bits_per_level + bits_in_leaf > bits) {
ret->level2bits[0] = (bits - bits_in_leaf) %
bits_per_level;
} else
ret->level2bits[0] = bits_per_level;
for (i = 1; i < height-1; i++)
ret->level2bits[i] = bits_per_level;
ret->level2bits[height-1] = bits_in_leaf;
} else
ret->level2bits[0] = bits;

ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
if (ret->root == NULL) {
/*
* We leak the rtree here, since there's no generic base
* deallocation.
*/
if (dalloc != NULL)
dalloc(ret);
return (NULL);
}
memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
Expand Down
34 changes: 16 additions & 18 deletions test/unit/rtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ TEST_BEGIN(test_rtree_get_empty)

for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
rtree_t *rtree = rtree_new(i, imalloc, idalloc);
assert_ptr_null(rtree_get(rtree, 0),
assert_u_eq(rtree_get(rtree, 0), 0,
"rtree_get() should return NULL for empty tree");
rtree_delete(rtree);
}
Expand All @@ -20,12 +20,12 @@ TEST_BEGIN(test_rtree_extrema)
for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
rtree_t *rtree = rtree_new(i, imalloc, idalloc);

rtree_set(rtree, 0, (void *)1);
assert_ptr_eq(rtree_get(rtree, 0), (void *)1,
rtree_set(rtree, 0, 1);
assert_u_eq(rtree_get(rtree, 0), 1,
"rtree_get() should return previously set value");

rtree_set(rtree, ~((uintptr_t)0), (void *)1);
assert_ptr_eq(rtree_get(rtree, ~((uintptr_t)0)), (void *)1,
rtree_set(rtree, ~((uintptr_t)0), 1);
assert_u_eq(rtree_get(rtree, ~((uintptr_t)0)), 1,
"rtree_get() should return previously set value");

rtree_delete(rtree);
Expand All @@ -43,21 +43,19 @@ TEST_BEGIN(test_rtree_bits)
rtree_t *rtree = rtree_new(i, imalloc, idalloc);

for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
rtree_set(rtree, keys[j], (void *)1);
rtree_set(rtree, keys[j], 1);
for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) {
assert_ptr_eq(rtree_get(rtree, keys[k]),
(void *)1,
assert_u_eq(rtree_get(rtree, keys[k]), 1,
"rtree_get() should return previously set "
"value and ignore insignificant key bits; "
"i=%u, j=%u, k=%u, set key=%#x, "
"get key=%#x", i, j, k, keys[j], keys[k]);
}
assert_ptr_eq(rtree_get(rtree,
(((uintptr_t)1) << (sizeof(uintptr_t)*8-i))),
(void *)0,
assert_u_eq(rtree_get(rtree,
(((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0,
"Only leftmost rtree leaf should be set; "
"i=%u, j=%u", i, j);
rtree_set(rtree, keys[j], (void *)0);
rtree_set(rtree, keys[j], 0);
}

rtree_delete(rtree);
Expand All @@ -80,22 +78,22 @@ TEST_BEGIN(test_rtree_random)

for (j = 0; j < NSET; j++) {
keys[j] = (uintptr_t)gen_rand64(sfmt);
rtree_set(rtree, keys[j], (void *)1);
assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1,
rtree_set(rtree, keys[j], 1);
assert_u_eq(rtree_get(rtree, keys[j]), 1,
"rtree_get() should return previously set value");
}
for (j = 0; j < NSET; j++) {
assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)1,
assert_u_eq(rtree_get(rtree, keys[j]), 1,
"rtree_get() should return previously set value");
}

for (j = 0; j < NSET; j++) {
rtree_set(rtree, keys[j], (void *)0);
assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0,
rtree_set(rtree, keys[j], 0);
assert_u_eq(rtree_get(rtree, keys[j]), 0,
"rtree_get() should return previously set value");
}
for (j = 0; j < NSET; j++) {
assert_ptr_eq(rtree_get(rtree, keys[j]), (void *)0,
assert_u_eq(rtree_get(rtree, keys[j]), 0,
"rtree_get() should return previously set value");
}

Expand Down

0 comments on commit b954bc5

Please sign in to comment.