Skip to content

Commit

Permalink
Replace ziplist with listpack in quicklist (redis#9740)
Browse files Browse the repository at this point in the history
Part three of implementing redis#8702, following redis#8887 and redis#9366 .

## Description of the feature
1. Replace the ziplist container of quicklist with listpack.
2. Convert existing quicklist ziplists on RDB loading time. an O(n) operation.

## Interface changes
1. New `list-max-listpack-size` config is an alias for `list-max-ziplist-size`.
2. Replace `debug ziplist` command with `debug listpack`.

## Internal changes
1. Add `lpMerge` to merge two listpacks . (same as `ziplistMerge`)
2. Add `lpRepr` to print info of listpack which is used in debugCommand and `quicklistRepr`. (same as `ziplistRepr`)
3. Replace `QUICKLIST_NODE_CONTAINER_ZIPLIST` with `QUICKLIST_NODE_CONTAINER_PACKED`(following redis#9357 ).
    It represent that a quicklistNode is a packed node, as opposed to a plain node.
4. Remove `createZiplistObject` method, which is never used.
5. Calculate listpack entry size using overhead overestimation in `quicklistAllowInsert`.
    We prefer an overestimation, which would at worse lead to a few bytes below the lowest limit of 4k.

## Improvements
1. Calling `lpShrinkToFit` after converting Ziplist to listpack, which was missed at redis#9366.
2. Optimize `quicklistAppendPlainNode` to avoid memcpy data.

## Bugfix
1. Fix crash in `quicklistRepr` when ziplist is compressed, introduced from redis#9366.

## Test
1. Add unittest for `lpMerge`.
2. Modify the old quicklist ziplist corrupt dump test.

Co-authored-by: Oran Agra <oran@redislabs.com>
  • Loading branch information
2 people authored and hwware committed Dec 20, 2021
1 parent ef9e84f commit a0c9eb4
Show file tree
Hide file tree
Showing 17 changed files with 508 additions and 362 deletions.
2 changes: 1 addition & 1 deletion redis.conf
Expand Up @@ -1737,7 +1737,7 @@ hash-max-listpack-value 64
# per list node.
# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),
# but if your use case is unique, adjust the settings as necessary.
list-max-ziplist-size -2
list-max-listpack-size -2

# Lists may also be compressed.
# Compress depth is the number of quicklist ziplist nodes from *each* side of
Expand Down
2 changes: 1 addition & 1 deletion src/config.c
Expand Up @@ -2603,7 +2603,7 @@ standardConfig configs[] = {
createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
createIntConfig("list-max-ziplist-size", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_ziplist_size, -2, INTEGER_CONFIG, NULL, NULL),
createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL),
createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL),
createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, NULL), /* Default: 1% CPU min (at lower threshold) */
Expand Down
2 changes: 1 addition & 1 deletion src/db.c
Expand Up @@ -847,7 +847,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {

/* Step 2: Iterate the collection.
*
* Note that if the object is encoded with a ziplist, intset, or any other
* Note that if the object is encoded with a listpack, intset, or any other
* representation that is not a hash table, we are sure that it is also
* composed of a small number of elements. So to avoid taking state we
* just return everything inside the object in a single call, setting the
Expand Down
18 changes: 9 additions & 9 deletions src/debug.c
Expand Up @@ -473,8 +473,8 @@ void debugCommand(client *c) {
" Run a fuzz tester against the stringmatchlen() function.",
"STRUCTSIZE",
" Return the size of different Redis core C structures.",
"ZIPLIST <key>",
" Show low level info about the ziplist encoding of <key>.",
"LISTPACK <key>",
" Show low level info about the listpack encoding of <key>.",
"QUICKLIST <key> [<0|1>]",
" Show low level info about the quicklist encoding of <key>."
" The optional argument (0 by default) sets the level of detail",
Expand Down Expand Up @@ -602,8 +602,8 @@ NULL
used = snprintf(nextra, remaining, " ql_avg_node:%.2f", avg);
nextra += used;
remaining -= used;
/* Add quicklist fill level / max ziplist size */
used = snprintf(nextra, remaining, " ql_ziplist_max:%d", ql->fill);
/* Add quicklist fill level / max listpack size */
used = snprintf(nextra, remaining, " ql_listpack_max:%d", ql->fill);
nextra += used;
remaining -= used;
/* Add isCompressed? */
Expand Down Expand Up @@ -653,17 +653,17 @@ NULL
(long long) sdsavail(val->ptr),
(long long) getStringObjectSdsUsedMemory(val));
}
} else if (!strcasecmp(c->argv[1]->ptr,"ziplist") && c->argc == 3) {
} else if (!strcasecmp(c->argv[1]->ptr,"listpack") && c->argc == 3) {
robj *o;

if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
== NULL) return;

if (o->encoding != OBJ_ENCODING_ZIPLIST) {
addReplyError(c,"Not a ziplist encoded object.");
if (o->encoding != OBJ_ENCODING_LISTPACK) {
addReplyError(c,"Not a listpack encoded object.");
} else {
ziplistRepr(o->ptr);
addReplyStatus(c,"Ziplist structure printed on stdout");
lpRepr(o->ptr);
addReplyStatus(c,"Listpack structure printed on stdout");
}
} else if (!strcasecmp(c->argv[1]->ptr,"quicklist") && (c->argc == 3 || c->argc == 4)) {
robj *o;
Expand Down
202 changes: 201 additions & 1 deletion src/listpack.c
Expand Up @@ -1021,7 +1021,7 @@ unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsi
* address again after a reallocation. */
unsigned long poff = first-lp;

/* Move tail to the front of the ziplist */
/* Move tail to the front of the listpack */
memmove(first, tail, eofptr - tail + 1);
lpSetTotalBytes(lp, bytes - (tail - first));
uint32_t numele = lpGetNumElements(lp);
Expand Down Expand Up @@ -1064,6 +1064,103 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {
return lp;
}

/* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.
*
* NOTE: The larger listpack is reallocated to contain the new merged listpack.
* Either 'first' or 'second' can be used for the result. The parameter not
* used will be free'd and set to NULL.
*
* After calling this function, the input parameters are no longer valid since
* they are changed and free'd in-place.
*
* The result listpack is the contents of 'first' followed by 'second'.
*
* On failure: returns NULL if the merge is impossible.
* On success: returns the merged listpack (which is expanded version of either
* 'first' or 'second', also frees the other unused input listpack, and sets the
* input listpack argument equal to newly reallocated listpack return value. */
unsigned char *lpMerge(unsigned char **first, unsigned char **second) {
/* If any params are null, we can't merge, so NULL. */
if (first == NULL || *first == NULL || second == NULL || *second == NULL)
return NULL;

/* Can't merge same list into itself. */
if (*first == *second)
return NULL;

size_t first_bytes = lpBytes(*first);
unsigned long first_len = lpLength(*first);

size_t second_bytes = lpBytes(*second);
unsigned long second_len = lpLength(*second);

int append;
unsigned char *source, *target;
size_t target_bytes, source_bytes;
/* Pick the largest listpack so we can resize easily in-place.
* We must also track if we are now appending or prepending to
* the target listpack. */
if (first_bytes >= second_bytes) {
/* retain first, append second to first. */
target = *first;
target_bytes = first_bytes;
source = *second;
source_bytes = second_bytes;
append = 1;
} else {
/* else, retain second, prepend first to second. */
target = *second;
target_bytes = second_bytes;
source = *first;
source_bytes = first_bytes;
append = 0;
}

/* Calculate final bytes (subtract one pair of metadata) */
unsigned long long lpbytes = (unsigned long long)first_bytes + second_bytes - LP_HDR_SIZE - 1;
assert(lpbytes < UINT32_MAX); /* larger values can't be stored */
unsigned long lplength = first_len + second_len;

/* Combined lp length should be limited within UINT16_MAX */
lplength = lplength < UINT16_MAX ? lplength : UINT16_MAX;

/* Extend target to new lpbytes then append or prepend source. */
target = zrealloc(target, lpbytes);
if (append) {
/* append == appending to target */
/* Copy source after target (copying over original [END]):
* [TARGET - END, SOURCE - HEADER] */
memcpy(target + target_bytes - 1,
source + LP_HDR_SIZE,
source_bytes - LP_HDR_SIZE);
} else {
/* !append == prepending to target */
/* Move target *contents* exactly size of (source - [END]),
* then copy source into vacated space (source - [END]):
* [SOURCE - END, TARGET - HEADER] */
memmove(target + source_bytes - 1,
target + LP_HDR_SIZE,
target_bytes - LP_HDR_SIZE);
memcpy(target, source, source_bytes - 1);
}

lpSetNumElements(target, lplength);
lpSetTotalBytes(target, lpbytes);

/* Now free and NULL out what we didn't realloc */
if (append) {
zfree(*second);
*second = NULL;
*first = target;
} else {
zfree(*first);
*first = NULL;
*second = target;
}

return target;
}

/* Return the total number of bytes the listpack is composed of. */
size_t lpBytes(unsigned char *lp) {
return lpGetTotalBytes(lp);
Expand Down Expand Up @@ -1377,6 +1474,57 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack
return picked;
}

/* Print info of listpack which is used in debugCommand */
void lpRepr(unsigned char *lp) {
unsigned char *p, *vstr;
int64_t vlen;
unsigned char intbuf[LP_INTBUF_SIZE];
int index = 0;

printf("{total bytes %zu} {num entries %lu}\n", lpBytes(lp), lpLength(lp));

p = lpFirst(lp);
while(p) {
uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p);
uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);
unsigned long back_len = lpEncodeBacklen(NULL, encoded_size);
printf(
"{\n"
"\taddr: 0x%08lx,\n"
"\tindex: %2d,\n"
"\toffset: %1lu,\n"
"\thdr+entrylen+backlen: %2lu,\n"
"\thdrlen: %3u,\n"
"\tbacklen: %2lu,\n"
"\tpayload: %1u\n",
(long unsigned)p,
index,
(unsigned long) (p-lp),
encoded_size + back_len,
encoded_size_bytes,
back_len,
encoded_size - encoded_size_bytes);
printf("\tbytes: ");
for (unsigned int i = 0; i < (encoded_size + back_len); i++) {
printf("%02x|",p[i]);
}
printf("\n");

vstr = lpGet(p, &vlen, intbuf);
printf("\t[str]");
if (vlen > 40) {
if (fwrite(vstr, 40, 1, stdout) == 0) perror("fwrite");
printf("...");
} else {
if (fwrite(vstr, vlen, 1, stdout) == 0) perror("fwrite");
}
printf("\n}\n");
index++;
p = lpNext(lp, p);
}
printf("{end}\n\n");
}

#ifdef REDIS_TEST

#include <sys/time.h>
Expand Down Expand Up @@ -1845,6 +1993,58 @@ int listpackTest(int argc, char *argv[], int flags) {
lpFree(lp);
}

TEST("lpMerge two empty listpacks") {
unsigned char *lp1 = lpNew(0);
unsigned char *lp2 = lpNew(0);

/* Merge two empty listpacks, get empty result back. */
lp1 = lpMerge(&lp1, &lp2);
assert(lpLength(lp1) == 0);
zfree(lp1);
}

TEST("lpMerge two listpacks - first larger than second") {
unsigned char *lp1 = createIntList();
unsigned char *lp2 = createList();

size_t lp1_bytes = lpBytes(lp1);
size_t lp2_bytes = lpBytes(lp2);
unsigned long lp1_len = lpLength(lp1);
unsigned long lp2_len = lpLength(lp2);

unsigned char *lp3 = lpMerge(&lp1, &lp2);
assert(lp3 == lp1);
assert(lp2 == NULL);
assert(lpLength(lp3) == (lp1_len + lp2_len));
assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));
verifyEntry(lpSeek(lp3, 0), (unsigned char*)"4294967296", 10);
verifyEntry(lpSeek(lp3, 5), (unsigned char*)"much much longer non integer", 28);
verifyEntry(lpSeek(lp3, 6), (unsigned char*)"hello", 5);
verifyEntry(lpSeek(lp3, -1), (unsigned char*)"1024", 4);
zfree(lp3);
}

TEST("lpMerge two listpacks - second larger than first") {
unsigned char *lp1 = createList();
unsigned char *lp2 = createIntList();

size_t lp1_bytes = lpBytes(lp1);
size_t lp2_bytes = lpBytes(lp2);
unsigned long lp1_len = lpLength(lp1);
unsigned long lp2_len = lpLength(lp2);

unsigned char *lp3 = lpMerge(&lp1, &lp2);
assert(lp3 == lp2);
assert(lp1 == NULL);
assert(lpLength(lp3) == (lp1_len + lp2_len));
assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));
verifyEntry(lpSeek(lp3, 0), (unsigned char*)"hello", 5);
verifyEntry(lpSeek(lp3, 3), (unsigned char*)"1024", 4);
verifyEntry(lpSeek(lp3, 4), (unsigned char*)"4294967296", 10);
verifyEntry(lpSeek(lp3, -1), (unsigned char*)"much much longer non integer", 28);
zfree(lp3);
}

TEST("Random pair with one element") {
listpackEntry key, val;
unsigned char *lp = lpNew(0);
Expand Down
2 changes: 2 additions & 0 deletions src/listpack.h
Expand Up @@ -68,6 +68,7 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
unsigned char *lpMerge(unsigned char **first, unsigned char **second);
unsigned long lpLength(unsigned char *lp);
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);
Expand All @@ -88,6 +89,7 @@ void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *k
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
int lpSafeToAdd(unsigned char* lp, size_t add);
void lpRepr(unsigned char *lp);

#ifdef REDIS_TEST
int listpackTest(int argc, char *argv[], int flags);
Expand Down
2 changes: 1 addition & 1 deletion src/module.c
Expand Up @@ -526,7 +526,7 @@ int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
switch(type) {
case REDISMODULE_KEYTYPE_LIST:
obj = createQuicklistObject();
quicklistSetOptions(obj->ptr, server.list_max_ziplist_size,
quicklistSetOptions(obj->ptr, server.list_max_listpack_size,
server.list_compress_depth);
break;
case REDISMODULE_KEYTYPE_ZSET:
Expand Down
7 changes: 0 additions & 7 deletions src/object.c
Expand Up @@ -233,13 +233,6 @@ robj *createQuicklistObject(void) {
return o;
}

robj *createZiplistObject(void) {
unsigned char *zl = ziplistNew();
robj *o = createObject(OBJ_LIST,zl);
o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}

robj *createSetObject(void) {
dict *d = dictCreate(&setDictType);
robj *o = createObject(OBJ_SET,d);
Expand Down

0 comments on commit a0c9eb4

Please sign in to comment.