Skip to content

Commit

Permalink
Transparent LZF compression initial implementation.
Browse files Browse the repository at this point in the history
This commit shapes the main ideas for the implementation but doesn't
fix all the command implementations, nor does it handle loading of LZF
compressed objects in a way able to preserve the compression.
  • Loading branch information
antirez committed Apr 4, 2014
1 parent 2a7da8a commit f7501fb
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 25 deletions.
1 change: 1 addition & 0 deletions src/bitops.c
Expand Up @@ -261,6 +261,7 @@ void getbitCommand(redisClient *c) {

byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
if (lzfEncodedObject(o)) o = dbUnshareStringValue(c->db,c->argv[1],o);
if (sdsEncodedObject(o)) {
if (byte < sdslen(o->ptr))
bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);
Expand Down
6 changes: 4 additions & 2 deletions src/debug.c
Expand Up @@ -308,8 +308,10 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
key = dictGetKey(de);

if (val->type != REDIS_STRING || !sdsEncodedObject(val)) {
addReplyError(c,"Not an sds encoded string.");
if (val->type != REDIS_STRING ||
(!sdsEncodedObject(val) && val->encoding != REDIS_ENCODING_LZF))
{
addReplyError(c,"Not an sds/lzf encoded string.");
} else {
addReplyStatusFormat(c,
"key_sds_len:%lld, key_sds_avail:%lld, "
Expand Down
10 changes: 9 additions & 1 deletion src/networking.c
Expand Up @@ -319,6 +319,11 @@ void addReply(redisClient *c, robj *obj) {
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
_addReplyObjectToList(c,obj);
decrRefCount(obj);
} else if (obj->encoding == REDIS_ENCODING_LZF) {
obj = getDecodedObject(obj);
if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
_addReplyObjectToList(c,obj);
decrRefCount(obj);
} else {
redisPanic("Wrong obj->encoding in addReply()");
}
Expand Down Expand Up @@ -488,7 +493,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) {

if (sdsEncodedObject(obj)) {
len = sdslen(obj->ptr);
} else {
} else if (obj->encoding == REDIS_ENCODING_INT) {
long n = (long)obj->ptr;

/* Compute how many bytes will take this integer as a radix 10 string */
Expand All @@ -500,6 +505,9 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
while((n = n/10) != 0) {
len++;
}
} else {
/* LZF and others not handled explicitly. */
len = stringObjectLen(obj);
}

if (len < REDIS_SHARED_BULKHDR_LEN)
Expand Down
56 changes: 53 additions & 3 deletions src/object.c
Expand Up @@ -29,6 +29,7 @@
*/

#include "redis.h"
#include "lzf.h" /* LZF compression library */
#include <math.h>
#include <ctype.h>

Expand Down Expand Up @@ -210,7 +211,8 @@ robj *createZsetZiplistObject(void) {
}

void freeStringObject(robj *o) {
if (o->encoding == REDIS_ENCODING_RAW) {
if (o->encoding == REDIS_ENCODING_RAW ||
o->encoding == REDIS_ENCODING_LZF) {
sdsfree(o->ptr);
}
}
Expand Down Expand Up @@ -335,7 +337,9 @@ int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
}
}

/* Try to encode a string object in order to save space */
/* Try to encode a string object in order to save space. */
#define REDIS_ENCODING_LZF_MAX_SIZE (1024*64)
#define REDIS_ENCODING_LZF_MAX_COMPR_SIZE (1024*32)
robj *tryObjectEncoding(robj *o) {
long value;
sds s = o->ptr;
Expand Down Expand Up @@ -394,6 +398,35 @@ robj *tryObjectEncoding(robj *o) {
return emb;
}

/* Try LZF compression for objects up to REDIS_ENCODING_LZF_MAX_SIZE
* and greater than REDIS_ENCODING_EMBSTR_SIZE_LIMIT.
*
* TODO: add a fast compressibility test using LZF against a few
* characters and don't go forward if this test does not pass. */
if (len <= REDIS_ENCODING_LZF_MAX_SIZE) {
/* Allocate four more bytes in our buffer since we need to store
* the size of the compressed string as header. */
unsigned char compr[4+REDIS_ENCODING_LZF_MAX_COMPR_SIZE];
size_t comprlen, outlen;

/* We want to save at least 25% of memory for this to make sense. */
outlen = len-4-(len/4);
if (outlen > REDIS_ENCODING_LZF_MAX_SIZE)
outlen = REDIS_ENCODING_LZF_MAX_SIZE;
comprlen = lzf_compress(s,len,compr+4,outlen);
if (comprlen != 0) {
/* Object successfully compressed within the required space. */
compr[0] = len & 0xff;
compr[1] = (len >> 8) & 0xff;
compr[2] = (len >> 16) & 0xff;
compr[3] = (len >> 24) & 0xff;
if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
o->encoding = REDIS_ENCODING_LZF;
o->ptr = sdsnewlen(compr,comprlen+4);
return o;
}
}

/* We can't encode the object...
*
* Do the last try, and at least optimize the SDS string inside
Expand Down Expand Up @@ -428,6 +461,14 @@ robj *getDecodedObject(robj *o) {
ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_LZF) {
int origlen = stringObjectLen(o);
sds orig = sdsnewlen(NULL,origlen);
unsigned char *p = o->ptr;

if (lzf_decompress(p+4,sdslen(o->ptr)-4,orig,origlen) == 0)
redisPanic("LZF error during object decoding.");
return createObject(REDIS_STRING,orig);
} else {
redisPanic("Unknown encoding type");
}
Expand Down Expand Up @@ -501,13 +542,21 @@ int equalStringObjects(robj *a, robj *b) {
}
}

/* Returns the original (uncompressed) size of an LZF encoded object.
 * Only called by stringObjectLen() that should be the main interface.
 *
 * The size is read from the first four bytes at o->ptr, least significant
 * byte first.
 *
 * Each byte is widened to size_t before being shifted: an unsigned char is
 * otherwise promoted to (signed) int, and shifting a value >= 0x80 left by
 * 24 bits overflows int, which is undefined behavior and in practice
 * sign-extends into a huge size_t on 64 bit targets. */
size_t stringObjectUncompressedLen(robj *o) {
    const unsigned char *p = o->ptr;

    return  (size_t)p[0]        |
           ((size_t)p[1] << 8)  |
           ((size_t)p[2] << 16) |
           ((size_t)p[3] << 24);
}

/* Return the length of the string held by 'o', whatever its encoding.
 * The object must be of type REDIS_STRING (asserted).
 *
 * - sds encoded objects: the length of the sds string.
 * - LZF encoded objects: the uncompressed length, as reported by
 *   stringObjectUncompressedLen().
 * - Any other encoding: o->ptr is cast to long and formatted with
 *   ll2string(); its return value is used as the length (presumably the
 *   number of characters written -- confirm against ll2string()). */
size_t stringObjectLen(robj *o) {
    char digits[32];

    redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);

    if (sdsEncodedObject(o))
        return sdslen(o->ptr);

    if (o->encoding == REDIS_ENCODING_LZF)
        return stringObjectUncompressedLen(o);

    return ll2string(digits,32,(long)o->ptr);
}
Expand Down Expand Up @@ -656,6 +705,7 @@ char *strEncoding(int encoding) {
case REDIS_ENCODING_INTSET: return "intset";
case REDIS_ENCODING_SKIPLIST: return "skiplist";
case REDIS_ENCODING_EMBSTR: return "embstr";
case REDIS_ENCODING_LZF: return "lzf";
default: return "unknown";
}
}
Expand Down
54 changes: 36 additions & 18 deletions src/rdb.c
Expand Up @@ -209,43 +209,56 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}

int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
size_t comprlen, outlen;
/* Save an already compressed object in LZF encoding.
*
* On success the length of the stored object is returned, otherwise
* 0 is returned. */
int rdbSaveLzfStringObject(rio *rdb, unsigned char *out, size_t len, size_t comprlen) {
unsigned char byte;
int n, nwritten = 0;
void *out;

/* We require at least four bytes compression for this to be worth it */
if (len <= 4) return 0;
outlen = len-4;
if ((out = zmalloc(outlen+1)) == NULL) return 0;
comprlen = lzf_compress(s, len, out, outlen);
if (comprlen == 0) {
zfree(out);
return 0;
}
/* Data compressed! Let's save it on disk */
byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;

if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
nwritten += n;

if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
nwritten += n;

if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
nwritten += n;

zfree(out);
return nwritten;

writeerr:
zfree(out);
return -1;
}

/* Attempt to LZF-compress the 'len' bytes at 's' and, when that works,
 * write the result through rdbSaveLzfStringObject().
 *
 * Return value:
 *   0   the input is too short (<= 4 bytes), the scratch buffer could not
 *       be allocated, or lzf_compress() reported failure by returning 0
 *       when asked to fit the output in len-4 bytes.
 *   any other value is whatever rdbSaveLzfStringObject() returned for the
 *       compressed payload (bytes written, or -1 on write error). */
int rdbTrySaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
    void *buf;
    size_t maxout, clen;
    int result = 0;

    /* We require at least four bytes compression for this to be worth it */
    if (len <= 4) return 0;
    maxout = len-4;

    buf = zmalloc(maxout+1);
    if (buf == NULL) return 0;

    clen = lzf_compress(s, len, buf, maxout);
    if (clen != 0)
        result = rdbSaveLzfStringObject(rdb,buf,len,clen);

    /* The scratch buffer is released on every path past the allocation. */
    zfree(buf);
    return result;
}

robj *rdbLoadLzfStringObject(rio *rdb) {
unsigned int len, clen;
unsigned char *c = NULL;
Expand Down Expand Up @@ -283,7 +296,7 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
/* Try LZF compression - under 20 bytes it's unable to compress even
* aaaaaaaaaaaaaaaaaa so skip it */
if (server.rdb_compression && len > 20) {
n = rdbSaveLzfStringObject(rdb,s,len);
n = rdbTrySaveLzfStringObject(rdb,s,len);
if (n == -1) return -1;
if (n > 0) return n;
/* Return value of 0 means data can't be compressed, save the old way */
Expand Down Expand Up @@ -324,6 +337,11 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
* object is already integer encoded. */
if (obj->encoding == REDIS_ENCODING_INT) {
return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
} else if (obj->encoding == REDIS_ENCODING_LZF) {
/* Data is already compressed, save it with LZF encoding. */
int len = stringObjectLen(obj);
unsigned char *p = obj->ptr;
return rdbSaveLzfStringObject(rdb,p+4,len,sdslen(obj->ptr)-4);
} else {
redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
Expand Down
4 changes: 3 additions & 1 deletion src/redis.h
Expand Up @@ -172,7 +172,7 @@

/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
* is set to one of this fields for this object. */
* is set to one of these values. */
#define REDIS_ENCODING_RAW 0 /* Raw representation */
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
Expand All @@ -182,6 +182,7 @@
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
#define REDIS_ENCODING_LZF 9 /* LZF compressed string. */

/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
Expand Down Expand Up @@ -1081,6 +1082,7 @@ int collateStringObjects(robj *a, robj *b);
int equalStringObjects(robj *a, robj *b);
unsigned long long estimateObjectIdleTime(robj *o);
/* True when the object's encoding is REDIS_ENCODING_RAW or
 * REDIS_ENCODING_EMBSTR. The parameter is parenthesized so that any
 * pointer-valued expression (e.g. &obj) expands correctly. Note that the
 * argument is evaluated twice, so it must not have side effects. */
#define sdsEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_RAW || (objptr)->encoding == REDIS_ENCODING_EMBSTR)
/* True when the object's encoding is REDIS_ENCODING_LZF. The parameter is
 * parenthesized so that any pointer-valued expression (e.g. &obj) expands
 * correctly. */
#define lzfEncodedObject(objptr) ((objptr)->encoding == REDIS_ENCODING_LZF)

/* Synchronous I/O with timeout */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
Expand Down

0 comments on commit f7501fb

Please sign in to comment.