Skip to content

Using and extending the code

flanglet edited this page Apr 29, 2024 · 24 revisions

Compressing/Decompressing data (C++)

Here is how to compress/decompress a block to/from a file using RLT+TEXT as transform, Huffman as entropy codec, using a block size of 1 MB, 4 jobs and a checksum.

Just create a CompressedOutputStream/CompressedInputStream to write/read compressed data.

Notice that CompressedOutputStream is a std::ostream and CompressedInputStream is a std::istream, so most operations on streams work as usual.

#include <cstdio>
#include <fstream>
#include <iostream>
#include "types.hpp"
#include "InputStream.hpp"
#include "OutputStream.hpp"
#include "io/CompressedInputStream.hpp"
#include "io/CompressedOutputStream.hpp"


using namespace kanzi;
using namespace std;

// Compresses 'length' bytes of 'block' into the file "compressed.knz".
// The CompressedOutputStream is configured with the "HUFFMAN" entropy codec,
// the "RLT+TEXT" transforms, a block size of 1 MB, checksum enabled and 4 jobs.
// Returns the value reported by getWritten() after the stream has been closed,
// or 0 when the output file cannot be opened.
uint64 testCompress(byte block[], uint length) {
        // Create the underlying OutputStream as a local object. It is declared
        // before the CompressedOutputStream so that it is destroyed after it:
        // the compressed stream keeps a reference to the underlying stream and
        // must never outlive it. A local object is also released automatically
        // if an exception is thrown.
        ofstream file("compressed.knz", ofstream::out | ofstream::binary);

        if (!file)
            return 0;

        OutputStream& os = file;

        // Create a CompressedOutputStream
        CompressedOutputStream cos(os, "HUFFMAN", "RLT+TEXT", 1024 * 1024, true, 4);

        // Compress block
        cos.write((const char*) block, length);

        // Close CompressedOutputStream (flushes the remaining compressed data)
        cos.close();

        // Get number of bytes written
        return cos.getWritten();
}

// Decompresses up to 'length' bytes from the file "compressed.knz" into 'block',
// using a CompressedInputStream configured with 4 jobs.
// Returns the value reported by getRead() after the stream has been closed,
// or 0 when the input file cannot be opened.
uint64 testDecompress(byte block[], uint length) {
        // Create the underlying InputStream as a local object. It is declared
        // before the CompressedInputStream so that it is destroyed after it:
        // the compressed stream keeps a reference to the underlying stream and
        // must never outlive it. A local object is also released automatically
        // if an exception is thrown.
        ifstream file("compressed.knz", ifstream::in | ifstream::binary);

        if (!file)
            return 0;

        InputStream& is = file;

        // Create a CompressedInputStream
        CompressedInputStream cis(is, 4);

        // Decompress block
        cis.read((char*) block, length);

        // Close CompressedInputStream
        cis.close();

        // Get number of bytes read
        return cis.getRead();
}


int main(int argc, const char** argv)
{
    byte block[65536];
    FILE* in = fopen("/tmp/enwik8", "rb");
    const int sz = fread(block, 1, 65536, in);

    if (sz > 0) {
       uint64 c = testCompress(block, sz);
       cout << "Block compressed from " << r << " bytes to " << c << " bytes" << endl; 
       testDecompress(block, sz);
    }

    return 0;
}

Compressing/Decompressing data (C)

Kanzi exposes a C API (see api/libapi.hpp) and can be built as a static (.a) or a dynamic library (.so/.dll).

Below is an example of a C program compressing and decompressing data using the C API.

/* EG. gcc testAPI.c -o r:\testAPI.exe -lkanzi */
/* EG. gcc testAPI.c -o testAPI -lkanzi */

#include "api/libapi.hpp"
#include <stdlib.h>

/*
 * Decompresses /tmp/enwik8.knz into /tmp/enwik8.knz.bak, one chunk of at most
 * blkSize bytes at a time, then prints the accumulated input and output sizes.
 *
 * Returns 0 on success. A non-zero code returned by initDecompressor(),
 * decompress() or disposeDecompressor() is propagated to the caller; -1 is
 * returned when a file cannot be opened, the buffer cannot be allocated or
 * the output cannot be fully written.
 */
int testDecompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct dData dd = { blkSize, 4, 0 };
    struct dContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8.knz", "rb");

    if (in == NULL)
        return -1;

    if ((res = initDecompressor(&dd, in, &ctx)) != 0) {
        fclose(in);
        return res;
    }

    FILE* out = fopen("/tmp/enwik8.knz.bak", "wb");
    BYTE* dst = (BYTE*)malloc(blkSize);
    int r = 0, w = 0, inSize = 0, outSize = 0;

    if (out == NULL || dst == NULL) {
        res = -1;
    }
    else {
        for (;;) {
            /* w is in/out: room available in dst on input, bytes produced on output */
            w = blkSize;

            /* Keep the status code in res: r and w are out-parameters and */
            /* must not be overwritten by the return value.                */
            if ((res = decompress(ctx, dst, &r, &w)) != 0)
                break;

            inSize += r;

            /* A call that produces no output ends the loop */
            if (w == 0)
                break;

            /* A short write means the output file is incomplete */
            if ((int)fwrite(dst, 1, w, out) != w) {
                res = -1;
                break;
            }

            outSize += w;
        }
    }

    /* Always release the context, but do not let a successful dispose */
    /* hide an earlier failure.                                        */
    const int disposeRes = disposeDecompressor(ctx);

    if (res == 0)
        res = disposeRes;

    if (res == 0) {
       printf("Size before decompression: %i bytes(s)\n", inSize);
       printf("Size after decompression:  %i bytes(s)\n", outSize);
    }

    fclose(in);

    if (out != NULL)
        fclose(out);

    free(dst);
    return res;
}

/*
 * Compresses /tmp/enwik8 into /tmp/enwik8.knz, reading the input in chunks of
 * at most blkSize bytes, then prints the accumulated input and output sizes.
 *
 * Returns 0 on success. A non-zero code returned by initCompressor(),
 * compress() or disposeCompressor() is propagated to the caller; -1 is
 * returned when a file cannot be opened or the buffer cannot be allocated.
 */
int testCompress()
{
    const int blkSize = 4 * 1024 * 1024;
    struct cData cd = { "BWT+RANK+MTFT", "FPAQ", blkSize, 4, 1, 0 };
    struct cContext* ctx;
    int res = 0;
    FILE* in = fopen("/tmp/enwik8", "rb");

    if (in == NULL)
        return -1;

    /* The output file must exist before the compressor is created: */
    /* it is the stream handed over to initCompressor().            */
    FILE* out = fopen("/tmp/enwik8.knz", "wb");

    if (out == NULL) {
        fclose(in);
        return -1;
    }

    if ((res = initCompressor(&cd, out, &ctx)) != 0) {
        fclose(in);
        fclose(out);
        return res;
    }

    BYTE* src = (BYTE*)malloc(blkSize);
    int r = 0, w = 0, inSize = 0, outSize = 0;

    if (src == NULL) {
        res = -1;
    }
    else {
        while ((r = (int)fread(src, 1, blkSize, in)) != 0) {
            if ((res = compress(ctx, src, &r, &w)) != 0)
                break;

            inSize += r;
            outSize += w;
        }
    }

    /* Always release the context, but do not let a successful dispose */
    /* hide an earlier failure.                                        */
    const int disposeRes = disposeCompressor(ctx, &w);

    if (res == 0)
        res = disposeRes;

    if (res == 0) {
       /* disposeCompressor() reports through w the bytes it wrote itself */
       outSize += w;
       printf("Size before compression: %i bytes(s)\n", inSize);
       printf("Size after compression:  %i bytes(s)\n", outSize);
    }

    fclose(in);
    fclose(out);
    free(src);
    return res;
}

/*
 * Runs the compression test, then the decompression test.
 * The first non-zero status code stops the sequence and becomes
 * the exit code of the program; 0 means both steps succeeded.
 */
int main(int argc, const char** argv)
{
    int status = testCompress();

    /* Only decompress when the compressed file was produced successfully */
    if (status == 0)
        status = testDecompress();

    return status;
}

Implementing a new transform

Here is how to implement and add a new transform to kanzi.

  • Step 1: write the transform code

For example:

#include "../Context.hpp"
#include "../Transform.hpp"

   // Example transform: XORs every byte of the block with 0xAA.
   // The operation is its own inverse, so forward() and inverse() apply
   // the same byte-wise computation and the output has the same size
   // as the input.
   class SuperDuperTransform : public Transform<byte> 
   {
   public:
       SuperDuperTransform() {}

       // Constructor taking the application Context (unused by this example)
       SuperDuperTransform(Context&) {}

       ~SuperDuperTransform() {}

       // Transforms 'length' bytes read from 'input' at its current index and
       // writes them to 'output' at its current index. Both indexes are
       // advanced by 'length' on success.
       // Returns false, leaving both slices untouched, when 'output' does not
       // have enough room.
       bool forward(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
             // Ensure enough room in the destination buffer
             if (output._length - output._index < getMaxEncodedLength(length))
                  return false;

             byte* src = &input._array[input._index];
             byte* dst = &output._array[output._index];

             for (int i = 0; i < length; i++)
                 dst[i] = src[i] ^ byte(0xAA);

             input._index += length;
             output._index += length;
             return true; 
       }

       // Reverts forward(): same contract, same computation.
       // Returns false, leaving both slices untouched, when 'output' does not
       // have enough room.
       bool inverse(SliceArray<byte>& input, SliceArray<byte>& output, int length) THROW { 
             // Ensure enough room in the destination buffer: the decoded data
             // has exactly the same size as the encoded data.
             if (output._length - output._index < length)
                  return false;

             byte* src = &input._array[input._index];
             byte* dst = &output._array[output._index];

             for (int i = 0; i < length; i++)
                 dst[i] = src[i] ^ byte(0xAA);

             input._index += length;
             output._index += length;
             return true; 
       }

       // The transform never expands the data
       int getMaxEncodedLength(int inputLen) const { return inputLen; }
   };

Always provide a constructor that takes a Context: the context contains all the application-wide information (such as block size, number of jobs, input and output names, etc.). Always inherit from Transform<T> and respect the maximum number of jobs provided in the context. Implement the forward and inverse methods as well as getMaxEncodedLength(int). Do not write to stdout or stderr. Be aware that your code must be thread-safe.

  • Step 2: Register the transform in transform/TransformFactory.hpp

Add the type, say

 // Type identifier of the new SUPERDUPER transform (63 is an example value).
 // NOTE(review): presumably must not collide with the identifiers already
 // declared in transform/TransformFactory.hpp - verify before picking a value.
 static const uint64 SUPERDUPER_TYPE = 63; 

Let us say you use the name "SUPERDUPER" for the transform. Update the following methods:

 template <class T> uint64 TransformFactory<T>::getTypeToken(const char* tName) THROW
 template <class T> Transform<T>* TransformFactory<T>::newToken(Context& ctx, uint64 functionType) THROW
 template <class T> const char* TransformFactory<T>::getNameToken(uint64 functionType) THROW
  • Step 3: Update the help message in app/Kanzi.cpp

In Kanzi::printHelp, add the SUPERDUPER transform to the list in the -t option section.

  • Step 4: Update the makefile and rebuild the binary
  • This is it. For example, run
    kanzi -i foo.txt -f -t SUPERDUPER -e none -j 2 -v 4