From 0fc569cbfc8056617cc3d96575c4b6302b250ca6 Mon Sep 17 00:00:00 2001 From: Peter Conrad Date: Tue, 29 May 2018 16:36:31 +0200 Subject: [PATCH] Remove unused "smaz" compression --- CMakeLists.txt | 1 - include/fc/compress/smaz.hpp | 9 -- src/compress/smaz.cpp | 223 ----------------------------------- tests/compress/compress.cpp | 32 ----- 4 files changed, 265 deletions(-) delete mode 100644 include/fc/compress/smaz.hpp delete mode 100644 src/compress/smaz.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f0dac64a..318b2bd98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -280,7 +280,6 @@ set( fc_sources src/network/rate_limiting.cpp src/network/resolve.cpp src/network/url.cpp - src/compress/smaz.cpp src/compress/zlib.cpp ) diff --git a/include/fc/compress/smaz.hpp b/include/fc/compress/smaz.hpp deleted file mode 100644 index ad60292fe..000000000 --- a/include/fc/compress/smaz.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once -#include - -namespace fc { - - std::string smaz_compress( const std::string& in ); - std::string smaz_decompress( const std::string& compressed ); - -} // namespace fc diff --git a/src/compress/smaz.cpp b/src/compress/smaz.cpp deleted file mode 100644 index da1521acc..000000000 --- a/src/compress/smaz.cpp +++ /dev/null @@ -1,223 +0,0 @@ -#include -#include -#include -#include -namespace fc { - -typedef const char* const_char_ptr; -/* Our compression codebook, used for compression */ -static const_char_ptr Smaz_cb[241] = { -"\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS", -"\002ma\255\002li\227", "\003or \260", "", "\002ll\230\003s t\277", -"\004fromg\002mel", "", "\003its\332", "\001z\333", "\003ingF", "\001>\336", -"\001 \000\003 (\002nc\344", "\002nd=\003 on\312", -"\002ne\213\003hat\276\003re q", "", "\002ngT\003herz\004have\306\003s o\225", -"", "\003ionk\003s a\254\002ly\352", "\003hisL\003 inN\003 be\252", "", -"\003 fo\325\003 of \003 ha\311", "", "\002of\005", -"\003 co\241\002no\267\003 ma\370", "", "", "\003 cl\356\003enta\003 an7", -"\002ns\300\001\"e", "\003n t\217\002ntP\003s, \205", -"\002pe\320\003 we\351\002om\223", "\002on\037", "", "\002y G", "\003 wa\271", -"\003 re\321\002or*", "", "\002=\"\251\002ot\337", "\003forD\002ou[", -"\003 toR", "\003 th\r", "\003 it\366", -"\003but\261\002ra\202\003 wi\363\002<\346", "\002to\024", "\003arew", "\001d\030", -"\002tr\303", "", "\001\n1\003 a \222", "\003f tv\002veo", "\002un\340", "", -"\003e o\242", "\002a \243\002wa\326\001e\002", "\002ur\226\003e a\274", -"\002us\244\003\n\r\n\247", "\002ut\304\003e c\373", "\002we\221", "", "", -"\002wh\302", "\001f,", "", "", "", "\003d t\206", "", "", "\003th \343", -"\001g;", "", "", "\001\r9\003e s\265", "\003e t\234", "", "\003to Y", -"\003e\r\n\236", "\002d \036\001h\022", "", "\001,Q", "\002 a\031", "\002 b^", -"\002\r\n\025\002 cI", "\002 d\245", "\002 e\253", "\002 fh\001i\b\002e \v", -"", "\002 hU\001-\314", "\002 i8", "", "", "\002 l\315", "\002 m{", -"\002f :\002 n\354", "\002 o\035", "\002 p}\001.n\003\r\n\r\250", "", -"\002 r\275", "\002 s>", "\002 t\016", "", "\002g \235\005which+\003whi\367", -"\002 w5", "\001/\305", "\003as \214", "\003at \207", "", "\003who\331", "", -"\001l\026\002h \212", "", "\002, $", "", "\004withV", "", "", "", "\001m-", "", -"", "\002ac\357", "\002ad\350", "\003TheH", "", "", "\004this\233\001n\t", -"", "\002. y", "", "\002alX\003e, \365", "\003tio\215\002be\\", -"\002an\032\003ver\347", "", "\004that0\003tha\313\001o\006", "\003was2", -"\002arO", "\002as.", "\002at'\003the\001\004they\200\005there\322\005theird", -"\002ce\210", "\004were]", "", "\002ch\231\002l \264\001p<", "", "", -"\003one\256", "", "\003he \023\002dej", "\003ter\270", "\002cou", "", -"\002by\177\002di\201\002eax", "", "\002ec\327", "\002edB", "\002ee\353", "", -"", "\001r\f\002n )", "", "", "", "\002el\262", "", "\003in i\002en3", "", -"\002o `\001s\n", "", "\002er\033", "\003is t\002es6", "", "\002ge\371", -"\004.com\375", "\002fo\334\003our\330", "\003ch \301\001t\003", "\002hab", "", -"\003men\374", "", "\002he\020", "", "", "\001u&", "\002hif", "", -"\003not\204\002ic\203", "\003ed @\002id\355", "", "", "\002ho\273", -"\002r K\001vm", "", "", "", "\003t t\257\002il\360", "\002im\342", -"\003en \317\002in\017", "\002io\220", "\002s \027\001wA", "", "\003er |", -"\003es ~\002is%", "\002it/", "", "\002iv\272", "", -"\002t #\ahttp://C\001x\372", "\002la\211", "\001<\341", "\003, a\224" -}; - - -/* Reverse compression codebook, used for decompression */ -static const_char_ptr Smaz_rcb[254] = { -" ", "the", "e", "t", "a", "of", "o", "and", "i", "n", "s", "e ", "r", " th", -" t", "in", "he", "th", "h", "he ", "to", "\r\n", "l", "s ", "d", " a", "an", -"er", "c", " o", "d ", "on", " of", "re", "of ", "t ", ", ", "is", "u", "at", -" ", "n ", "or", "which", "f", "m", "as", "it", "that", "\n", "was", "en", -" ", " w", "es", " an", " i", "\r", "f ", "g", "p", "nd", " s", "nd ", "ed ", -"w", "ed", "http://", "for", "te", "ing", "y ", "The", " c", "ti", "r ", "his", -"st", " in", "ar", "nt", ",", " to", "y", "ng", " h", "with", "le", "al", "to ", -"b", "ou", "be", "were", " b", "se", "o ", "ent", "ha", "ng ", "their", "\"", -"hi", "from", " f", "in ", "de", "ion", "me", "v", ".", "ve", "all", "re ", -"ri", "ro", "is ", "co", "f t", "are", "ea", ". ", "her", " m", "er ", " p", -"es ", "by", "they", "di", "ra", "ic", "not", "s, ", "d t", "at ", "ce", "la", -"h ", "ne", "as ", "tio", "on ", "n t", "io", "we", " a ", "om", ", a", "s o", -"ur", "li", "ll", "ch", "had", "this", "e t", "g ", "e\r\n", " wh", "ere", -" co", "e o", "a ", "us", " d", "ss", "\n\r\n", "\r\n\r", "=\"", " be", " e", -"s a", "ma", "one", "t t", "or ", "but", "el", "so", "l ", "e s", "s,", "no", -"ter", " wa", "iv", "ho", "e a", " r", "hat", "s t", "ns", "ch ", "wh", "tr", -"ut", "/", "have", "ly ", "ta", " ha", " on", "tha", "-", " l", "ati", "en ", -"pe", " re", "there", "ass", "si", " fo", "wa", "ec", "our", "who", "its", "z", -"fo", "rs", ">", "ot", "un", "<", "im", "th ", "nc", "ate", "><", "ver", "ad", -" we", "ly", "ee", " n", "id", " cl", "ac", "il", " 1) h2 += in[1]; - if (inlen > 2) h3 = h2^in[2]; - if (j > inlen) j = inlen; - - /* Try to lookup substrings into the hash table, starting from the - * longer to the shorter substrings */ - for (; j > 0; j--) { - switch(j) { - case 1: slot = Smaz_cb[h1%241]; break; - case 2: slot = Smaz_cb[h2%241]; break; - default: slot = Smaz_cb[h3%241]; break; - } - while(slot[0]) { - if (slot[0] == j && memcmp(slot+1,in,j) == 0) { - /* Match found in the hash table, - * prepare a verbatim bytes flush if needed */ - if (verblen) { - needed = (verblen == 1) ? 2 : 2+verblen; - flush = out; - out += needed; - outlen -= needed; - } - /* Emit the byte */ - if (outlen <= 0) return _outlen+1; - out[0] = slot[slot[0]+1]; - out++; - outlen--; - inlen -= j; - in += j; - goto out; - } else { - slot += slot[0]+2; - } - } - } - /* Match not found - add the byte to the verbatim buffer */ - verb[verblen] = in[0]; - verblen++; - inlen--; - in++; -out: - /* Prepare a flush if we reached the flush length limit, and there - * is not already a pending flush operation. */ - if (!flush && (verblen == 256 || (verblen > 0 && inlen == 0))) { - needed = (verblen == 1) ? 2 : 2+verblen; - flush = out; - out += needed; - outlen -= needed; - if (outlen < 0) return _outlen+1; - } - /* Perform a verbatim flush if needed */ - if (flush) { - if (verblen == 1) { - flush[0] = (signed char)254; - flush[1] = verb[0]; - } else { - flush[0] = (signed char)255; - flush[1] = (signed char)(verblen-1); - memcpy(flush+2,verb,verblen); - } - flush = NULL; - verblen = 0; - } - } - return out-_out; -} - -void smaz_decompress(const char *in, uint32_t inlen, std::stringstream& ss ) { - const unsigned char *c = (const unsigned char*) in; -// char *_out = out; -// int _outlen = outlen; - - while(inlen) { - if (*c == 254) { - /* Verbatim byte */ - //if (outlen < 1) return _outlen+1; - //*out = *(c+1); - ss.put( *(c+1) ); - //out++; - //outlen--; - c += 2; - inlen -= 2; - } else if (*c == 255) { - /* Verbatim string */ - int len = (*(c+1))+1; - //if (outlen < len) return _outlen+1; - ss.write( (const char*)(c+2),len ); - - //memcpy(out,c+2,len); - //out += len; - //outlen -= len; - c += 2+len; - inlen -= 2+len; - } else { - /* Codebook entry */ - const char *s = Smaz_rcb[*c]; - int len = strlen(s); - - //if (outlen < len) return _outlen+1; - //memcpy(out,s,len); - ss.write( s, len ); - //out += len; - //outlen -= len; - c++; - inlen--; - } - } -} - - - - std::string smaz_compress( const std::string& in ) - { - std::string out; - out.resize(in.size()); - auto out_len = smaz_compress( in.c_str(), in.size(), &out[0], out.size() ); - FC_ASSERT( out_len <= out.size() ); - out.resize(out_len); - return out; - } - std::string smaz_decompress( const std::string& compressed ) - { - std::stringstream ss; - smaz_decompress( compressed.c_str(), compressed.length(), ss ); - return ss.str(); - } - -} // namespace fc diff --git a/tests/compress/compress.cpp b/tests/compress/compress.cpp index a30255e98..bc5d3aaee 100644 --- a/tests/compress/compress.cpp +++ b/tests/compress/compress.cpp @@ -2,43 +2,11 @@ #include #include -#include #include #include BOOST_AUTO_TEST_SUITE(compress) -BOOST_AUTO_TEST_CASE(smaz_test) -{ - std::ifstream testfile; - testfile.open("README.md"); - - std::stringstream buffer; - std::string line; - std::getline( testfile, line ); - while( testfile.good() ) - { - buffer << line << "\n"; - try { - std::string compressed = fc::smaz_compress( line ); - std::string decomp = fc::smaz_decompress( compressed ); - BOOST_CHECK_EQUAL( decomp, line ); - } - catch ( fc::exception& e ) - { - std::cout<