Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Massive changes and additions everywhere, including C++ code for about

everything, and a nearly code-complete (though not yet debugged) efficient
Huffman decoder/encoder.
  • Loading branch information...
commit 961f413c5de4e7e3bcf876df5703cb097ddbb945 1 parent bf12149
Roberto Peon authored
View
8 example_code/Makefile
@@ -5,14 +5,14 @@ OPT = -O0
CXXFLAGS = $(DEBUG) $(OPT) -std=c++0x
TARGETS = bit_bucket_test huffman_test
-#.cc.o:
-# $(CXX) $(CXXFLAGS) $< -o $@
+.cc.o:
+ $(CXX) $(CXXFLAGS) $< -o $@
-bit_bucket_test: bit_bucket.cc bit_bucket_test.cc
+bit_bucket_test: bit_bucket.cc bit_bucket_test.cc header_freq_tables.cc header_freq_tables.h
$(CXX) $(CXXFLAGS) bit_bucket_test.cc -o $@
-huffman_test: huffman.cc huffman_test.cc
+huffman_test: huffman.cc huffman_test.cc bit_bucket_test
$(CXX) $(CXXFLAGS) huffman_test.cc -o $@
all: $(TARGETS)
View
79 example_code/bit_bucket.cc
@@ -16,26 +16,50 @@ using std::flush;
using std::min;
using std::max;
-void FormatUIAsBits(unsigned int c, string* retval, int bits) {
- retval->push_back('|');
- for (int i = 0; i < bits; ++i) {
- if (c & (0x80 >> i)) {
- retval->push_back('1');
- } else {
- retval->push_back('0');
- }
- }
+// Renders the first num_bits bits of an indexable byte sequence as text.
+// One character ('1' or '0') is written per bit, most-significant bit of
+// each byte first, and a '|' is written before every bit whose position
+// plus `offset` is a multiple of 8.
+template <typename T>
+string FormatAsBits(const T& v, int num_bits, int offset = 0) {
+  stringstream out;
+  int bit = 0;
+  while (bit < num_bits) {
+    if ((bit + offset) % 8 == 0) {
+      out << "|";
+    }
+    const unsigned int byte = v[bit / 8];
+    const unsigned int mask = 0x80U >> (bit % 8);
+    out << ((byte & mask) > 0);
+    ++bit;
+  }
+  return out.str();
}
-template <typename T>
-string FormatAsBits(const T& v, int num_bits) {
- string retval;
- for (int i = 0; i < num_bits / 8; ++i) {
- FormatUIAsBits(static_cast<unsigned int>(v[i]), &retval, 8);
- }
- if (num_bits % 8)
- FormatUIAsBits(static_cast<unsigned int>(v.back()), &retval, num_bits % 8);
- return retval;
+// Specialization for a 32-bit word: renders its num_bits most-significant
+// bits, first bit = bit 31, with a '|' before every bit whose position plus
+// `offset` is a multiple of 8.
+template <>
+string FormatAsBits<uint32_t>(const uint32_t& v, int num_bits, int offset) {
+  stringstream retval;
+  for (int i = 0; i < num_bits; ++i) {
+    if ((i + offset) % 8 == 0)
+      retval << "|";
+    // Positions past the 32-bit width have no source bit. Print them as 0
+    // instead of shifting by a negative count, which is undefined behaviour.
+    const bool bit = i < 32 && ((v >> (31 - i)) & 0x1U) != 0;
+    retval << bit;
+  }
+  return retval.str();
+}
+
+// Specialization for a 16-bit word: renders its num_bits most-significant
+// bits, first bit = bit 15, with a '|' before every bit whose position plus
+// `offset` is a multiple of 8.
+template <>
+string FormatAsBits<uint16_t>(const uint16_t& v, int num_bits, int offset) {
+  stringstream retval;
+  for (int i = 0; i < num_bits; ++i) {
+    if ((i + offset) % 8 == 0)
+      retval << "|";
+    // Positions past the 16-bit width have no source bit. Print them as 0
+    // instead of shifting by a negative count, which is undefined behaviour.
+    const bool bit = i < 16 && ((v >> (15 - i)) & 0x1U) != 0;
+    retval << bit;
+  }
+  return retval.str();
+}
+
+// Specialization for a single byte: renders its num_bits most-significant
+// bits, first bit = bit 7, with a '|' before every bit whose position plus
+// `offset` is a multiple of 8.
+template <>
+string FormatAsBits<uint8_t>(const uint8_t& v, int num_bits, int offset) {
+  stringstream retval;
+  for (int i = 0; i < num_bits; ++i) {
+    if ((i + offset) % 8 == 0)
+      retval << "|";
+    // Positions past the 8-bit width have no source bit. Print them as 0
+    // instead of shifting by a negative count, which is undefined behaviour.
+    const bool bit = i < 8 && ((v >> (7 - i)) & 0x1U) != 0;
+    retval << bit;
+  }
+  return retval.str();
}
class BitBucket {
@@ -154,22 +178,27 @@ class BitBucket {
retval->insert(retval->end(), bsa.begin() + idx_byte, bsa.begin() + idx_byte + output_bytes);
idx_byte += num_bits / 8;
idx_boff = num_bits % 8;
- retval->back() &= ~(0xff >> idx_boff);
+ if (idx_boff) {
+ retval->back() &= ~(0xff >> idx_boff);
+ }
} else { // idx_boff != 0. There WILL be shifting about.
int idx_leftover = 8 - idx_boff;
while (bits_left >= 8) {
- char c = bsa[idx_byte] << idx_boff;
+ unsigned int c = bsa[idx_byte] << idx_boff;
++idx_byte;
c |= bsa[idx_byte] >> idx_leftover;
- retval->push_back(c);
+ // cout << "ONE_BYTE DOWN, BITS_LEFT:" << bits_left
+ // << " " << FormatAsBits(&c, 8) << "\n";
+ retval->push_back((char)c);
bits_left -= 8;
}
if (bits_left) {
+ //cout << "BITS LEFT: " << bits_left << "\n";
int cur_boff = 0;
- char cur_byte = 0;
+ unsigned int cur_byte = 0;
while (true) {
int bits_to_consume = min(min(8 - cur_boff, idx_leftover), bits_left);
- char mask = ~(0xff >> bits_to_consume);
+ unsigned int mask = ~(0xff >> bits_to_consume);
cur_byte |= ((bsa[idx_byte] << idx_boff) & mask) >> cur_boff;
bits_left -= bits_to_consume;
idx_boff += bits_to_consume;
@@ -186,7 +215,9 @@ class BitBucket {
abort();
}
if (bits_left == 0) {
- retval->push_back(cur_byte);
+ // cout << "BITS LEFT: " << bits_left
+ // << " " << FormatAsBits(&cur_byte, 8) << "\n";
+ retval->push_back((char)cur_byte);
break;
}
}
View
3  example_code/bit_bucket.py
@@ -12,6 +12,9 @@ def Clear(self):
self.idx_byte = 0
self.idx_boff = 0
+ def StoreBit(self, bit):
+   """Append a single bit to the bucket.
+
+   Any truthy `bit` is stored as 1 and any falsy `bit` as 0. The bit is
+   handed to StoreBits as a one-byte list with the bit in the top position
+   and a bit count of 1.
+   """
+   # Normalise first: `bit << 7` would exceed one byte for values such as 2.
+   self.StoreBits(([0x80 if bit else 0], 1))
+
def StoreBits(self, input):
(inp_bytes, inp_bits) = input
old_out_boff = self.out_boff
View
BIN  example_code/bit_bucket_test
Binary file not shown
View
39 example_code/bit_bucket_test.cc
@@ -27,6 +27,45 @@ void TestStoreBits(BitBucket* bb, const Testcase& test) {
+// Reads the bucket back through GetBits and compares the formatted result
+// with test.expected_state. The first pass reads every bit in one GetBits
+// call from offset 0 and aborts on mismatch. The second pass repeats the
+// read for each starting offset i in [1, min(8, num_bits)): i bits are read
+// one at a time with GetBit, the rest with a single GetBits call. A mismatch
+// there is only reported, because the abort is commented out.
void TestGetBits(BitBucket*bb, const Testcase& test) {
bb->Seek(0);
+  vector<char> storage;
+  const int num_bits = bb->NumBits();
+  bb->GetBits(&storage, num_bits);
+  stringstream formatted_bits;
+  formatted_bits << FormatAsBits(storage, num_bits);
+  formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]";
+  if (formatted_bits.str() != test.expected_state) {
+    cerr << "\n";
+    cerr << " --- FAILED ---\n";
+    cerr << " Expected: \"" << test.expected_state << "\"\n";
+    cerr << " Got: \"" << formatted_bits.str() << "\"\n";
+    cerr << " DEBUG: " << bb->DebugStr() << "\n";
+    abort();
+  }
+  // Now, do it again, starting from bit offsets other than 0
+  for (int i = 1; i < min(8, num_bits); ++i) {
+    bb->Seek(0);
+    formatted_bits.str("");
+    for (int j = 0; j < i; ++j) {
+      if (j % 8 == 0) {
+        formatted_bits << "|";
+      }
+      formatted_bits << bb->GetBit();
+    }
+    storage.clear();
+    bb->GetBits(&storage, num_bits - i);
+    // Format the remainder once; the offset keeps the '|' separators aligned
+    // with the bits already emitted one at a time above.
+    formatted_bits << FormatAsBits(storage, num_bits - i, i);
+    formatted_bits << " [" << num_bits << "," << num_bits % 8 << "]";
+    if (formatted_bits.str() != test.expected_state) {
+      cerr << "\n";
+      cerr << " --- FAILED ---\n";
+      cerr << " Offset: " << i << "\n";
+      cerr << " Expected: \"" << test.expected_state << "\"\n";
+      cerr << " Got: \"" << formatted_bits.str() << "\"\n";
+      cerr << " DEBUG: " << bb->DebugStr() << "\n";
+      //abort();
+    }
+  }
}
void RunTests(const vector<Testcase>& tests) {
View
257 example_code/canonical-code-table.txt
@@ -0,0 +1,257 @@
+ 5 0 00000--------------------------- '.'
+ 5 1 00001--------------------------- '/'
+ 5 2 00010--------------------------- '0'
+ 5 3 00011--------------------------- '1'
+ 5 4 00100--------------------------- '2'
+ 5 5 00101--------------------------- '3'
+ 5 6 00110--------------------------- '4'
+ 5 7 00111--------------------------- '6'
+ 5 8 01000--------------------------- '='
+ 5 9 01001--------------------------- 'a'
+ 5 10 01010--------------------------- 'c'
+ 5 11 01011--------------------------- 'e'
+ 5 12 01100--------------------------- 'i'
+ 5 13 01101--------------------------- 'm'
+ 5 14 01110--------------------------- 'n'
+ 5 15 01111--------------------------- 'o'
+ 5 16 10000--------------------------- 's'
+ 5 17 10001--------------------------- 't'
+ 6 36 100100-------------------------- '%'
+ 6 37 100101-------------------------- '&'
+ 6 38 100110-------------------------- '-'
+ 6 39 100111-------------------------- '5'
+ 6 40 101000-------------------------- '7'
+ 6 41 101001-------------------------- '8'
+ 6 42 101010-------------------------- '9'
+ 6 43 101011-------------------------- '_'
+ 6 44 101100-------------------------- 'b'
+ 6 45 101101-------------------------- 'd'
+ 6 46 101110-------------------------- 'g'
+ 6 47 101111-------------------------- 'h'
+ 6 48 110000-------------------------- 'l'
+ 6 49 110001-------------------------- 'p'
+ 6 50 110010-------------------------- 'r'
+ 6 51 110011-------------------------- 256
+ 7 104 1101000------------------------- ','
+ 7 105 1101001------------------------- 'A'
+ 7 106 1101010------------------------- 'C'
+ 7 107 1101011------------------------- 'D'
+ 7 108 1101100------------------------- 'f'
+ 7 109 1101101------------------------- 'j'
+ 7 110 1101110------------------------- 'k'
+ 7 111 1101111------------------------- 'u'
+ 7 112 1110000------------------------- 'v'
+ 7 113 1110001------------------------- 'w'
+ 7 114 1110010------------------------- 'x'
+ 7 115 1110011------------------------- 'y'
+ 8 232 11101000------------------------ '?'
+ 8 233 11101001------------------------ 'B'
+ 8 234 11101010------------------------ 'E'
+ 8 235 11101011------------------------ 'F'
+ 8 236 11101100------------------------ 'G'
+ 8 237 11101101------------------------ 'H'
+ 8 238 11101110------------------------ 'I'
+ 8 239 11101111------------------------ 'L'
+ 8 240 11110000------------------------ 'N'
+ 8 241 11110001------------------------ 'O'
+ 8 242 11110010------------------------ 'P'
+ 8 243 11110011------------------------ 'R'
+ 8 244 11110100------------------------ 'S'
+ 8 245 11110101------------------------ 'T'
+ 8 246 11110110------------------------ 'U'
+ 8 247 11110111------------------------ 'V'
+ 8 248 11111000------------------------ 'X'
+ 8 249 11111001------------------------ 'q'
+ 8 250 11111010------------------------ 'z'
+ 9 502 111110110----------------------- ':'
+ 9 503 111110111----------------------- ';'
+ 9 504 111111000----------------------- 'J'
+ 9 505 111111001----------------------- 'K'
+ 9 506 111111010----------------------- 'M'
+ 9 507 111111011----------------------- 'Q'
+ 9 508 111111100----------------------- 'W'
+ 9 509 111111101----------------------- 'Y'
+ 9 510 111111110----------------------- 'Z'
+11 2044 11111111100--------------------- '!'
+12 4090 111111111010-------------------- ' '
+12 4091 111111111011-------------------- '('
+12 4092 111111111100-------------------- ')'
+12 4093 111111111101-------------------- '*'
+13 8188 1111111111100------------------- '{'
+13 8189 1111111111101------------------- '}'
+14 16380 11111111111100------------------ '~'
+15 32762 111111111111010----------------- '$'
+15 32763 111111111111011----------------- '''
+15 32764 111111111111100----------------- '+'
+15 32765 111111111111101----------------- '['
+15 32766 111111111111110----------------- ']'
+22 4194176 1111111111111110000000---------- '\xaa'
+22 4194177 1111111111111110000001---------- '\xab'
+22 4194178 1111111111111110000010---------- '\xac'
+22 4194179 1111111111111110000011---------- '\xad'
+22 4194180 1111111111111110000100---------- '\xae'
+22 4194181 1111111111111110000101---------- '\xaf'
+22 4194182 1111111111111110000110---------- '\xb0'
+22 4194183 1111111111111110000111---------- '\xb1'
+22 4194184 1111111111111110001000---------- '\xb2'
+22 4194185 1111111111111110001001---------- '\xb3'
+22 4194186 1111111111111110001010---------- '\xb4'
+22 4194187 1111111111111110001011---------- '\xb5'
+22 4194188 1111111111111110001100---------- '\xb6'
+22 4194189 1111111111111110001101---------- '\xb7'
+22 4194190 1111111111111110001110---------- '\xb8'
+22 4194191 1111111111111110001111---------- '\xb9'
+22 4194192 1111111111111110010000---------- '\xba'
+22 4194193 1111111111111110010001---------- '\xbb'
+22 4194194 1111111111111110010010---------- '\xbc'
+22 4194195 1111111111111110010011---------- '\xbd'
+22 4194196 1111111111111110010100---------- '\xbe'
+22 4194197 1111111111111110010101---------- '\xbf'
+22 4194198 1111111111111110010110---------- '\xc0'
+22 4194199 1111111111111110010111---------- '\xc1'
+22 4194200 1111111111111110011000---------- '\xc2'
+22 4194201 1111111111111110011001---------- '\xc3'
+22 4194202 1111111111111110011010---------- '\xc4'
+22 4194203 1111111111111110011011---------- '\xc5'
+22 4194204 1111111111111110011100---------- '\xc6'
+22 4194205 1111111111111110011101---------- '\xc7'
+22 4194206 1111111111111110011110---------- '\xc8'
+22 4194207 1111111111111110011111---------- '\xc9'
+22 4194208 1111111111111110100000---------- '\xca'
+22 4194209 1111111111111110100001---------- '\xcb'
+22 4194210 1111111111111110100010---------- '\xcc'
+22 4194211 1111111111111110100011---------- '\xcd'
+22 4194212 1111111111111110100100---------- '\xce'
+22 4194213 1111111111111110100101---------- '\xcf'
+22 4194214 1111111111111110100110---------- '\xd0'
+22 4194215 1111111111111110100111---------- '\xd1'
+22 4194216 1111111111111110101000---------- '\xd2'
+22 4194217 1111111111111110101001---------- '\xd3'
+22 4194218 1111111111111110101010---------- '\xd4'
+22 4194219 1111111111111110101011---------- '\xd5'
+22 4194220 1111111111111110101100---------- '\xd6'
+22 4194221 1111111111111110101101---------- '\xd7'
+22 4194222 1111111111111110101110---------- '\xd8'
+22 4194223 1111111111111110101111---------- '\xd9'
+22 4194224 1111111111111110110000---------- '\xda'
+22 4194225 1111111111111110110001---------- '\xdb'
+22 4194226 1111111111111110110010---------- '\xdc'
+22 4194227 1111111111111110110011---------- '\xdd'
+22 4194228 1111111111111110110100---------- '\xde'
+22 4194229 1111111111111110110101---------- '\xdf'
+22 4194230 1111111111111110110110---------- '\xe0'
+22 4194231 1111111111111110110111---------- '\xe1'
+22 4194232 1111111111111110111000---------- '\xe2'
+22 4194233 1111111111111110111001---------- '\xe3'
+22 4194234 1111111111111110111010---------- '\xe4'
+22 4194235 1111111111111110111011---------- '\xe5'
+22 4194236 1111111111111110111100---------- '\xe6'
+22 4194237 1111111111111110111101---------- '\xe7'
+22 4194238 1111111111111110111110---------- '\xe8'
+22 4194239 1111111111111110111111---------- '\xe9'
+22 4194240 1111111111111111000000---------- '\xea'
+22 4194241 1111111111111111000001---------- '\xeb'
+22 4194242 1111111111111111000010---------- '\xec'
+22 4194243 1111111111111111000011---------- '\xed'
+22 4194244 1111111111111111000100---------- '\xee'
+22 4194245 1111111111111111000101---------- '\xef'
+22 4194246 1111111111111111000110---------- '\xf0'
+22 4194247 1111111111111111000111---------- '\xf1'
+22 4194248 1111111111111111001000---------- '\xf2'
+22 4194249 1111111111111111001001---------- '\xf3'
+22 4194250 1111111111111111001010---------- '\xf4'
+22 4194251 1111111111111111001011---------- '\xf5'
+22 4194252 1111111111111111001100---------- '\xf6'
+22 4194253 1111111111111111001101---------- '\xf7'
+22 4194254 1111111111111111001110---------- '\xf8'
+22 4194255 1111111111111111001111---------- '\xf9'
+22 4194256 1111111111111111010000---------- '\xfa'
+22 4194257 1111111111111111010001---------- '\xfb'
+22 4194258 1111111111111111010010---------- '\xfc'
+22 4194259 1111111111111111010011---------- '\xfd'
+22 4194260 1111111111111111010100---------- '\xfe'
+22 4194261 1111111111111111010101---------- '\xff'
+23 8388524 11111111111111110101100--------- '\0'
+23 8388525 11111111111111110101101--------- '\x01'
+23 8388526 11111111111111110101110--------- '\x02'
+23 8388527 11111111111111110101111--------- '\x03'
+23 8388528 11111111111111110110000--------- '\x04'
+23 8388529 11111111111111110110001--------- '\x05'
+23 8388530 11111111111111110110010--------- '\x06'
+23 8388531 11111111111111110110011--------- '\x07'
+23 8388532 11111111111111110110100--------- '\x08'
+23 8388533 11111111111111110110101--------- '\t'
+23 8388534 11111111111111110110110--------- '\n'
+23 8388535 11111111111111110110111--------- '\x0b'
+23 8388536 11111111111111110111000--------- '\x0c'
+23 8388537 11111111111111110111001--------- '\r'
+23 8388538 11111111111111110111010--------- '\x0e'
+23 8388539 11111111111111110111011--------- '\x0f'
+23 8388540 11111111111111110111100--------- '\x10'
+23 8388541 11111111111111110111101--------- '\x11'
+23 8388542 11111111111111110111110--------- '\x12'
+23 8388543 11111111111111110111111--------- '\x13'
+23 8388544 11111111111111111000000--------- '\x14'
+23 8388545 11111111111111111000001--------- '\x15'
+23 8388546 11111111111111111000010--------- '\x16'
+23 8388547 11111111111111111000011--------- '\x17'
+23 8388548 11111111111111111000100--------- '\x18'
+23 8388549 11111111111111111000101--------- '\x19'
+23 8388550 11111111111111111000110--------- '\x1a'
+23 8388551 11111111111111111000111--------- '\x1b'
+23 8388552 11111111111111111001000--------- '\x1c'
+23 8388553 11111111111111111001001--------- '\x1d'
+23 8388554 11111111111111111001010--------- '\x1e'
+23 8388555 11111111111111111001011--------- '\x1f'
+23 8388556 11111111111111111001100--------- '"'
+23 8388557 11111111111111111001101--------- '#'
+23 8388558 11111111111111111001110--------- '<'
+23 8388559 11111111111111111001111--------- '>'
+23 8388560 11111111111111111010000--------- '@'
+23 8388561 11111111111111111010001--------- '\'
+23 8388562 11111111111111111010010--------- '^'
+23 8388563 11111111111111111010011--------- '`'
+23 8388564 11111111111111111010100--------- '|'
+23 8388565 11111111111111111010101--------- '\x7f'
+23 8388566 11111111111111111010110--------- '\x80'
+23 8388567 11111111111111111010111--------- '\x81'
+23 8388568 11111111111111111011000--------- '\x82'
+23 8388569 11111111111111111011001--------- '\x83'
+23 8388570 11111111111111111011010--------- '\x84'
+23 8388571 11111111111111111011011--------- '\x85'
+23 8388572 11111111111111111011100--------- '\x86'
+23 8388573 11111111111111111011101--------- '\x87'
+23 8388574 11111111111111111011110--------- '\x88'
+23 8388575 11111111111111111011111--------- '\x89'
+23 8388576 11111111111111111100000--------- '\x8a'
+23 8388577 11111111111111111100001--------- '\x8b'
+23 8388578 11111111111111111100010--------- '\x8c'
+23 8388579 11111111111111111100011--------- '\x8d'
+23 8388580 11111111111111111100100--------- '\x8e'
+23 8388581 11111111111111111100101--------- '\x8f'
+23 8388582 11111111111111111100110--------- '\x90'
+23 8388583 11111111111111111100111--------- '\x91'
+23 8388584 11111111111111111101000--------- '\x92'
+23 8388585 11111111111111111101001--------- '\x93'
+23 8388586 11111111111111111101010--------- '\x94'
+23 8388587 11111111111111111101011--------- '\x95'
+23 8388588 11111111111111111101100--------- '\x96'
+23 8388589 11111111111111111101101--------- '\x97'
+23 8388590 11111111111111111101110--------- '\x98'
+23 8388591 11111111111111111101111--------- '\x99'
+23 8388592 11111111111111111110000--------- '\x9a'
+23 8388593 11111111111111111110001--------- '\x9b'
+23 8388594 11111111111111111110010--------- '\x9c'
+23 8388595 11111111111111111110011--------- '\x9d'
+23 8388596 11111111111111111110100--------- '\x9e'
+23 8388597 11111111111111111110101--------- '\x9f'
+23 8388598 11111111111111111110110--------- '\xa0'
+23 8388599 11111111111111111110111--------- '\xa1'
+23 8388600 11111111111111111111000--------- '\xa2'
+23 8388601 11111111111111111111001--------- '\xa3'
+23 8388602 11111111111111111111010--------- '\xa4'
+23 8388603 11111111111111111111011--------- '\xa5'
+23 8388604 11111111111111111111100--------- '\xa6'
+23 8388605 11111111111111111111101--------- '\xa7'
+23 8388606 11111111111111111111110--------- '\xa8'
+23 8388607 11111111111111111111111--------- '\xa9'
View
4 example_code/header_freq_tables.cc
@@ -5,7 +5,7 @@
using std::vector;
using std::pair;
-vector<pair<unsigned int, long> > FreqTables::request_freq_table = {
+vector<pair<uint16_t, uint32_t> > FreqTables::request_freq_table = {
{0x00U, 0},
{0x01U, 0},
{0x02U, 0},
@@ -265,7 +265,7 @@ vector<pair<unsigned int, long> > FreqTables::request_freq_table = {
{256, 1093},
};
-vector<pair<unsigned int, long> > FreqTables::response_freq_table = {
+vector<pair<uint16_t, uint32_t> > FreqTables::response_freq_table = {
{0x00, 57},
{0x01, 0},
{0x02, 0},
View
7 example_code/header_freq_tables.h
@@ -1,15 +1,18 @@
#ifndef HEADER_FREQ_TABLES
#define HEADER_FREQ_TABLES
+#include <stdint.h>
+
#include <vector>
#include <utility>
+
using std::vector;
using std::pair;
struct FreqTables {
public:
- static vector<pair<unsigned int, long> > request_freq_table;
- static vector<pair<unsigned int, long> > response_freq_table;
+ static vector<pair<uint16_t, uint32_t> > request_freq_table;
+ static vector<pair<uint16_t, uint32_t> > response_freq_table;
};
#endif
View
435 example_code/huffman.cc
@@ -1,5 +1,7 @@
#include <stdlib.h>
#include <assert.h>
+#include <stdint.h>
+#include <float.h>
#include <deque>
#include <utility>
@@ -10,6 +12,9 @@
#include "pretty_print_tree.cc"
#include "bit_bucket.cc"
#include <array>
+#include <limits>
+#include <map>
+#include <iomanip>
using std::deque;
using std::pair;
@@ -22,57 +27,68 @@ using std::dec;
using std::sort;
using std::array;
using std::ios;
+using std::map;
+using std::make_pair;
+using std::lower_bound;
+using std::upper_bound;
+using std::setw;
-void OutputCharToOstream(ostream& os, unsigned int c) {
+void OutputCharToOstream(ostream& os, unsigned short c) {
if (c > 256 + 1)
abort();
- os << " '";
- if (c < 128 && (isgraph(c) || c == ' ')) {
- os << (char)c;
+ if (c >= 256) {
+ os << c;
} else {
- switch (c) {
- case '\t':
- os << "\\t";
- break;
- case '\n':
- os << "\\n";
- break;
- case '\r':
- os << "\\r";
- break;
- case '\0':
- os << "\\0";
- break;
- default:
- if (c < 256) {
+ os << " '";
+ if (c < 128 && (isgraph(c) || c == ' ')) {
+ os << (char)c;
+ } else {
+ switch (c) {
+ case '\t':
+ os << "\\t";
+ break;
+ case '\n':
+ os << "\\n";
+ break;
+ case '\r':
+ os << "\\r";
+ break;
+ case '\0':
+ os << "\\0";
+ break;
+ default:
if (c >= 16) {
os << "\\x" << hex << c << dec;
} else {
os << "\\x0" << hex << c << dec;
}
- } else {
- os << c;
- }
- break;
+ break;
+ }
}
+ os << "'";
}
- os << "'";
}
-class Huffman {
+// Returns, as a string, whatever OutputCharToOstream writes for symbol `c`.
+string ReadableUShort(uint16_t c) {
+  stringstream rendered;
+  OutputCharToOstream(rendered, c);
+  return rendered.str();
+}
+class Huffman {
+ private:
struct Node {
double weight;
Node* children[2];
- Node* parent;
- unsigned int c;
+ uint32_t depth;
+ unsigned short c;
bool terminal;
- explicit Node() : weight(0), parent(0), c(0), terminal(false) {
+ explicit Node() : weight(0), depth(0), c(0), terminal(false) {
children[0] = children[1] = 0;
}
- explicit Node(unsigned int c, double weight) :
- weight(weight + 1.0/256.0), parent(0), c(c), terminal(true) {
+ explicit Node(unsigned short c, double weight) :
+ weight(weight), depth(0), c(c), terminal(true) {
children[0] = children[1] = 0;
}
friend ostream& operator<<(ostream& os, const Node& leaf) {
@@ -85,23 +101,41 @@ class Huffman {
}
};
+ // One code-table entry: the code as packed bytes (`vec`), its length in
+ // bits (`len`), and the code as a 32-bit value (`val`).
+ struct VecAndLen {
+   vector<char> vec;
+   int len;
+   uint32_t val;
+
+   // Empty entry: no bytes, zero length, zero value.
+   VecAndLen() : vec(), len(0), val(0) {}
+
+   // Entry of the given bit length; bytes and value are filled in later.
+   explicit VecAndLen(int bit_len) : vec(), len(bit_len), val(0) {}
+ };
+
+ typedef array<VecAndLen, 256+1> CodeTable;
+
void GetNextNode(Node* current_leaf, int child_idx,
deque<Node*> *leaves, deque<Node*> *internals) {
assert (current_leaf->children[child_idx] == 0);
if (internals->size() && leaves->size()) {
if (leaves->front()->weight <= internals->front()->weight) {
current_leaf->children[child_idx] = leaves->front();
+ current_leaf->depth = max(current_leaf->depth,
+ leaves->front()->depth + 1);
leaves->pop_front();
} else {
current_leaf->children[child_idx] = internals->front();
+ current_leaf->depth = max(current_leaf->depth,
+ internals->front()->depth + 1);
internals->pop_front();
}
} else if (internals->size()) {
current_leaf->children[child_idx] = internals->front();
+ current_leaf->depth = max(current_leaf->depth,
+ internals->front()->depth + 1);
internals->pop_front();
} else {
assert(leaves->size() != 0);
current_leaf->children[child_idx] = leaves->front();
+ current_leaf->depth = max(current_leaf->depth,
+ leaves->front()->depth + 1);
leaves->pop_front();
}
current_leaf->weight += current_leaf->children[child_idx]->weight;
@@ -111,7 +145,7 @@ class Huffman {
if (a->weight != b->weight) {
return a->weight < b->weight;
} else if (a->terminal != b->terminal) {
- return !a->terminal;
+ return b->terminal;
} else if (a->terminal) {
return a->c < b->c;
}
@@ -136,7 +170,10 @@ class Huffman {
code_tree = 0;
}
- void BuildCodeTree(const vector<pair<unsigned int, long> >& freq_table) {
+ // returns max depth
+ int BuildCodeTree(const vector<pair<uint16_t, uint32_t> >& freq_table,
+ uint32_t divisor=1) {
+ cout << "Divisor: " << divisor << "\n";
deque<Node*> leaves;
deque<Node*> internals;
if (freq_table.size() <= 2) {
@@ -144,12 +181,17 @@ class Huffman {
abort();
}
for (int i = 0; i < freq_table.size(); ++i) {
- leaves.push_back(new Node(freq_table[i].first, freq_table[i].second));
+ double weight = freq_table[i].second / divisor;
+ uint16_t sym = freq_table[i].first;
+ assert (sym == i);
+ if (weight == 0) {
+ weight = DBL_EPSILON;
+ }
+ leaves.push_back(new Node(freq_table[i].first, weight));
}
sort(leaves.begin(), leaves.end(), NodePtrComp);
Node* current_leaf = new Node();
- leaves.pop_front();
int total_size = leaves.size();
while (total_size >= 2) {
GetNextNode(current_leaf, 0, &leaves, &internals);
@@ -165,70 +207,347 @@ class Huffman {
// the last 'current_leaf' is extraneous. Delete it.
delete current_leaf;
assert(internals.size() == 1);
+ assert(leaves.size() == 0);
code_tree = internals.front();
+ //cout << "max tree depth: " << code_tree->depth << "\n";
+ return code_tree->depth;
}
void BuildCodeTableHelper(Node* current, deque<bool>* state) {
if (current->terminal) {
-
- OutputCharToOstream(cout, current->c);
- cout << "\n";
-
BitBucket bb;
for (int i = 0; i < state->size(); ++i) {
bb.StoreBit((*state)[i]);
- cout << (*state)[i];
}
- cout << "\n";
-
- cout << bb << "\n";
- cout << bb.DebugStr() << "\n";
- unsigned int idx = current->c;
- code_table[idx] = make_pair(vector<char>(), state->size());
- bb.GetBits(&(code_table[idx].first), state->size());
- cout << FormatAsBits(code_table[idx].first, code_table[idx].second) << "\n";
+ unsigned short idx = current->c;
+ code_table[idx] = VecAndLen(state->size());
+ bb.GetBits(&(code_table[idx].vec), state->size());
}
-
state->push_back(false);
if (current->children[0]) {
BuildCodeTableHelper(current->children[0], state);
}
- state->pop_back();
-
- state->push_back(true);
+ state->back() = true;
if (current->children[1]) {
BuildCodeTableHelper(current->children[1], state);
}
state->pop_back();
}
+ typedef map<int, vector<unsigned short> > DepthToSym;
+
+ // Walks code_tree with an explicit stack and records every terminal
+ // node's symbol under that node's depth (root = depth 0) in *depth_to_sym.
+ // Does nothing when no tree has been built.
+ void DiscoverDepthAndStoreIt(DepthToSym* depth_to_sym) {
+   if (!code_tree)
+     return;
+   deque<pair<Node*, int> > stack;
+   stack.push_back(make_pair(code_tree, 0));
+   while (!stack.empty()) {
+     Node* current = stack.back().first;
+     const int depth = stack.back().second;
+     stack.pop_back();
+     if (current->terminal) {
+       // operator[] creates the per-depth vector on first use.
+       (*depth_to_sym)[depth].push_back(current->c);
+     }
+     if (current->children[0])
+       stack.push_back(make_pair(current->children[0], depth + 1));
+     if (current->children[1])
+       stack.push_back(make_pair(current->children[1], depth + 1));
+   }
+ }
+
+ // Returns the code that follows prev_code: prev_code plus one, shifted
+ // left by the growth in code length (current_code_length minus
+ // prev_code_length).
+ uint32_t ComputeNextCode(uint32_t prev_code,
+                          int current_code_length,
+                          int prev_code_length) {
+   const int added_bits = current_code_length - prev_code_length;
+   return (prev_code + 1) << added_bits;
+ }
+
+ // Appends the low bit_len bits of `val` to *vec as bytes, most-significant
+ // bits first. The code is left-aligned in a 32-bit word, so the final byte
+ // is zero-padded on the right when bit_len is not a multiple of 8.
+ // Appends nothing when bit_len <= 0; a bit_len above 32 is treated as 32.
+ void Uint32ToCharArray(vector<char>* vec, uint32_t val, int bit_len) {
+   if (bit_len <= 0)
+     return;
+   // Shifting a 32-bit value by 32 or by a negative count is undefined, so
+   // only shift when there really are leading bits to discard.
+   const uint32_t nval = (bit_len >= 32) ? val : (val << (32 - bit_len));
+   for (int rshift = 24; rshift >= 0 && bit_len > 0; rshift -= 8, bit_len -= 8) {
+     vec->push_back(static_cast<char>(nval >> rshift));
+   }
+ }
+
+ // Assigns codes to symbols from their depths in code_tree. Symbols are
+ // visited by increasing depth and, within one depth, by increasing symbol
+ // value; each gets the code ComputeNextCode derives from the previous one.
+ // For every symbol the entry in code_table is replaced with its length,
+ // the code left-aligned in 32 bits (`val`) and the code as bytes (`vec`).
+ void AltBuildCodeTable() {
+   DepthToSym syms_by_length;
+   DiscoverDepthAndStoreIt(&syms_by_length);
+
+   uint32_t code = 0xFFFFFFFF;  // adding 1 will make this 0.
+   int prev_code_length = 0;
+   DepthToSym::iterator it = syms_by_length.begin();
+   while (it != syms_by_length.end()) {
+     const int code_length = it->first;
+     vector<unsigned short>& syms = it->second;
+     sort(syms.begin(), syms.end());
+     for (vector<unsigned short>::const_iterator s = syms.begin();
+          s != syms.end();
+          ++s) {
+       code = ComputeNextCode(code, code_length, prev_code_length);
+       prev_code_length = code_length;
+       VecAndLen& entry = code_table[*s];
+       entry = VecAndLen(code_length);
+       entry.val = code << (32 - code_length);
+       //entry.val |= 0xFFFFFFFF >> code_length;
+       Uint32ToCharArray(&entry.vec, code, code_length);
+     }
+     ++it;
+   }
+   // cout << "ALT TABLE START\n";
+   // for (int i = 0; i < code_table.size(); ++i) {
+   //   cout << FormatAsBits(code_table[i].val, 32);
+   //   cout << " ";
+   //   cout << FormatAsBits(code_table[i].val, code_table[i].len);
+   //   cout << " ";
+   //   cout << FormatAsBits(code_table[i].vec, code_table[i].len);
+   //   cout << " ";
+   //   OutputCharToOstream(cout, i);
+   //   cout << "\n";
+   // }
+   // cout << "ALT TABLE END\n";
+ }
+
+ // Comparator between a code-table entry and a raw 32-bit value, ordered by
+ // the entry's `val`. Each comparison is also traced to cout. `bit_len` is
+ // stored but not consulted by either overload.
+ struct BitPatternCmp {
+   int bit_len;
+
+   explicit BitPatternCmp(int len) : bit_len(len) {}
+
+   // True when entry `a` orders before the raw value `b`.
+   bool operator()(const VecAndLen& a, uint32_t b) const{
+     const bool is_less = a.val < b;
+     cout << "a.val(" << a.val << ")"
+          << (is_less ? " < " : " >= ")
+          << "b(" << b <<")\n";
+     return is_less;
+   }
+
+   // True when the raw value `a` orders before entry `b`.
+   bool operator()(uint32_t a, const VecAndLen& b) const{
+     const bool is_less = a < b.val;
+     cout << "a(" << a << ")"
+          << (is_less ? " < " : " >= ")
+          << "b.val(" << b.val <<")\n";
+     return is_less;
+   }
+ };
+
+ // Reports whether the codes at idx_1 and idx_2 share the same bw-bit
+ // field. The field is taken from the 32-bit code after shifting out its
+ // top `msb` bits, keeping the next `bw` bits.
+ bool Equivalent(const vector<pair<uint32_t, int> >& sorted_by_code,
+                 uint32_t idx_1,
+                 uint32_t idx_2,
+                 uint32_t msb,
+                 uint32_t bw) {
+   const uint32_t drop = 32 - bw;
+   const uint32_t first_field = (sorted_by_code[idx_1].first << msb) >> drop;
+   const uint32_t second_field = (sorted_by_code[idx_2].first << msb) >> drop;
+   return first_field == second_field;
+ }
+
+ // One slot of the decode table: a symbol, the index of a follow-on table,
+ // and a flag telling whether the slot has been filled in.
+ struct DecodeEntry {
+   uint16_t sym;
+   uint8_t next_table;
+   bool valid;
+
+   // Unfilled slot.
+   DecodeEntry() : sym(0), next_table(0), valid(false) {}
+
+   // Filled slot carrying the given symbol and follow-on table index.
+   DecodeEntry(uint16_t symbol, uint8_t table) :
+       sym(symbol), next_table(table), valid(true) {}
+
+   // Prints "[DE <next_table> <symbol>]", or "[DE INVALID]" when unfilled.
+   friend ostream& operator<<(ostream& os, const DecodeEntry& de) {
+     if (!de.valid) {
+       return os << "[DE INVALID]";
+     }
+     return os << "[DE " << static_cast<uint32_t>(de.next_table)
+               << " " << ReadableUShort(de.sym) << "]";
+   }
+ };
+
+ // Describes one sub-table of the decode table: the index of its first
+ // slot (`base_idx`) and its index width in bits (`bw`).
+ struct BranchEntry {
+   uint32_t base_idx;
+   uint8_t bw;
+
+   BranchEntry() : base_idx(0), bw(0) {}
+   BranchEntry(uint32_t base, uint8_t width) : base_idx(base), bw(width) {}
+
+   // Prints "[BE <base_idx> <bw>]" with bw shown as a number.
+   friend ostream& operator<<(ostream& os, const BranchEntry& be) {
+     const uint32_t width = static_cast<uint32_t>(be.bw);
+     return os << "[BE " << be.base_idx << " " << width << "]";
+   }
+ };
+
+ typedef vector<DecodeEntry> DecodeTable;
+ typedef vector<BranchEntry> Branches;
+
+ // Recursively builds one sub-table of the decode table and prints a trace.
+ //
+ // Appends 2^bw slots to *decode_table and a BranchEntry (first slot index,
+ // bw) to *tables. It then walks sorted_by_code[begin, end), grouping
+ // consecutive entries whose bw-bit field is the same; the field is the
+ // 32-bit code with its top `msb` bits shifted out (see Equivalent).
+ //   - A group of one entry is stored as DecodeEntry(sym, 0).
+ //   - A larger group is stored as DecodeEntry(0, tables->size()), then the
+ //     helper recurses over that group with msb advanced by bw.
+ //
+ // sorted_by_code holds (code, symbol) pairs; `.second` is used to index
+ // code_table for the code length.
+ // NOTE(review): DecodeEntry::next_table is uint8_t, so tables->size() is
+ // narrowed when stored; confirm the table count stays below 256.
+ void AltBuildDecodeHelper(const vector<pair<uint32_t, int> >& sorted_by_code,
+ DecodeTable* decode_table,
+ Branches* tables,
+ uint32_t begin,
+ uint32_t end,
+ uint32_t msb,
+ uint32_t bw) {
+ // Reserve this sub-table's slots at the end of the shared decode table.
+ uint32_t decode_table_idx = decode_table->size();
+ decode_table->resize(decode_table->size() + (0x1U << bw));
+ //cout << "decode_table now resized to: " << decode_table->size() << "\n";
+
+ uint32_t table_idx = tables->size();
+ tables->push_back(BranchEntry(decode_table_idx, bw));
+
+ uint32_t run_start = begin;
+ uint32_t run_end = begin;
+ while (run_end < end) {
+ // Extend the run while entries share run_start's bw-bit field. The first
+ // comparison is run_start against itself, so run_end always advances.
+ while (Equivalent(sorted_by_code, run_start, run_end, msb, bw)) {
+ ++run_end;
+ if (run_end == end) {
+ break;
+ }
+ }
+ // run_start != run_end.
+ // implies, that run_start -> (run_end - 1) is equivalent.
+ uint32_t dist = run_end - run_start;
+ uint32_t cur_code = sorted_by_code[run_start].first;
+ // Slot within this sub-table: the bw bits that follow the top msb bits.
+ uint32_t cur_idx = (cur_code << msb) >> (32 - bw);
+ // Indent the trace by msb spaces so nested tables read as a tree.
+ for (int i = 0; i < msb; ++i) cout << " ";
+ if (dist == 1) {
+ cout << "Terminal: " << setw(6) << cur_idx
+ << " " << setw(6) << (cur_idx + decode_table_idx);
+ uint32_t code_len = code_table[sorted_by_code[run_start].second].len;
+ cout << "\t" << FormatAsBits(cur_code, code_len) << "\n";
+ uint16_t sym = sorted_by_code[run_start].second;
+ //cout << "storing [L] entry into: " << decode_table_idx << "\n";
+ (*decode_table)[decode_table_idx + cur_idx] = DecodeEntry(sym, 0);
+ } else {
+ // The child table's width is derived from the last code in the run: the
+ // bits it has left after msb + bw, capped at bw.
+ // NOTE(review): nxt_bit_len is unsigned, so it wraps if that code is
+ // shorter than msb + bw; nxt_code is assigned but not used.
+ uint32_t nxt_code_len = code_table[sorted_by_code[run_end - 1].second].len;
+ uint32_t nxt_code = sorted_by_code[run_end - 1].first;
+ uint32_t nxt_bit_len = nxt_code_len - (msb + bw);
+ cout << " Recurse: " << setw(6) << cur_idx
+ << " " << setw(6) << (cur_idx + decode_table_idx)
+ << "\t" << FormatAsBits(cur_code, msb + bw)
+ << " " << run_start << "->" << run_end
+ << " (" << (run_end - run_start) << ")"
+ << " (" << min(nxt_bit_len, bw) << ")"
+ << " (" << table_idx << ")"
+ <<"\n";
+ //cout << "storing [R] entry into: " << decode_table_idx << "\n";
+ // tables->size() is the index the recursive call below will give its own
+ // BranchEntry, so this slot points at the child table.
+ (*decode_table)[decode_table_idx + cur_idx] = DecodeEntry(0, tables->size());
+ AltBuildDecodeHelper(sorted_by_code, decode_table, tables,
+ run_start, run_end,
+ msb + bw, min(bw, nxt_bit_len));
+ }
+ run_start = run_end;
+ }
+ }
+
+ // Builds the table-driven decoder from code_table and prints it to cout.
+ //
+ // Every code_table entry is paired with its symbol index and the pairs are
+ // sorted by code value. AltBuildDecodeHelper then fills the decode table
+ // and the list of sub-tables, starting with an 8-bit first-level lookup.
+ // Finally each unfilled slot is overwritten with the slot before it, and
+ // both tables are printed.
+ void AltBuildDecodeTable() {
+   const int lookup_bits = 8;
+
+   vector<pair<uint32_t, int> > sorted_by_code;  // code->symbol
+   sorted_by_code.reserve(code_table.size());
+   for (size_t i = 0; i < code_table.size(); ++i) {
+     sorted_by_code.push_back(make_pair(code_table[i].val,
+                                        static_cast<int>(i)));
+   }
+   sort(sorted_by_code.begin(), sorted_by_code.end());
+   Branches tables;
+   tables.push_back(BranchEntry());  // 0th index will loop to the same element.
+   DecodeTable decode_table;
+   AltBuildDecodeHelper(sorted_by_code, &decode_table, &tables,
+                        0, sorted_by_code.size(),
+                        0, lookup_bits);
+   cout << "Done building tables. Displayin' 'em now\n";
+   for (size_t i = 0; i < decode_table.size(); ++i) {
+     // Slot 0 has no predecessor: i - 1 would wrap around and index far
+     // outside the table, so an unfilled first slot is left as it is.
+     if (!decode_table[i].valid && i > 0)
+       decode_table[i] = decode_table[i - 1];
+     cout << setw(6) << i << " " << decode_table[i] << "\n";
+   }
+   for (size_t i = 0; i < tables.size(); ++i) {
+     cout << i << " " << tables[i] << "\n";
+   }
+ }
void BuildCodeTable() {  // Runs AltBuildCodeTable, AltBuildDecodeTable, then BuildCodeTableHelper, in that order.
deque<bool> state;  // Scratch state handed to BuildCodeTableHelper.
if (!code_tree)
return;  // No tree yet: nothing is built.
+ AltBuildCodeTable();
+ AltBuildDecodeTable();
BuildCodeTableHelper(code_tree, &state);
}
+
Node* code_tree;
- array<pair<vector<char>, int>, 256+1> code_table;
+ CodeTable code_table;
+ unsigned short eof_value;
+
+ // for each possible prefix in the first 9 bits:
+ // lookup prefix. If it matches a terminal,
public:
- Huffman() : code_tree(0) { }
+ Huffman() : code_tree(0), eof_value(256) { }  // Starts with no tree; eof_value 256 is the code_table index Encode uses for the EOF code.
~Huffman() { DeleteCodeTree(); }  // Releases the tree via DeleteCodeTree.
- void Init(const vector<pair<unsigned int, long> >& freq_table) {
- BuildCodeTree(freq_table);
+ void Init(const vector<pair<uint16_t, uint32_t> >& freq_table) {  // Builds the tree from freq_table, then the code tables.
+ for (uint32_t divisor = 1;  // Retry with divisor 1, 2, 4, ... for as long as BuildCodeTree returns more than 32.
+ BuildCodeTree(freq_table, divisor) > 32;
+ divisor *= 2){}  // NOTE(review): no iteration cap; a uint32_t divisor wraps to 0 after 32 doublings.
+ // And now that we know that all the codes are <= 32 bits long...
BuildCodeTable();
}
+ // Appends the code of every byte of `str` to `bb`, in order, followed by
+ // the EOF code (code_table[eof_value]) when `use_eof` is true.
+ //   bb      - destination bit bucket; receives StoreBits() calls.
+ //   str     - bytes to encode; every byte value 0..255 is a valid symbol.
+ //   use_eof - whether to terminate the output with the EOF code.
+ void Encode(BitBucket* bb, const string& str, bool use_eof) const {
+   for (string::size_type i = 0; i < str.size(); ++i) {
+     // Go through unsigned char first: where plain char is signed, a byte
+     // >= 0x80 would otherwise sign-extend to 0xFF80..0xFFFF and index far
+     // outside code_table.
+     const unsigned short idx = static_cast<unsigned char>(str[i]);
+     bb->StoreBits(code_table[idx].vec, code_table[idx].len);
+   }
+   if (use_eof) {
+     bb->StoreBits(code_table[eof_value].vec, code_table[eof_value].len);
+   }
+ }
+
+ // Decodes symbols from `bb` by walking code_tree one bit at a time and
+ // appends each decoded byte to `output`.
+ //   output         - receives the decoded bytes.
+ //   bb             - source of bits (GetBit), repositioned with SeekDelta.
+ //   use_eof        - stop as soon as the EOF symbol (eof_value) is decoded.
+ //   bits_to_decode - bit budget; a negative value means "no limit" and is
+ //                    only accepted together with use_eof.
+ // Aborts on invalid parameters, or if the walk reaches a missing node
+ // (no tree built yet, or a non-terminal node without the needed child).
+ // If decoding stops before a positive budget is used up, the remaining
+ // bits are skipped with SeekDelta.
+ void Decode(string* output, BitBucket* bb,
+             bool use_eof, int bits_to_decode) const {
+   if (!use_eof && bits_to_decode < 0) {
+     cerr << "Invalid parameters for Decode\n";
+     abort();
+   }
+   int total_bits = 0;
+   while (bits_to_decode < 0 || total_bits < bits_to_decode) {
+     const Node* node = code_tree;
+     while (node && !node->terminal) {
+       const bool bit = bb->GetBit();
+       node = node->children[bit];
+       total_bits += 1;
+     }
+     // The walk above ends only on a terminal node or on a missing one, so
+     // this is the one place the sanity check can actually fire.
+     if (!node) {
+       cerr << "This shouldn't ever happen..\n";
+       abort();
+     }
+     if (use_eof && node->c == eof_value) {
+       break;
+     }
+     output->push_back(static_cast<char>(node->c));
+   }
+   if (bits_to_decode > 0 && total_bits < bits_to_decode) {
+     bb->SeekDelta(bits_to_decode - total_bits);
+   }
+ }
+
+
+ void AltDecode(string* output, BitBucket* bb,  // Unfinished stub: nothing is written to `output`.
+ bool use_eof, int bits_to_decode) const{  // use_eof and bits_to_decode are not read in this body.
+ uint32_t word;
+ bb->FillUInt32(&word);  // `word` is handed to FillUInt32 and not used afterwards.
+ uint8_t bw = tables[1].bw;  // NOTE(review): no `tables` declaration is visible at this scope in this chunk - confirm it exists; `bw` is unused.
+ }
+
friend ostream& operator<<(ostream &os, const Huffman& huff) {  // Writes one line per code_table entry and returns os.
for (int i = 0; i < huff.code_table.size(); ++i) {
+ os << FormatAsBits(huff.code_table[i].vec, huff.code_table[i].len);
+ os << " ";
OutputCharToOstream(os, i);
- os << "\t" << FormatAsBits(huff.code_table[i].first,
- huff.code_table[i].second);
os << "\n";
}
- PrettyPrintTreeToStream<Huffman::Node>(huff.code_tree, os);
+ //PrettyPrintTreeToStream<Huffman::Node>(huff.code_tree, os);
return os;
}
};
View
6 example_code/huffman.py
@@ -76,7 +76,7 @@ def EncodeToBB(self, bb, text, include_eof):
if bb.GetAllBits()[1] == prelen:
raise StandardError()
if include_eof:
- bb.StoreBits(self.code_table[128])
+ bb.StoreBits(self.code_table[256])
def Encode(self, text, include_eof):
bb = BitBucket()
@@ -95,7 +95,7 @@ def DecodeFromBB(self, bb, includes_eof, bits_to_decode):
bit = bb.GetBits(1)[0][0] >> 7
root = root[2][bit]
total_bits += 1
- if includes_eof and root[1] is not None and ord(root[1]) == 128:
+ if includes_eof and root[1] is not None and ord(root[1]) == 256:
break
elif root[1] is not None:
output.append(root[1])
@@ -131,7 +131,7 @@ def Decode(self, text, includes_eof, bits_to_decode):
if bit_index >= 8:
bit_index = 0
chr_index += 1
- if includes_eof and root[1] is not None and ord(root[1]) == 128:
+ if includes_eof and root[1] is not None and ord(root[1]) == 256:
break
elif root[1] is not None:
output.append(root[1])
View
BIN  example_code/huffman_test
Binary file not shown
View
51 example_code/huffman_test.cc
@@ -1,10 +1,59 @@
#include <stdlib.h>
#include "huffman.cc"
#include "header_freq_tables.cc"
+#include <iostream>
+
+using std::string;
+using std::cerr;
+
+struct Testcase {  // Holder for one test input; not referenced by the test code visible in this file.
+ string input;
+};
+
+template <typename T>  // T needs operator!= and stream output (operator<<).
+void Test(const T& expected, const T& actual) {  // Returns silently when the values are equal.
+ if (expected != actual) {
+ cerr << "\n";
+ cerr << " --- FAILED ---\n";
+ cerr << " Expected: \"" << expected << "\"\n";
+ cerr << " Got: \"" << actual << "\"\n";
+ abort();  // A mismatch terminates the whole test program.
+ }
+}
+
+// Round-trips `input` through huff.Encode / huff.Decode and aborts (via
+// Test) when the decoded text differs from the input.
+//   use_eof      - passed to both Encode and Decode.
+//   use_length   - when true, Decode receives an explicit bit budget;
+//                  otherwise it receives -1, which Decode treats as
+//                  "no limit".
+//   length_delta - added to the encoded bit count to form that budget.
+void TestEncodeDecode(const Huffman& huff,
+                      const string& input,
+                      bool use_eof,
+                      bool use_length,
+                      int length_delta) {
+  BitBucket bb;
+  huff.Encode(&bb, input, use_eof);
+  string decoded;
+  // Previously num_bits was computed but bb.NumBits() was always passed to
+  // Decode, so use_length and length_delta had no effect.
+  // NOTE(review): with a positive length_delta Decode will SeekDelta past
+  // the encoded bits - confirm BitBucket tolerates that.
+  int num_bits = -1;
+  if (use_length)
+    num_bits = bb.NumBits() + length_delta;
+  huff.Decode(&decoded, &bb, use_eof, num_bits);
+  Test(input, decoded);
+}
int main(int argc, char**argv) {
Huffman huff;
huff.Init(FreqTables::request_freq_table);
- cout << huff << "\n";
+ array<string,5> tests = {
+ "abbcccddddeeeee",
+ "foobarbaz",
+ "0-2rklnsvkl;-23kDFSi01k0=",
+ "-9083480-12hjkadsgf8912345kl;hjajkl; `123890",
+ "\0\0-3;jsdf"
+ };
+ for (int i = 0; i < tests.size(); ++i) {
+ const string& test = tests[i];
+ cerr << "TEST: " << test << "...";
+ TestEncodeDecode(huff, test, true, false, 0);
+ TestEncodeDecode(huff, test, false, true, 0);
+ TestEncodeDecode(huff, test, true, true, 8);
+ cerr << "PASSED!\n";
+ }
+ //cout << huff;
return EXIT_SUCCESS;
}
Please sign in to comment.
Something went wrong with that request. Please try again.