# Huffman Coding in C++

* 還沒改好的 shared_ptr 版本

In [1]:
#include <iostream>
#include <string>
#include <memory>      // std::shared_ptr, std::make_shared
#include <queue>       // std::priority_queue
#include <vector>
#include <utility>     // std::pair, std::swap
#include <functional>  // std::greater
#include <unordered_map>

class HeapNode
{
public: 
    std::shared_ptr<HeapNode> left = nullptr;
    std::shared_ptr<HeapNode> right = nullptr;
    int freq;
    char ch;
    
    HeapNode(int freq, char ch, std::shared_ptr<HeapNode> left, std::shared_ptr<HeapNode> right) 
        : freq(freq), ch(ch), left(left), right(right) {}
    
    HeapNode(const HeapNode& other) {  // copy ctor
        std::shared_ptr<HeapNode> p = clone(std::make_shared<HeapNode>(&other));
        
        this->freq = p->freq;
        this->ch = p->ch;
        this->left = p->left;
        this->right = p->right;
    }
 
//     ~HeapNode() { 
//         deleteNode(this->left);
//         deleteNode(this->right);
//     }
 
//     void deleteNode(const HeapNode* p) {
//         if (p){
//             deleteNode(p->left);
//             deleteNode(p->right);
//             delete p;
//         }
//     }
    
    std::shared_ptr<HeapNode> clone(std::shared_ptr<HeapNode> other) {
        std::shared_ptr<HeapNode> p = std::make_shared<HeapNode>(other->freq, other->ch, nullptr, nullptr);
        
        if (other->left)
            p->left = clone(other->left);
        if (other->right)
            p->right = clone(other->right);
        
        return p;
    }
    
    friend bool operator>(const HeapNode& a, const HeapNode& b) { 
        return a.freq > b.freq; 
    }
};

In file included from input_line_5:1:
In file included from /srv/conda/envs/notebook/include/xeus/xinterpreter.hpp:13:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/functional:54:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/tuple:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/array:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/stdexcept:39:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/string:41:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/allocator.h:46:
In file included from /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0/bits/c++allocator.h:33:
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/ext/new_allocator.h:136:23: error: no matching constructor for initialization

Interpreter Error: 

In [8]:
class HuffmanCoding{

    std::string original_message;
    std::vector<std::pair<std::string, std::string>> codeCharPairs;
    const std::shared_ptr<HeapNode> huffmanTree;
    std::shared_ptr<std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>> q;
    
    void buildHuffmanTree()
    {
        std::unordered_map<char, int> freq;
        for (const char& c : original_message)
            freq[c]++;

        std::vector<HeapNode> nodes;
        for (const auto& pair : freq){
            char letter = pair.first;
            int frequency = pair.second;
            nodes.push_back(HeapNode(frequency, letter, nullptr, nullptr));
        }

        q = std::make_shared<std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>>>(std::greater<HeapNode>(), nodes);

        while(q->size() > 1) 
        {
            std::shared_ptr<HeapNode> left = std::make_shared<HeapNode>(q->top());
            q->pop();
            
            std::shared_ptr<HeapNode> right = std::make_shared<HeapNode>(q->top());
            q->pop();
            
            std::shared_ptr<HeapNode> root = std::make_shared<HeapNode>(left->freq + right->freq, '\0', left, right);
            q->push(*root);
        }
        huffmanTree = &(q->top());
    }
    
    std::vector<std::pair<std::string, std::string>> getCodeCharPairs(const std::shared_ptr<HeapNode> p)
    {        
        // if this weren't a member function it would have to be defined as an auto as the original signature wouldn't work in cling
        // this is a known issue; see https://github.com/jupyter-xeus/xeus-cling/issues/40

        if (p->ch)
        {
            std::string ch(1, p->ch);
            return std::vector<std::pair<std::string, std::string>> {{ch, ""}};
        }
        else 
        {
            std::vector<std::pair<std::string, std::string>> from_left = getCodeCharPairs(p->left);
            std::vector<std::pair<std::string, std::string>> from_right = getCodeCharPairs(p->right);

            for (auto& pair : from_left)
                pair.second = '0' + pair.second;

            for (auto& pair : from_right)
                pair.second = '1' + pair.second;

            from_left.insert(from_left.end(), from_right.begin(), from_right.end());
            return from_left;
        }
    }

public:
    HuffmanCoding(const std::string& original_message) : original_message(original_message) {}
    
    std::string compress(const std::string& message){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);
        
        std::unordered_map<std::string, std::string> codeTable(codeCharPairs.begin(), codeCharPairs.end());
        
        std::string compressed;
        for (const char& c : message)
        {
            std::string ch(1, c);
            compressed += codeTable[ch];
        }
        return compressed;
    }
    std::string decompress(const std::string& compressed){
        if (!huffmanTree)
            buildHuffmanTree();
        if (codeCharPairs.size()==0)
            codeCharPairs = getCodeCharPairs(huffmanTree);

        for (auto& pair : codeCharPairs)
            std::swap(pair.first, pair.second);

        std::unordered_map<std::string, std::string> charTable(codeCharPairs.begin(), codeCharPairs.end());

        std::string::const_iterator it = compressed.begin();
        std::string code, decompressed;

        while (it!=compressed.end())
        {
            code += *it;
            if (charTable.find(code) != charTable.end()){
                decompressed += charTable[code];
                code = "";
            }
            it++;
        }
        return decompressed;
    }
};

std::string original_message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

original_message

"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."

In [9]:
HuffmanCoding hc(original_message);

std::string message = original_message;
std::string compressed = hc.compress(message);
compressed

"110110000110111110101010100011000010010000111001011011100111101001001100011011010110011101110110001011101000110101111010101101000101110001010010100110111000101010110010101011001011010011001110111111010111010010101111011110001011101101111101010100010110011101000011111011111110000011110110000101001101010010000010111101101110001010110100001100111101010111111101010000000101001000101110000111110000101101100101110100100111001111110101010001011001110001010100111000111101000011111110101010100011000110100110000010101101111001000111100001011110111101010110111000011010001100110000111000011001111100100111000101111111101010101000110000101101100001100110100101001100000011110000000010101100111100001011010101101011000011001111110101011111110111010001100010010001101101110111101010110111100100011010001101101110111001110010110100101011101111111101010100010110011110000101101010110101100110000100011111100110101011011110001110001110110001111110101011111101011101001010111101111000101110110111110101010001001

In [10]:
std::string recovered = hc.decompress(compressed);
recovered

"In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes"."

In [11]:
(recovered == message)

true