# Huffman Coding in C++

* 是可以跑的但有 memory leak：只有 `new` 沒有 `delete`
* coding 結果和 Python 不一樣

In [1]:
#include <iostream>
#include <string>
#include <queue>       // std::priority_queue
#include <vector>
#include <utility>     // std::pair
#include <functional>  // std::greater
#include <unordered_map>

/// A node of a Huffman tree, also used directly as the element type of the
/// priority queue that builds the tree.
///
/// Copy semantics (deliberately asymmetric, kept for compatibility):
///  - copy construction deep-copies both subtrees onto the heap;
///  - copy assignment is shallow (pointers are copied as-is).
///
/// NOTE: there is intentionally no destructor that deletes the children.
/// Nodes may point at objects they do not own (for example stack objects),
/// and shallow assignment lets two nodes share the same children, so a
/// deleting destructor would free memory it does not own or free it twice.
/// The heap-allocated clones are therefore never released.
class HeapNode
{
public: 
    HeapNode* left = nullptr;   // left child, or nullptr
    HeapNode* right = nullptr;  // right child, or nullptr
    int freq;                   // frequency stored in this node
    char ch;                    // character stored in this node
    
    /// Builds a node from its parts; the child pointers are stored as given
    /// (no copy is made).
    HeapNode(int freq, char ch, HeapNode* left, HeapNode* right) 
        : left(left), right(right), freq(freq), ch(ch) {}  // same order as the member declarations
    
    /// Deep copy: the new node gets its own heap-allocated copies of both
    /// subtrees of `other`. Only the children are cloned, so no extra
    /// throw-away copy of `other` itself is allocated.
    HeapNode(const HeapNode& other)
        : left(clone(other.left)),
          right(clone(other.right)),
          freq(other.freq),
          ch(other.ch) {}

    /// Shallow member-wise assignment (the behavior the class had implicitly).
    HeapNode& operator=(const HeapNode& other) = default;
    
    /// Recursively duplicates the subtree rooted at `other` on the heap.
    /// @param other root of the subtree to duplicate; may be nullptr.
    /// @return pointer to the newly allocated copy (the caller is responsible
    ///         for it), or nullptr when `other` is nullptr.
    static HeapNode* clone(const HeapNode* other) {
        if (other == nullptr)
            return nullptr;

        return new HeapNode(other->freq, other->ch,
                            clone(other->left), clone(other->right));
    }

    /// Writes "(ch, freq)" followed by a newline to std::cout.
    void print() const { 
        std::cout << "(" << ch << ", " << freq << ")" << std::endl; 
    }
    
    /// Orders nodes by frequency only, so std::greater<HeapNode> can be used
    /// as a comparator.
    friend bool operator>(const HeapNode& a, const HeapNode& b) { 
        return a.freq > b.freq; 
    }
};


std::string message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

std::unordered_map<char, int> freq;
for (const char& c : message)
    freq[c]++;

std::vector<HeapNode> nodes;
for (const auto& pair : freq){
    char letter = pair.first;
    int frequency = pair.second;
    nodes.push_back(HeapNode(frequency, letter, nullptr, nullptr));
}

std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>> q(std::greater<HeapNode>(), nodes);

while(q.size() > 1) 
{
    // when this loop finishes, q.top() will be the root of a Huffman tree

    HeapNode leftNode = q.top();  
    HeapNode* left = &leftNode;
    q.pop();

    HeapNode rightNode = q.top();
    HeapNode* right = &rightNode;
    q.pop();

    q.push(HeapNode(left->freq + right->freq, '\0', left, right));
    
    if (q.size()==1)
    {
        const HeapNode* root = &(q.top());
        root->print();
        root->left->print();
        root->left->left->print();
        root->left->left->left->print();
        root->left->left->left->left->print();
        root->left->left->left->left->left->print();
        root->left->left->left->left->left->left->print();
        root->left->left->left->left->left->left->left->print();
    }
}

( , 409)
( , 168)
( , 76)
( , 36)
( , 18)
( , 9)
( , 4)
(T, 2)


In [2]:
// using auto is a workaround, as the original signature wouldn't work in cling
// std::vector<std::pair<char, std::string>> codeCharPairs(const HeapNode* p) 
// this is a known issue; see https://github.com/jupyter-xeus/xeus-cling/issues/40

auto codeCharPairs(const HeapNode* p)
{
    if (p->ch)
    {
        return std::vector<std::pair<char, std::string>> {{p->ch, ""}};
    }
    else 
    {
        std::vector<std::pair<char, std::string>> from_left = codeCharPairs(p->left);
        std::vector<std::pair<char, std::string>> from_right = codeCharPairs(p->right);
        
        for (auto& pair : from_left)
            pair.second = '0' + pair.second;
        
        for (auto& pair : from_right)
            pair.second = '1' + pair.second;
        
        from_left.insert(from_left.end(), from_right.begin(), from_right.end());
        return from_left;
    }
}


In [3]:
// After the build loop, the only node left in the queue is the tree's root.
const HeapNode* root = &(q.top());

// Print one "character, code" line per entry returned by codeCharPairs.
for (const auto& entry : codeCharPairs(root))
{
    std::cout << entry.first << ", " << entry.second << std::endl;
}

T, 0000000
w, 0000001
., 000001
l, 00001
t, 0001
d, 0010
s, 0011
i, 0100
h, 01010
y, 010110
H, 0101110
M, 0101111
n, 0110
a, 0111
p, 10000
m, 10001
u, 10010
,, 1001100
b, 1001101
5, 100111000
x, 100111001
S, 100111010
9, 100111011
A, 10011110
R, 100111110
2, 100111111
o, 1010
r, 10110
f, 10111
e, 1100
c, 11010
I, 11011000
1, 110110010
-, 110110011
D, 11011010
C, 11011011
g, 1101110
v, 11011110
", 11011111
 , 111
