# Huffman Coding

In [1]:
#include <deque>       // std::deque (address-stable node storage)
#include <functional>  // std::greater
#include <iostream>
#include <queue>       // std::priority_queue
#include <string>
#include <unordered_map>
#include <utility>     // std::pair
#include <vector>

// A node of the Huffman tree that also serves as the element type of the
// frequency-ordered priority queue.
//
// The child pointers are non-owning: the node never allocates or frees them,
// so whoever builds the tree must keep the pointed-to nodes alive.
class HeapNode
{
public:
    const HeapNode* left = nullptr;   // child reached via one branch (null for a leaf)
    const HeapNode* right = nullptr;  // child reached via the other branch (null for a leaf)
    int freq;                         // weight used for ordering
    char ch;                          // character stored in this node

    // Members are listed here in declaration order, which is the order in
    // which they are actually initialized.
    HeapNode(int freq, char ch, const HeapNode* left, const HeapNode* right)
        : left(left), right(right), freq(freq), ch(ch)
    {
    }

    // Writes "(<ch>, <freq>)" followed by a newline to standard output.
    void print() const
    {
        std::cout << '(' << ch << ", " << freq << ')' << std::endl;
    }

    // Ordering by frequency only; this is what std::greater<HeapNode> calls,
    // turning std::priority_queue into a min-heap on freq.
    friend bool operator>(const HeapNode& lhs, const HeapNode& rhs)
    {
        return lhs.freq > rhs.freq;
    }
};

std::string message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

std::unordered_map<char, int> freq;
for (const char& c : message)
    freq[c]++;

std::vector<HeapNode> nodes;
for (const auto& pair : freq){
    char letter = pair.first;
    int frequency = pair.second;
    nodes.push_back(HeapNode(frequency, letter, nullptr, nullptr));
}

std::priority_queue<HeapNode, std::vector<HeapNode>, std::greater<HeapNode>> q(std::greater<HeapNode>(), nodes);

while(q.size() > 1) 
{
    // when this loop finishes, q.top() will be the root of a Huffman tree
    
    
    q.top().print();
    const HeapNode* left = &(q.top());
    q.pop();
    
    
    q.top().print();
    const HeapNode* right = &(q.top());
    q.pop();

    HeapNode node(left->freq + right->freq, '\0', left, right);
    node.print();
    node.left->print();
    node.right->print();
    
    std::cout << std::endl;
    q.push(HeapNode(left->freq + right->freq, '\0', left, right));
}

(5, 1)
(x, 1)
( , 2)
(R, 1)
(R, 1)

(R, 1)
(2, 1)
( , 2)
(S, 1)
(S, 1)

(S, 1)
(9, 1)
( , 2)
(1, 1)
(1, 1)

(1, 1)
(-, 1)
( , 4)
(v, 2)
(v, 2)

(v, 2)
(", 2)
( , 4)
(I, 2)
(I, 2)

(I, 2)
(D, 2)
( , 4)
(C, 2)
(C, 2)

(C, 2)
(T, 2)
( , 4)
(w, 2)
(w, 2)

(w, 2)
( , 2)
( , 4)
( , 2)
( , 2)

( , 2)
(A, 2)
( , 4)
( , 2)
( , 2)

( , 2)
(H, 3)
( , 6)
(M, 3)
(M, 3)

(M, 3)
(,, 3)
( , 6)
(b, 3)
(b, 3)

(b, 3)
( , 4)
( , 8)
( , 4)
( , 4)

( , 4)
(g, 4)
( , 8)
( , 4)
( , 4)

( , 4)
( , 4)
( , 8)
( , 4)
( , 4)

( , 4)
( , 4)
( , 10)
(., 5)
(., 5)

(., 5)
( , 6)
( , 12)
( , 6)
( , 6)

( , 6)
(y, 6)
( , 16)
( , 8)
( , 8)

( , 8)
( , 8)
( , 16)
( , 8)
( , 8)

( , 8)
(l, 9)
( , 20)
( , 10)
( , 10)

( , 10)
(h, 11)
( , 24)
( , 12)
( , 12)

( , 12)
(p, 12)
( , 26)
(u, 13)
(u, 13)

(u, 13)
(m, 13)
( , 30)
(r, 15)
(r, 15)

(r, 15)
(f, 15)
( , 32)
( , 16)
( , 16)

( , 16)
( , 16)
( , 32)
(c, 16)
(c, 16)

(c, 16)
(t, 18)
( , 38)
(d, 19)
(d, 19)

(d, 19)
( , 20)
( , 42)
(s, 21)
(s, 21)

(s, 21)
(i, 21)
( , 48

In [7]:
const HeapNode* root = &(q.top());

for (size_t i : {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}){
    root->print();
    root = root->right;
}

( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)
( , 1088)


In [2]:
// Collects the (character, code) pair of every leaf in the tree rooted at p.
//
// A code is the path from p down to the leaf, written with '0' for each step
// to a left child and '1' for each step to a right child.
//
// p may be null, in which case the result is empty; this also covers a node
// that has only one child. If p itself is a leaf, its code is the empty
// string, because there is no branch to record.
//
// Side effect: writes trace lines to std::cout ("end" at each leaf, and the
// left/right pair counts at each internal node).
std::vector<std::pair<char, std::string>> codeCharPairs(const HeapNode* p)
{
    // Empty (sub)tree: nothing to encode, and nothing safe to dereference.
    if (p == nullptr)
        return {};

    if (p->left == nullptr && p->right == nullptr)
    {
        std::cout << "end" << std::endl;
        return std::vector<std::pair<char, std::string>> {{p->ch, ""}};
    }

    std::vector<std::pair<char, std::string>> from_left = codeCharPairs(p->left);
    std::vector<std::pair<char, std::string>> from_right = codeCharPairs(p->right);

    std::cout << from_left.size() << ", " << from_right.size() << std::endl;

    // Prefix each code in place with the branch taken at this node.
    for (auto& tuple : from_left)
        tuple.second.insert(tuple.second.begin(), '0');

    for (auto& tuple : from_right)
        tuple.second.insert(tuple.second.begin(), '1');

    from_left.insert(from_left.end(), from_right.begin(), from_right.end());
    return from_left;
}

input_line_9:3:1: error: function definition is not allowed here
{
^


Interpreter Error: 

In [None]:
const HeapNode* root = &(q.top());

std::cout << codeCharPairs(root) << std::endl;


[Does Not Work](http://cpp.sh/5ues4)