# Huffman Coding

In [1]:
#include <iostream>
#include <string>
#include <queue>       // std::priority_queue
#include <vector>
#include <functional>  // std::greater
#include <unordered_map>


/// A tree/heap node holding a character, its frequency, and optional
/// left/right child pointers.
///
/// The child pointers are stored as-is; this class never deletes them.
class HeapNode
{
    HeapNode* left = nullptr;
    HeapNode* right = nullptr;
    double freq;
    char ch;

public: 
    /// @param freq  weight of the node (how often `ch` occurs)
    /// @param ch    character carried by the node
    /// @param left  optional left child (defaults to none)
    /// @param right optional right child (defaults to none)
    HeapNode(double freq, char ch, HeapNode* left=nullptr, HeapNode* right=nullptr) 
        // Members are initialized in declaration order regardless of how this
        // list is written, so the list mirrors the declarations above.
        : left(left), right(right), freq(freq), ch(ch) {}
    
    /// Writes "(ch, freq)" followed by a newline to std::cout.
    void print() const { std::cout << "(" << ch << ", " << freq << ")" << std::endl; }
    
    /// Deliberately inverted: `a < b` is true when `a` has the HIGHER frequency.
    /// With the default std::less comparator, std::priority_queue then keeps
    /// the lowest-frequency node at the top.
    friend bool operator<(const HeapNode& a, const HeapNode& b) { 
        return a.freq > b.freq; 
    }
};

std::string message(R"(In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".)");

std::unordered_map<char, int> freq;
for (const char& c : message)
    freq[c]++;

std::vector<HeapNode> nodes;
for (auto [letter, frequency] : freq)
    nodes.push_back(HeapNode(frequency, letter));

std::priority_queue<HeapNode> q(std::less<HeapNode>(), nodes);

while(!q.empty()) 
{
    const auto& p = q.top();
    p.print();
    q.pop();
}


(5, 1)
(x, 1)
(R, 1)
(2, 1)
(S, 1)
(9, 1)
(1, 1)
(-, 1)
(v, 2)
(", 2)
(I, 2)
(D, 2)
(C, 2)
(T, 2)
(w, 2)
(A, 2)
(M, 3)
(H, 3)
(,, 3)
(b, 3)
(g, 4)
(., 5)
(y, 6)
(l, 9)
(h, 11)
(p, 12)
(u, 13)
(m, 13)
(f, 15)
(r, 15)
(c, 16)
(t, 18)
(d, 19)
(i, 21)
(s, 21)
(n, 24)
(a, 24)
(o, 29)
(e, 30)
( , 68)
