# Huffman Coding

In [20]:
from pandas import DataFrame
import heapq

class HeapNode:
    """A (character, frequency) pair that orders itself by frequency.

    Instances compare with each other using ``freq`` only, which is what
    lets ``heapq`` keep the lowest-frequency node at the front of a heap.
    Two nodes holding different characters but the same frequency therefore
    compare equal.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable (they cannot be used as dict keys or set members).
    """

    def __init__(self, char, freq):
        """Store the symbol ``char`` and its occurrence count ``freq``."""
        self.char = char
        self.freq = freq

    def __lt__(self, other):
        """Return True when this node's frequency is strictly smaller."""
        # Defer to Python's fallback for foreign types instead of crashing
        # with AttributeError on ``other.freq``.
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.freq < other.freq

    def __le__(self, other):
        """Return True when this node's frequency is smaller or equal."""
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.freq <= other.freq

    def __eq__(self, other):
        """Return True when both nodes carry the same frequency."""
        # For non-HeapNode operands (e.g. ``None``) Python falls back to an
        # identity comparison, so ``node == None`` is simply False.
        if not isinstance(other, HeapNode):
            return NotImplemented
        return self.freq == other.freq

    def __repr__(self):
        """Render the node as ``(char, freq)``."""
        return f'({self.char}, {self.freq})'

class TreeNode:
    """A binary-tree node holding a value and optional left/right children."""

    def __init__(self, val=0, left=None, right=None):
        """Create a node.

        Args:
            val: Payload stored in the node (defaults to 0).
            left: Left child node, or None when there is none.
            right: Right child node, or None when there is none.
        """
        self.val = val
        # Keep the children that were passed in; previously these arguments
        # were accepted but silently replaced with None.
        self.left = left
        self.right = right
    
# Sample text whose characters are to be Huffman-coded.
message = r'In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".'

# Character frequency table: one row per distinct character, with columns
# 'char' and 'freq' (the number of times that character occurs in message).
freqTable = (
    DataFrame(list(message), columns=['char'])
    .groupby('char')
    .size()
    .to_frame(name='freq')
    .reset_index()
)

# One heap node per row of the frequency table, in table order.
nodes = [HeapNode(char=row[0], freq=row[1]) for row in freqTable.values]

# Build the min-heap by pushing the nodes one at a time, in table order.
h = []
for node in nodes:
    heapq.heappush(h, node)



[(-, 1),
 (5, 1),
 (1, 1),
 (9, 1),
 (A, 2),
 (D, 2),
 (2, 1),
 (R, 1),
 (x, 1),
 (b, 3),
 (C, 2),
 (,, 3),
 (H, 3),
 (I, 2),
 (M, 3),
 (p, 12),
 (S, 1),
 (T, 2),
 (", 2),
 (., 5),
 (c, 16),
 (d, 19),
 (e, 30),
 (f, 15),
 (g, 4),
 (h, 11),
 (i, 21),
 (l, 9),
 (m, 13),
 (n, 24),
 (o, 29),
 ( , 68),
 (r, 15),
 (s, 21),
 (t, 18),
 (u, 13),
 (v, 2),
 (a, 24),
 (w, 2),
 (y, 6)]