# Huffman Coding

In [3]:
'''
Use HeapNode directly as the node of the Huffman tree. The left and right attributes of a HeapNode have nothing to do with the heap.
'''

from pandas import DataFrame
import heapq

class HeapNode:
    """Huffman-tree node that can be stored directly in a ``heapq`` heap.

    Attributes:
        char: The value supplied as ``char`` (defaults to ``None``).
        freq: The weight used when two nodes are compared.
        left: Left child in the Huffman tree, or ``None``.
        right: Right child in the Huffman tree, or ``None``.
    """

    def __init__(self, freq, char=None, left=None, right=None):
        """Store the frequency, the character and the two tree children."""
        self.char = char
        self.freq = freq

        # These links describe the Huffman tree only; the heap keeps its own
        # ordering inside the list it is stored in.
        self.left, self.right = left, right

    def __lt__(self, other):
        """Order nodes by ``freq`` alone, which is what ``heapq`` relies on."""
        return self.freq < other.freq

    def __repr__(self):
        """Render the node as ``(char, freq)``."""
        return '({}, {})'.format(self.char, self.freq)
    
    
# Text whose characters are counted and turned into a Huffman tree.
message = r'In computer science and information theory, a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. The process of finding or using such a code proceeds by means of Huffman coding, an algorithm developed by David A. Huffman while he was a Sc.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes".'

# Frequency table: one row per distinct character, with columns 'char' and 'freq'.
freqTable = (
    DataFrame(list(message), columns=['char'])
    .groupby('char')
    .size()
    .to_frame(name='freq')
    .reset_index()
)

# One leaf node per row of the frequency table.
nodes = [HeapNode(freq, char) for char, freq in freqTable.values]

# Fill the min-heap one node at a time, in table order.
h = []
for node in nodes:
    heapq.heappush(h, node)

# Repeatedly merge the two lowest-frequency nodes under a new internal node
# (empty char, summed frequency) until only one node is left in the heap.
while len(h) >= 2:
    left, right = heapq.heappop(h), heapq.heappop(h)
    heapq.heappush(h, HeapNode(left.freq + right.freq, '', left, right))

# The single remaining node is the root of the Huffman tree.
root = h[0]


(, 409)