In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# default_exp core.ds.string.huffman

In [3]:
#hide
from nbdev.showdoc import *

In [18]:
# export
from dataclasses import dataclass, field
import heapq
from typing import Any


@dataclass(order=True)
class Node:
    """A node of a Huffman tree that can be stored directly in a heap.

    Ordering and equality are derived from ``weight`` alone; every other
    field is declared with ``compare=False`` so that the symbol and the
    child links never take part in comparisons.
    """

    # Weight of the node; the only field used for ordering/equality.
    weight: int
    # Symbol carried by the node; '$' is the default placeholder.
    c: Any = field(default='$', compare=False)
    # Child reached with bit "0" (None when absent).
    left: Any = field(default=None, compare=False)
    # Child reached with bit "1" (None when absent).
    right: Any = field(default=None, compare=False)
    

def huffman_encode(root, prefix=""):
    """Yield ``(symbol, code)`` pairs for every leaf below ``root``.

    The tree is walked depth-first, left child before right child. Going
    to a left child appends "0" to the code, going to a right child
    appends "1".

    Args:
        root: node exposing ``c``, ``left`` and ``right`` attributes.
        prefix: bits accumulated on the path from the top of the tree.

    Yields:
        Tuples ``(root.c, code)`` for each node that has no children.
    """
    if root.left is None and root.right is None:
        # A leaf reached with an empty prefix is the whole tree (only one
        # symbol). An empty code would encode to zero bits, so give it "0".
        yield (root.c, prefix or "0")
        return

    if root.left is not None:
        yield from huffman_encode(root.left, prefix + "0")
    if root.right is not None:
        yield from huffman_encode(root.right, prefix + "1")
    
    
def huffman(s):
    """Build a Huffman tree and yield the code of every symbol.

    Args:
        s: iterable of ``(symbol, weight)`` pairs. Any iterable is
            accepted, including one-shot iterators and generators.

    Yields:
        The ``(symbol, code)`` pairs produced by ``huffman_encode`` for
        the finished tree. Nothing is yielded when ``s`` is empty.
    """
    q = []
    for symbol, weight in s or ():
        heapq.heappush(q, Node(weight, symbol))

    # Check the heap rather than `s`: an exhausted iterator or generator is
    # truthy, so testing `s` alone would fall through to q[0] on an empty
    # list and raise IndexError.
    if not q:
        return

    # Repeatedly merge the two lightest nodes under a new '$' parent until
    # a single root remains.
    while len(q) > 1:
        lighter = heapq.heappop(q)
        heavier = heapq.heappop(q)
        merged = Node(lighter.weight + heavier.weight, '$', lighter, heavier)
        heapq.heappush(q, merged)

    yield from huffman_encode(q[0])
    

In [19]:
# Sample alphabet and the weight of each symbol, paired position by position.
chars = ['a', 'b', 'c', 'd', 'e', 'f']
freq = [5, 9, 12, 13, 16, 45]
s = list(zip(chars, freq))
s

[('a', 5), ('b', 9), ('c', 12), ('d', 13), ('e', 16), ('f', 45)]

In [20]:
# Print every (symbol, code) pair yielded by huffman(s), one per line.
for c, code in huffman(s):
    print(f"{c} --> {code}")

f --> 0
c --> 100
d --> 101
a --> 1100
b --> 1101
e --> 111
