In [115]:
import numpy as np

In [116]:
class Node:
    def __init__(self, x, s = None):
        self.symbol = s
        self.frequency = x
        self.code = None
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequency < other.frequency
    
    def __eq__(self, other):

        if isinstance(other, Node):
            return self.frequency == other.frequency
        return False

class CustomList:
    def __init__(self):
        self.data = []

    def pop(self):
        if not self.data:
            raise IndexError("pop from empty list")
        return self.data.pop(0)

    def insert_sorted(self, node):
        for i in range(len(self.data)):
            if self.data[i] > node:
                self.data.insert(i, node)
                return
        self.data.append(node)

    def __len__(self):
        return len(self.data)

def preOrder(root, codebook, codeword):
    if root is None:
        return

    if root.left is None and root.right is None:
        #ans.append(curr)
        codebook[root.symbol] = {"frequency": root.frequency, "code": codeword}
        return

    preOrder(root.left, codebook, codeword + '1')
    preOrder(root.right, codebook, codeword + '0')

def huffmanCodes(s, freq):
    n = len(s)

    custom_list = CustomList()
    
    for i in range(n):
        tmp = Node(freq[i], i)
        custom_list.insert_sorted(tmp)
    
    while len(custom_list) >= 2:
        l = custom_list.pop()
        r = custom_list.pop()

        newNode = Node(l.frequency + r.frequency)
        newNode.left = l
        newNode.right = r

        custom_list.insert_sorted(newNode)

    # Raiz da árvore
    root = custom_list.pop()
    codebook = {}
    preOrder(root, codebook, "")
    return codebook, root

def huffman_decode(bitstream, root):
    decoded_symbols = []
    node = root

    for bit in bitstream:
        if bit == '1':
            node = node.left
        elif bit == '0':
            node = node.right
        else:
            raise ValueError(f"Bit inválido: {bit}")

        if node.left is None and node.right is None:
            decoded_symbols.append(node.symbol)
            node = root 

    return decoded_symbols

def huffman_decode_with_codebook(bitstream, inverse_codebook):
    decoded = []
    buffer = ''

    for bit in bitstream:
        buffer += bit
        if buffer in inverse_codebook:
            decoded.append(inverse_codebook[buffer])
            buffer = ''  # reseta para o próximo símbolo

    if buffer != '':
        raise ValueError("Bitstream inválido: sufixo final não reconhecido")

    return decoded

def is_prefix_free(codes):
    for i, code1 in enumerate(codes):
        for j, code2 in enumerate(codes):
            if i != j and code2.startswith(code1):
                return False, code1, code2
    return True, None, None
    
def print_huffman_tree(node, level=0):
    """
    Imprime a árvore de Huffman mostrando os nós e suas frequências.

    Parâmetros:
    ----------
    node : Node
        Nó atual da árvore.
    level : int
        Nível atual na árvore (usado para indentação).
    """
    if node is not None:
        # Imprime o nó atual com indentação baseada no nível
        print("  " * level + f"{level} Frequência: {node.frequency}")
        
        # Recursivamente imprime os filhos (esquerda e direita)
        print_huffman_tree(node.left, level + 1)
        print_huffman_tree(node.right, level + 1)

#### Exemplo 1 (Cover)

In [117]:
#exemplo 1 - cover
s = range(0,5)
freq = [0.25, 0.25, 0.2, 0.15, 0.15] 
codebook, huffman_tree_root = huffmanCodes(s, freq)
for key in sorted(codebook.keys()):
    print(key, codebook[key])
print("\n-----------------------\n")
print_huffman_tree(huffman_tree_root)

0 {'frequency': 0.25, 'code': '10'}
1 {'frequency': 0.25, 'code': '01'}
2 {'frequency': 0.2, 'code': '11'}
3 {'frequency': 0.15, 'code': '001'}
4 {'frequency': 0.15, 'code': '000'}

-----------------------

0 Frequência: 1.0
  1 Frequência: 0.45
    2 Frequência: 0.2
    2 Frequência: 0.25
  1 Frequência: 0.55
    2 Frequência: 0.25
    2 Frequência: 0.3
      3 Frequência: 0.15
      3 Frequência: 0.15


#### Exemplo Huffman Shapping - Artigo do Ungerboeck 

In [118]:
# exemplo com dados do artigo

p_i = np.array([
    0.03872, 0.02991, 0.02991, 0.02311, 0.01785, 0.01785,
    0.01379, 0.01379, 0.00823, 0.00823, 0.00823, 0.00636, 
    0.00636, 0.00379, 0.00379, 0.00293, 0.00293, 0.00226, 
    0.00226, 0.00175, 0.00135, 0.00135, 0.00081, 0.00081, 
    0.00062, 0.00062, 0.00062, 0.00062, 0.00037, 0.00037, 
    0.00022, 0.00017
])

p_i_n = np.array([
    0.03125, 0.03125, 0.03125, 0.03125,
    0.01563, 0.01563, 0.01563, 0.01563,
    0.00781, 0.00781, 0.00781, 0.00781,
    0.00781, 0.00391, 0.00391, 0.00195, 
    0.00195, 0.00195, 0.00195, 0.00195,
    0.00098, 0.00098, 0.00049, 0.00049, 
    0.00049, 0.00049, 0.00049, 0.00049,
    0.00024, 0.00024, 0.00024, 0.00024
])

#comprimento das palavras de código
li = np.round(-np.log2(p_i_n))

s = range(0,32)
freq = p_i

codebook, huffman_tree_root = huffmanCodes(s, freq)
for key in sorted(codebook.keys()):
    print(key, codebook[key])


0 {'frequency': 0.03872, 'code': '000'}
1 {'frequency': 0.02991, 'code': '100'}
2 {'frequency': 0.02991, 'code': '011'}
3 {'frequency': 0.02311, 'code': '111'}
4 {'frequency': 0.01785, 'code': '0100'}
5 {'frequency': 0.01785, 'code': '0011'}
6 {'frequency': 0.01379, 'code': '1100'}
7 {'frequency': 0.01379, 'code': '1011'}
8 {'frequency': 0.00823, 'code': '10100'}
9 {'frequency': 0.00823, 'code': '01011'}
10 {'frequency': 0.00823, 'code': '01010'}
11 {'frequency': 0.00636, 'code': '11011'}
12 {'frequency': 0.00636, 'code': '11010'}
13 {'frequency': 0.00379, 'code': '101011'}
14 {'frequency': 0.00379, 'code': '101010'}
15 {'frequency': 0.00293, 'code': '0010010'}
16 {'frequency': 0.00293, 'code': '0010001'}
17 {'frequency': 0.00226, 'code': '0010101'}
18 {'frequency': 0.00226, 'code': '0010100'}
19 {'frequency': 0.00175, 'code': '0010111'}
20 {'frequency': 0.00135, 'code': '00100111'}
21 {'frequency': 0.00135, 'code': '00100110'}
22 {'frequency': 0.00081, 'code': '001000001'}
23 {'freque

In [119]:
# verifica se é livre de prefixo
codewords = [v['code'] for v in codebook.values()]

result, prefix, full = is_prefix_free(codewords)

if result:
    print("O código é prefixo-free!")
else:
    print(f"O código '{prefix}' é prefixo de '{full}', então não é prefixo-free.")

O código é prefixo-free!


In [120]:
symbols = np.random.choice(len(freq), size=1000, p=freq/freq.sum())
print("Símbolos:", symbols[:10], "...")
bitstream = ''.join(codebook[symbol]['code'] for symbol in symbols)

print("Bits codificados:", bitstream[:100], "...")

inverse_codebook = {v['code']: k for k, v in codebook.items()}

result = huffman_decode(bitstream, huffman_tree_root)
print("Símbolos decodificados:", result)

result = huffman_decode_with_codebook(bitstream, inverse_codebook)
print("Símbolos decodificados:", result)

Símbolos: [ 1 13  0  4  9  6  0  1  0  6] ...
Bits codificados: 1001010110000100010111100000100000110001110110010010011101100010110110001010110110100101001000001010 ...
Símbolos decodificados: [1, 13, 0, 4, 9, 6, 0, 1, 0, 6, 2, 7, 15, 2, 7, 0, 7, 2, 0, 13, 2, 4, 8, 1, 0, 13, 0, 9, 8, 8, 0, 2, 13, 0, 4, 1, 4, 13, 0, 7, 1, 6, 0, 2, 1, 1, 7, 0, 11, 12, 1, 9, 14, 17, 3, 0, 2, 4, 19, 0, 11, 15, 7, 1, 8, 12, 2, 5, 0, 2, 0, 0, 2, 7, 5, 0, 19, 1, 2, 0, 1, 0, 4, 7, 3, 2, 7, 1, 4, 3, 29, 0, 7, 17, 3, 3, 6, 1, 11, 1, 0, 2, 7, 0, 6, 1, 5, 2, 5, 11, 0, 19, 3, 3, 0, 0, 11, 4, 6, 5, 17, 0, 9, 2, 5, 3, 0, 2, 25, 3, 1, 10, 9, 6, 8, 7, 4, 1, 6, 0, 0, 3, 4, 3, 0, 11, 2, 11, 19, 2, 1, 1, 0, 16, 2, 0, 4, 1, 4, 3, 11, 11, 0, 13, 8, 5, 3, 5, 2, 0, 4, 9, 24, 3, 19, 3, 9, 10, 1, 0, 2, 11, 0, 14, 0, 2, 6, 11, 3, 2, 11, 0, 2, 2, 5, 0, 9, 2, 16, 0, 5, 0, 3, 0, 0, 4, 26, 2, 13, 13, 0, 7, 2, 15, 5, 6, 6, 1, 1, 7, 21, 4, 1, 0, 3, 1, 10, 0, 0, 0, 1, 17, 0, 2, 1, 5, 0, 5, 1, 5, 0, 0, 5, 12, 3, 2, 8, 0, 7, 6, 1, 21, 17

#### Exemplo 3

In [121]:
# exemplo 3
freq = np.array([1/16,1/16,1/16,1/16,1/8,1/8, 1/8,1/8,1/4])
s = range(0,9)
codes, huffman_tree_root = huffmanCodes(s, freq)
for key in sorted(codes.keys()):
    print(key, codes[key])

print("\n-----------------------\n")
print_huffman_tree(huffman_tree_root)

0 {'frequency': 0.0625, 'code': '0011'}
1 {'frequency': 0.0625, 'code': '0010'}
2 {'frequency': 0.0625, 'code': '0001'}
3 {'frequency': 0.0625, 'code': '0000'}
4 {'frequency': 0.125, 'code': '101'}
5 {'frequency': 0.125, 'code': '100'}
6 {'frequency': 0.125, 'code': '011'}
7 {'frequency': 0.125, 'code': '010'}
8 {'frequency': 0.25, 'code': '11'}

-----------------------

0 Frequência: 1.0
  1 Frequência: 0.5
    2 Frequência: 0.25
    2 Frequência: 0.25
      3 Frequência: 0.125
      3 Frequência: 0.125
  1 Frequência: 0.5
    2 Frequência: 0.25
      3 Frequência: 0.125
      3 Frequência: 0.125
    2 Frequência: 0.25
      3 Frequência: 0.125
        4 Frequência: 0.0625
        4 Frequência: 0.0625
      3 Frequência: 0.125
        4 Frequência: 0.0625
        4 Frequência: 0.0625
