In [1]:
import sys
sys.path.append('../src')

In [None]:
import numpy as np
from typing import Dict, Optional, List, Tuple
from utils import Node, OrderedQueue
from huffman_code import HuffmanTree
from huffman_shaping import HuffmanShaping

In [3]:
def print_huffman_tree(node, level=0):
    """Print the subtree rooted at ``node`` in pre-order, one line per node.

    Each line is indented by two spaces per depth level and shows the depth
    followed by the node's ``frequency``. A ``None`` node prints nothing.

    Args:
        node: Object exposing ``frequency``, ``left`` and ``right``
            attributes, or ``None`` for an empty subtree.
        level: Depth assigned to ``node``; children are printed at
            ``level + 1``.
    """
    if node is None:
        return

    # Current node, indented according to its depth.
    indent = "  " * level
    print(f"{indent}{level} Frequência: {node.frequency}")

    # Left subtree first, then right, each one level deeper.
    for child in (node.left, node.right):
        print_huffman_tree(child, level + 1)

#### Exemplo Huffman Shaping - Artigo do Ungerboeck 

##### Dados

In [4]:
# Per-symbol frequencies for the 32 base symbols, in non-increasing order.
# They sum to roughly 0.25, i.e. they do not form a full distribution alone.
# NOTE(review): presumably taken from the Ungerboeck article named in the
# section heading — confirm against the source table.
frequencies = np.array([
    0.03872, 0.02991, 0.02991, 0.02311,
    0.01785, 0.01785, 0.01379, 0.01379,
    0.00823, 0.00823, 0.00823, 0.00636,
    0.00636, 0.00379, 0.00379, 0.00293,
    0.00293, 0.00226, 0.00226, 0.00175,
    0.00135, 0.00135, 0.00081, 0.00081,
    0.00062, 0.00062, 0.00062, 0.00062,
    0.00037, 0.00037, 0.00022, 0.00017,
])

# One integer label per frequency entry: 0, 1, ..., 31.
symbols = list(range(frequencies.size))

# Second 32-entry table, written as runs of repeated values. Every value is a
# power of two (2**-5 down to 2**-12) rounded to five decimals.
# NOTE(review): looks like the dyadic approximation of `frequencies`; it is not
# referenced by the other cells visible here — confirm intended use.
p_i_n = np.array(
    [0.03125] * 4
    + [0.01563] * 4
    + [0.00781] * 5
    + [0.00391] * 2
    + [0.00195] * 5
    + [0.00098] * 2
    + [0.00049] * 6
    + [0.00024] * 4
)

##### Huffman Code

In [5]:
# Build the Huffman tree for the base alphabet and list its codebook as
# "symbol | frequency | code", ordered by symbol.
tree = HuffmanTree(symbols, frequencies)

# sorted() builds a new list, so whatever list `tree.codebook` returns keeps
# its original order instead of being reordered in place by this display cell.
codebook = sorted(tree.codebook, key=lambda node: node.symbol)
for c in codebook:
    # Five decimals, matching the format used by the other codebook listings.
    print(f"{c.symbol} | {c.frequency:.5f} | {c.code}")

0 | 0.03872 | 000
1 | 0.02991 | 100
2 | 0.02991 | 011
3 | 0.02311 | 111
4 | 0.01785 | 0100
5 | 0.01785 | 0011
6 | 0.01379 | 1100
7 | 0.01379 | 1011
8 | 0.00823 | 10100
9 | 0.00823 | 01011
10 | 0.00823 | 01010
11 | 0.00636 | 11011
12 | 0.00636 | 11010
13 | 0.00379 | 101011
14 | 0.00379 | 101010
15 | 0.00293 | 0010010
16 | 0.00293 | 0010001
17 | 0.00226 | 0010101
18 | 0.00226 | 0010100
19 | 0.00175 | 0010111
20 | 0.00135 | 00100111
21 | 0.00135 | 00100110
22 | 0.00081 | 001000001
23 | 0.00081 | 001000000
24 | 0.00062 | 001011010
25 | 0.00062 | 001011001
26 | 0.00062 | 001011000
27 | 0.00062 | 001000011
28 | 0.00037 | 0010000101
29 | 0.00037 | 0010000100
30 | 0.00022 | 0010110110
31 | 0.00017 | 0010110111


##### Huffman Shaping

In [6]:
# Build the shaping tree from the same symbols/frequencies and list its
# expanded codebook as "symbol | frequency | code", ordered by symbol.
tree_shaping = HuffmanShaping(symbols, frequencies)

# The sort is done in place on the list returned by `expanded_codebook`.
# NOTE(review): if that attribute hands back the tree's own list, the tree's
# internal order changes as well — confirm before replacing with sorted().
expand_codebook = tree_shaping.expanded_codebook
expand_codebook.sort(key=lambda entry: entry.symbol)

for entry in expand_codebook:
    row = f"{entry.symbol} | {entry.frequency:.5f} | {entry.code}"
    print(row)

0 | 0.03872 | 00000
1 | 0.02991 | 10000
2 | 0.02991 | 01100
3 | 0.02311 | 11100
4 | 0.01785 | 010000
5 | 0.01785 | 001100
6 | 0.01379 | 110000
7 | 0.01379 | 101100
8 | 0.00823 | 1010000
9 | 0.00823 | 0101100
10 | 0.00823 | 0101000
11 | 0.00636 | 1101100
12 | 0.00636 | 1101000
13 | 0.00379 | 10101100
14 | 0.00379 | 10101000
15 | 0.00293 | 001001000
16 | 0.00293 | 001000100
17 | 0.00226 | 001010100
18 | 0.00226 | 001010000
19 | 0.00175 | 001011100
20 | 0.00135 | 0010011100
21 | 0.00135 | 0010011000
22 | 0.00081 | 00100000100
23 | 0.00081 | 00100000000
24 | 0.00062 | 00101101000
25 | 0.00062 | 00101100100
26 | 0.00062 | 00101100000
27 | 0.00062 | 00100001100
28 | 0.00037 | 001000010100
29 | 0.00037 | 001000010000
30 | 0.00022 | 001011011000
31 | 0.00017 | 001011011100
32 | 0.03872 | 00001
33 | 0.02991 | 10001
34 | 0.02991 | 01101
35 | 0.02311 | 11101
36 | 0.01785 | 010001
37 | 0.01785 | 001101
38 | 0.01379 | 110001
39 | 0.01379 | 101101
40 | 0.00823 | 1010001
41 | 0.00823 | 0101101
42 | 0

In [7]:
# Query the shaping tree's prefix-free check. It returns three values: a flag
# and, judging by the message below, the offending (prefix, full codeword)
# pair reported when the flag is false.
result, prefix, full = tree_shaping.is_prefix_free()

# Report success, or name the pair of codewords that violates the property.
print(
    "O código é prefixo-free!"
    if result
    else f"O código '{prefix}' é prefixo de '{full}', então não é prefixo-free."
)

O código é prefixo-free!


In [8]:
# Round-trip check: draw random symbols, encode them with the expanded
# codebook, decode the bitstream and compare against the input.

# Fixed seed so the sample (and the printed output) is reproducible on re-run.
rng = np.random.default_rng(42)

# `frequencies` is tiled four times to cover the 4x larger symbol range and
# renormalised so the probabilities sum to 1.
n_expanded_symbols = len(frequencies) * 4
symbol_probabilities = np.tile(frequencies, 4) / (frequencies.sum() * 4)
input_data = rng.choice(n_expanded_symbols, size=1000, p=symbol_probabilities)
print(f"Símbolos: {list(input_data[:10])} ...")

# Look codes up by symbol rather than by list position, so the encoding does
# not silently depend on `expand_codebook` having been sorted beforehand.
code_by_symbol = {node.symbol: node.code for node in expand_codebook}
bitstream = ''.join(code_by_symbol[int(symbol)] for symbol in input_data)
print("Bits codificados:", bitstream[:100], "...")

output_data = tree_shaping.decode_with_codebook(bitstream)
# Show the decoded symbols themselves (the input was previously printed here,
# which made the comparison meaningless).
print(f"Decodificado: {list(output_data[:10])} ...")

# Fraction of positions where the decoded symbol equals the original one.
acuracia = np.mean(input_data == output_data)
print(f"Acurácia: {acuracia * 100:.2f}%")

Símbolos: [97, 33, 83, 3, 37, 2, 111, 12, 69, 64] ...
Bits codificados: 1001110001001011110111000011010110000100101111010000011100001001011001010110001111011111110100110101 ...
Decodificado: [97, 33, 83, 3, 37, 2, 111, 12, 69, 64] ...
Acurácia: 100.00%


#### Exemplo 2

In [9]:
# Example 2: nine symbols whose probabilities are powers of two
# (four at 1/16, four at 1/8, one at 1/4) and sum to 1.
freq = np.array([1/16] * 4 + [1/8] * 4 + [1/4])
s = range(len(freq))
tree = HuffmanTree(s, freq)

# List the codebook as "symbol | frequency | code", ordered by symbol.
# The sort is in place on the list returned by get_codebook().
codes = tree.get_codebook()
codes.sort(key=lambda entry: entry.symbol)
for entry in codes:
    row = f"{entry.symbol} | {entry.frequency:.5f} | {entry.code}"
    print(row)

# Separator, then the tree structure starting from the root.
print("\n-----------------------\n")
print_huffman_tree(tree.root)

0 | 0.06250 | 0011
1 | 0.06250 | 0010
2 | 0.06250 | 0001
3 | 0.06250 | 0000
4 | 0.12500 | 101
5 | 0.12500 | 100
6 | 0.12500 | 011
7 | 0.12500 | 010
8 | 0.25000 | 11

-----------------------

0 Frequência: 1.0
  1 Frequência: 0.5
    2 Frequência: 0.25
    2 Frequência: 0.25
      3 Frequência: 0.125
      3 Frequência: 0.125
  1 Frequência: 0.5
    2 Frequência: 0.25
      3 Frequência: 0.125
      3 Frequência: 0.125
    2 Frequência: 0.25
      3 Frequência: 0.125
        4 Frequência: 0.0625
        4 Frequência: 0.0625
      3 Frequência: 0.125
        4 Frequência: 0.0625
        4 Frequência: 0.0625
