In [1]:
from huffman.encoder import HuffmanEncoder


In [2]:
# Create a HuffmanEncoder object
encoder = HuffmanEncoder()

example_input = './inputs/simple.txt'

# First we need to count the number of times each character appears in the input
encoder._fit_encoder(example_input)

print(f'Character counts: {encoder._character_counts}')
print(f'Encoding tree: {encoder._encoding_tree}')
print(f'Encoding table: {encoder._encoding_table}')


Character counts: {'a': 10, 'b': 5, 'c': 3, 'd': 2, 'e': 1}
Encoding tree: HuffmanNode(None, 21)
Encoding table: {'d': '1111', 'e': '11101', 'EOF': '11100', 'c': '110', 'b': '10', 'a': '0'}


In [3]:
# Test the encoder
encoder = HuffmanEncoder()
encoder.encode(example_input, './outputs/simple.huff')

with open('./outputs/simple.huff', 'rb') as f:
    print(f.read())


b'\x00\x00\x00\x05d\x00\x00\x00\x041111e\x00\x00\x00\x0511101c\x00\x00\x00\x03110b\x00\x00\x00\x0210a\x00\x00\x00\x010\xff\xff\xff\xff\x0511100\x00*\xad\xb7\xffx'


In [4]:
# Test fitting the decoder
encoder = HuffmanEncoder()
with open('./outputs/simple.huff', 'rb') as f:
    encoder._fit_decoder(f)
print(f'Encoding tree: {encoder._encoding_table}')


Encoding tree: {'1111': 'd', '11101': 'e', '110': 'c', '10': 'b', '0': 'a', '11100': 'EOF'}


In [5]:
# Test the decoder
encoder = HuffmanEncoder()
encoder.decode('./outputs/simple.huff', './outputs/simple_decoded.txt')

with open('./outputs/simple_decoded.txt', 'r') as f:
    print(f.read())  # expect aaaaaaaaaabbbbbcccdde


aaaaaaaaaabbbbbcccdde


In [6]:
# Encode and decode a bigger file

encoder = HuffmanEncoder()
encoder.encode('./inputs/ai_thriller.txt', './outputs/ai_thriller.huff')

with open('./outputs/ai_thriller.huff', 'rb') as f:
    encoded = f.read()
    print(f'Encoded file size: {len(encoded)} bytes')
    print(f'Encoded data: {encoded}')

with open('./inputs/ai_thriller.txt', 'r') as f:
    original = f.read()
    print(f'Original file size: {len(original)} bytes')


Encoded file size: 1891 bytes
Encoded data: b'\x00\x00\x00*c\x00\x00\x00\x06111111g\x00\x00\x00\x06111110v\x00\x00\x00\x071111011y\x00\x00\x00\x071111010j\x00\x00\x00\t111100111x\x00\x00\x00\x0b11110011011-\x00\x00\x00\x0b11110011010q\x00\x00\x00\n1111001100\n\x00\x00\x00\x0811110010b\x00\x00\x00\x0811110001S\x00\x00\x00\x0811110000t\x00\x00\x00\x041110 \x00\x00\x00\x03110w\x00\x00\x00\x06101111p\x00\x00\x00\x06101110d\x00\x00\x00\x0510110.\x00\x00\x00\x071010111P\x00\x00\x00\n1010110111z\x00\x00\x00\n1010110110I\x00\x00\x00\t101011010T\x00\x00\x00\t101011001\'\x00\x00\x00\t101011000m\x00\x00\x00\x06101010l\x00\x00\x00\x0510100h\x00\x00\x00\x041001o\x00\x00\x00\x041000a\x00\x00\x00\x040111r\x00\x00\x00\x040110e\x00\x00\x00\x03010f\x00\x00\x00\x06001111u\x00\x00\x00\x06001110,\x00\x00\x00\x06001101D\x00\x00\x00\x0b00110011111E\x00\x00\x00\x0c001100111101R\x00\x00\x00\x0b00110011101O\x00\x00\x00\x0b00110011100W\x00\x00\x00\t001100110A\x00\x00\x00\x0800110010k\x00\x00\x00\x070011000n\x00\

In [8]:
# Decode the larger file
encoder = HuffmanEncoder()
encoder.decode('./outputs/ai_thriller.huff',
               './outputs/ai_thriller_decoded.txt')

with open('./outputs/ai_thriller_decoded.txt', 'r') as f:
    decoded = f.read()
    print(f'Decoded file size: {len(decoded)} bytes')
    print(f'Decoded data: {decoded}')

# Compare the original and decoded files
with open('./inputs/ai_thriller.txt', 'r') as f:
    original = f.read()

assert original == decoded


Decoded file size: 2513 bytes
Decoded data: Ever since the President had personally selected her, Sarah Thompson's life had been a whirlwind of secrecy, intense pressure, and the weight of the world on her shoulders. As a top software engineer, she had been tasked with studying machine learning under the President's orders, in an effort to save the world from a catastrophic threat that loomed just over the horizon. The details of this threat were highly classified, and she was left in the dark, only knowing that her work was critical to the survival of humanity.

Sarah's days were long, her nights short, and her work environment heavily monitored. She was provided with the most advanced technology and resources, while top machine learning specialists were brought in to work alongside her.

As the project progressed, Sarah's intuition told her that something was off. The pressure from the government was immense, and the secretive nature of the project began to raise questions in her min