In [1]:
from encoder import HuffmanEncoder


In [2]:
# Walk through the fitting step on a small sample file and show the
# intermediate structures the encoder exposes afterwards.
example_input = './inputs/simple.txt'

encoder = HuffmanEncoder()

# Fitting reads the input; the attributes printed below are inspected
# right after this call.
encoder._fit_encoder(example_input)

for label, value in (
    ('Character counts', encoder._character_counts),
    ('Encoding tree', encoder._encoding_tree),
    ('Encoding table', encoder._encoding_table),
):
    print(f'{label}: {value}')


Character counts: {'a': 10, 'b': 5, 'c': 3, 'd': 2, 'e': 1}
Encoding tree: HuffmanNode(None, 21)
Encoding table: {'d': '1111', 'e': '11101', 'EOF': '11100', 'c': '110', 'b': '10', 'a': '0'}


In [3]:
# Compress the sample input with a fresh encoder, then dump the raw bytes
# of the resulting file for inspection.
encoded_path = './outputs/simple.huff'

encoder = HuffmanEncoder()
encoder.encode(example_input, encoded_path)

with open(encoded_path, 'rb') as encoded_file:
    raw_bytes = encoded_file.read()
print(raw_bytes)


b'\x00\x00\x00\x05d\x00\x00\x00\x041111e\x00\x00\x00\x0511101c\x00\x00\x00\x03110b\x00\x00\x00\x0210a\x00\x00\x00\x010\xff\xff\xff\xff\x0511100\x00*\xad\xb7\xffx'


In [4]:
# Test fitting the decoder: a fresh encoder instance is fitted from the
# encoded file, passed as an open binary file handle rather than a path.
encoder = HuffmanEncoder()
with open('./outputs/simple.huff', 'rb') as f:
    encoder._fit_decoder(f)
# The value shown is the table attribute (not the tree), so label it as such.
print(f'Encoding table: {encoder._encoding_table}')


Encoding tree: {'1111': 'd', '11101': 'e', '110': 'c', '10': 'b', '0': 'a', '11100': 'EOF'}


In [5]:
# Decode the compressed sample back to text using a fresh encoder instance,
# then show the recovered contents.
decoded_path = './outputs/simple_decoded.txt'

encoder = HuffmanEncoder()
encoder.decode('./outputs/simple.huff', decoded_path)

with open(decoded_path, 'r') as decoded_file:
    decoded_text = decoded_file.read()
print(decoded_text)  # expect aaaaaaaaaabbbbbcccdde


aaaaaaaaaabbbbbcccdde


In [6]:
# Encode a bigger file and compare the on-disk sizes of input and output.

encoder = HuffmanEncoder()
encoder.encode('./inputs/ai_thriller.txt', './outputs/ai_thriller.huff')

with open('./outputs/ai_thriller.huff', 'rb') as f:
    encoded = f.read()
    print(f'Encoded file size: {len(encoded)} bytes')

# Read the input in binary mode so len() counts bytes, matching both the
# printed label and how the encoded file is measured above. A text-mode read
# would count decoded characters (after newline translation) instead, which
# differs from the byte size for non-ASCII text or CRLF line endings.
with open('./inputs/ai_thriller.txt', 'rb') as f:
    original_bytes = f.read()
    print(f'Original file size: {len(original_bytes)} bytes')


Encoded file size: 1891 bytes
Original file size: 2513 bytes


In [7]:
# Decode the larger file and verify the round trip reproduces the input.
encoder = HuffmanEncoder()
encoder.decode('./outputs/ai_thriller.huff',
               './outputs/ai_thriller_decoded.txt')

# The size is reported in bytes, so measure the raw file contents rather
# than the length of the decoded text (which counts characters).
with open('./outputs/ai_thriller_decoded.txt', 'rb') as f:
    print(f'Decoded file size: {len(f.read())} bytes')

with open('./outputs/ai_thriller_decoded.txt', 'r') as f:
    decoded = f.read()

# Compare the original and decoded files as text
with open('./inputs/ai_thriller.txt', 'r') as f:
    original = f.read()

assert original == decoded, 'decoded text does not match the original input'


Decoded file size: 2513 bytes
