# Dominik Adamczyk
## Laboratorium 2 - rozwiązania 

#### 1. Implementacja statycznego algorytmu Huffmana

In [58]:
from collections import Counter
from heapq import heapify, heappop, heappush
from bitarray import bitarray, decodetree, frozenbitarray

class Static_node:
    def __init__(self, symbol, weight, left=None, right=None):
        self.symbol = symbol
    self.weight = weight
    self.left = left
    self.right = right
    
    def __lt__(self, other):
        return self.weight < other.weight
    
def static_huffman_traverse(codes, node, code=frozenbitarray()):
    if node.symbol is not None:
        codes[node.symbol] = code
    else:
        static_huffman_traverse(codes, node.left, frozenbitarray(bitarray(code) + bitarray('0')))
        static_huffman_traverse(codes, node.right, frozenbitarray(bitarray(code) + bitarray('1')))

def static_huffman_encoding(data):
    freq = Counter(data)
    heap = [Static_node(symbol, weight) for symbol, weight in freq.items()]
    
    heapify(heap)
    
    while len(heap) > 1:
        node1 = heappop(heap)
        node2 = heappop(heap)
        merged = Static_node(None, node1.weight + node2.weight, node1, node2)
        heappush(heap, merged)
    
    codes = {}
    static_huffman_traverse(codes, heap[0])
    
    # print(codes)
    encoded_data = bitarray() # ''.join(codes[symbol] for symbol in data)
    for symbol in data:
        encoded_data += codes[symbol] 
    
    codes =  {code: symbol for symbol, code in codes.items()}
    
    return encoded_data, codes

def static_huffman_decoding(encoded_data, codes):
    # reverse_codes = {code: symbol for symbol, code in codes.items()}
    
    decoded_data = ''
    code = bitarray()
    for bit in encoded_data:
        code.append(bit)
        if frozenbitarray(code) in codes:
            symbol = codes[frozenbitarray(code)]
            decoded_data += symbol
            code = bitarray()
    
    return decoded_data


a, b = static_huffman_encoding("""ala ma kota
                               i ma tez pska takiego 
                               oj oj oj 12
                               34""")


In [139]:
import struct

def encode_file(filename, static=True):
    with open("text_files/" + filename + ".txt", "r") as f:
        text = f.read()
        
    if static:
        text_coded, decode_tree = static_huffman_encoding(text)
    else:
        text_coded, decode_tree = None, None
    with open("compressed_files/" + filename + ".bin", "wb+") as f:
        num_entries = len(decode_tree)
        header = struct.pack("i", num_entries)
        f.write(header)
        bits_to_omit = 0 if len(text_coded) % 8 == 0 else 8 - len(text_coded) % 8
        header2 = struct.pack("b", bits_to_omit)
        f.write(header2)
        # print(len(text_coded), len(text_coded) % 8)
        
        for code, letter in decode_tree.items():
            code = code.to01()
            code_bytes = code.encode("ascii")
            letter_bytes = letter.encode("ascii")
            code_len = len(code_bytes)
            entry = struct.pack(f'b{cod_len}s1s', code_len, code_bytes, letter_bytes)
            f.write(entry)
        text_coded.tofile(f)

encode_file("test")

def decode_file(filename, static=True):
    with open("compressed_files/" + filename + ".bin", "rb") as f:
        header = f.read(4)
        num_entries = struct.unpack('i', header)[0]
        header = f.read(1)
        bits_to_omit = struct.unpack('b', header)[0]
        
        decode_tree = {}
        for _ in range(num_entries):
            len_entry = f.read(1)
            # print(len_entry)
            len_entry = struct.unpack('b', len_entry)[0]
            # print(len_entry)
            code_bytes = f.read(len_entry)
            letter_bytes = f.read(1)
            code = code_bytes.decode("ascii")
            letter = letter_bytes.decode("ascii")
            decode_tree[frozenbitarray(code)] = letter
        coded_data = bitarray()
        coded_data.fromfile(f)
        coded_data = coded_data[:-bits_to_omit]
    
    # text = static_huffman_decoding(coded_data, decode_tree)
    with open("decompressed_files/" + filename + ".txt", "w") as f:
        f.write(static_huffman_decoding(coded_data, decode_tree))
    # print(text)

def file_compare(filename):
    with open("text_files/" + filename + ".txt", "r") as f1, open("decompressed_files/" + filename + ".txt", "r") as f2:
        f1_cont = f1.read()
        f2_cont = f2.read()
        if f1_cont == f2_cont:
            print(f"Initial, and decompressed file '{filename}.txt' is the same")
        else:
            print(f"Initial, and decompressed file '{filename}.txt' is not the same")
decode_file("test")
file_compare("test")

Initial, and decompressed file 'test.txt' is the same


In [86]:
import struct

def encode_file(filename, static=True):
        with open(filename) as f:
                text = f.read()
        if static:
                huffman_code, decode_tree = static_huffman_decoding(text)
        else:
                huffman_code, decode_tree = None, None
        with open("compressed_files/" + file + ".bin", "wb+") as f:
file = "test"
with open("text_files/" + file +".txt", "r") as f:
        text = f.read()
a, b = static_huffman_encoding(text)
print(a)
print(b)
# a.append()
with open("compressed_files/" + file + ".bin", "wb+") as f:
        num_entries = len(b)
        header = struct.pack("i", num_entries)
        f.write(header)
        
        for code, letter in b.items():
                code = code.to01()
                code_bytes = code.encode("ascii")
                letter_bytes = letter.encode("ascii")
                code_len = len(code_bytes)
                letter_len = len(letter_bytes)
                entry = struct.pack("ii%ds%ds" % (code_len, letter_len), code_len, letter_len, code_bytes, letter_bytes)
                # print(code_bytes, letter_bytes)
                # print(entry)
                f.write(entry)

        a.tofile(f)

with open("compressed_files/" + file + ".bin", "rb") as f:
        header = f.read(4)
        num_entries = struct.unpack("i", header)[0]
        
        code_to_letter = {}
        for i in range(num_entries):
                entry = f.read(8)
                
                # print(entry)
                code_len, letter_len = struct.unpack("ii", entry)
                code_bytes = f.read(code_len)
                letter_bytes = f.read(letter_len)
                # print(code_len, letter_len, code_bytes, letter_bytes)
                code = code_bytes.decode("ascii")
                letter = letter_bytes.decode("ascii")
                code_to_letter[code] = letter
        coded_data = bitarray()
        coded_data.fromfile(f)
        code_to_letter = {frozenbitarray(code): symbol for code, symbol in code_to_letter.items()}
print(coded_data)
print(code_to_letter)

decoded = static_huffman_decoding(coded_data, code_to_letter)
print(decoded)

with open("decompressed_files/" + file + ".txt", "w+") as f:
        f.write(decoded)

SyntaxError: invalid syntax (431622948.py, line 10)