# Algorytmy tekstowe - laboraorium 2

# 1. Zadanie polega na implementacji dwóch algorytmów kompresji:

### 1. statycznego algorytmu Huffmana (2 p)

In [35]:
from queue import PriorityQueue
from collections import Counter

class Node:
    def __init__(self, frequency, sign=None):
        self.frequency = frequency
        self.sign = sign
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.frequency < other.frequency


def build_tree(signs):
    frequencies = Counter(signs)
    nodes = [Node(frequencies[sign], sign) for sign in frequencies]
    pq = PriorityQueue()
    root = None
    
    for node in nodes:
        pq.put(node)
    
    while True:
        left = pq.get()
        right = pq.get()
        parent = Node(left.frequency + right.frequency)
        parent.left, parent.right = left, right
        pq.put(parent)
        
        if pq.qsize() == 1: # root
            root = pq.get()
            break

    return root


def build_code_table(node, codes = {}, code = ""):
    if node.sign is not None:
        codes[node.sign] = code
        
    if node.left is not None:
        build_code_table(node.left, codes, code + "0")
    if node.right is not None:
        build_code_table(node.right, codes, code + "1")

    return codes

def static_huffman_encode(data):
    root = build_tree(data)
    codes = build_code_table(root)
    
    result = ""
    
    for sign in data:
        result += codes[sign]
        
    return result, root

def static_huffman_decode(encoded_data, root):
    decoded_data = ""
    node = root

    for bit in encoded_data:
        if bit == "1": node = node.right
        else: node = node.left

        if node.sign is not None:
            decoded_data += node.sign
            node = root

    return decoded_data

Test działnia:

In [36]:
data = "abracadabra"
result, root = static_huffman_encode(data)
print("Encoded data: " + result)
print("Is implemented correctly?: " + str(data == static_huffman_decode(result, root)))

Encoded data: 01111100100010101111100
Is implemented correctly?: True


### 2. dynamicznego algorytmu Huffmana (3 p)

In [53]:
class Node:
    def __init__(self, sign=None, weight=0, parent=None, left=None, right=None):
        self.sign = sign
        self.weight = weight
        self.parent = parent
        self.left = left
        self.right = right

def update_tree(node): # przechodzenie po drzewie metoda bottom-up
    parent = node.parent
    
    if parent is None: # mamy korzeń - koniec rekurencji, nie trzeba już nic poprawiać
        return
    
    nodes = [parent.left, parent.right]
    
    if nodes[0].weight > nodes[1].weight or nodes[0] is not node:
        nodes[0], nodes[1] = nodes[1], nodes[0]
        
    update_tree(parent)

def get_code(node):
    code = []
    while node.parent is not None:
        if node == node.parent.left:
            code.append('0')
        else:
            code.append('1')
        node = node.parent
    return list(reversed(code))

def build_tree(data):
    symbol_nodes = {}
    for sign in data:
        symbol_nodes[sign] = symbol_nodes.get(sign, Node(sign, 0))
        symbol_nodes[sign].weight += 1

    leaves = list(symbol_nodes.values())
    
    while len(leaves) > 1:
        leaves.sort(key=lambda n: (n.weight, id(n)))
        left, right = leaves.pop(0), leaves.pop(0)
        parent = Node(left.weight + right.weight, left, right)
        left.parent = right.parent = parent
        leaves.append(parent)
    
    return leaves[0], symbol_nodes

## kodowanie i dekodowanie
def dynamic_huffman_encode(data):
    root, symbol_nodes = build_tree(data)
    bits = []
    for sign in data:
        bits.extend(get_code(symbol_nodes[sign]))
    return ''.join(bits), root

def dynamic_huffman_decode(bits, root):
    current = root
    signs = []
    for bit in bits:
        if bit == '0': current = current.left
        else: current = current.right
            
        if current.sign is not None:
            signs.append(current.sign)
            current = root
            
    return ''.join(signs)

Test działania:

In [54]:
data = "abracadabra"
result, root = dynamic_huffman_encode(data)
print("Encoded data: " + result)
print("Is implemented correctly?: " + str(data == dynamic_huffman_decode(result, root)))

Encoded data: 01111100100010101111100
Is implemented correctly?: True
