In [1]:
# Huffman Encoding Implementation using Greedy Strategy

import heapq

# A class to represent each node in the Huffman Tree
class HuffmanNode:
    """One node of a Huffman tree.

    A node stores a symbol (``char``), a weight (``freq``) and two child
    links. Nodes created with ``char=None`` serve as internal nodes; their
    children are attached by whoever builds the tree.
    """

    def __init__(self, char, freq):
        # Payload: the symbol this node stands for and its weight.
        self.char, self.freq = char, freq
        # Child links start out unset.
        self.left = self.right = None

    def __lt__(self, other):
        """Compare by frequency only, which is all heapq needs for ordering."""
        return self.freq < other.freq


# Step 1: Build Huffman Tree
def build_huffman_tree(char_freq):
    """Build a Huffman tree from a symbol-to-frequency mapping.

    Args:
        char_freq: Mapping of each symbol to its frequency.

    Returns:
        The root ``HuffmanNode`` of the tree, or ``None`` when ``char_freq``
        is empty. When the mapping holds exactly one symbol, the root is an
        internal node whose left child is that symbol's leaf.
    """
    # One leaf per distinct symbol.
    heap = [HuffmanNode(char, freq) for char, freq in char_freq.items()]

    # Nothing to build a tree from; previously this fell through to
    # ``heap[0]`` and raised IndexError.
    if not heap:
        return None

    # A lone leaf used as the root would be reached with an empty path, so
    # its code would be "" and the encoded text would carry no information.
    # Hanging it under an internal root gives it a real 1-bit path (left).
    if len(heap) == 1:
        only_leaf = heap[0]
        root = HuffmanNode(None, only_leaf.freq)
        root.left = only_leaf
        return root

    heapq.heapify(heap)

    # Greedy step: repeatedly merge the two lightest nodes until one remains.
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)

        # Internal node (char=None) weighted by the sum of its children.
        merged = HuffmanNode(None, left.freq + right.freq)
        merged.left = left
        merged.right = right

        heapq.heappush(heap, merged)

    # The remaining node is the root of the Huffman tree.
    return heap[0]


# Step 2: Generate Huffman Codes (recursive traversal)
def generate_codes(node, current_code, codes):
    """Fill ``codes`` with the bit string leading to every leaf under ``node``.

    Args:
        node: Root of the (sub)tree to walk; ``None`` is ignored.
        current_code: Prefix already accumulated on the way to ``node``.
        codes: Dict updated in place, mapping each leaf's ``char`` to its code.

    A node counts as a leaf when its ``char`` is not ``None``; its children,
    if any, are not visited. Going left appends "0", going right appends "1".
    """
    pending = [(node, current_code)]
    while pending:
        current, prefix = pending.pop()
        if current is None:
            continue

        if current.char is not None:
            codes[current.char] = prefix
            continue

        # The stack is LIFO: push the right branch first so the left branch
        # is processed first, keeping left-before-right visiting order.
        pending.append((current.right, prefix + "1"))
        pending.append((current.left, prefix + "0"))


# Step 3: Encode the input text
def huffman_encoding(text):
    """Huffman-encode ``text``.

    Args:
        text: The string to encode.

    Returns:
        A tuple ``(encoded_text, root, codes)``: the encoded bit string, the
        root of the Huffman tree, and the dict mapping each character to its
        code. For empty ``text`` this is ``("", None, {})``.
    """
    # With no characters there are no frequencies to build a tree from;
    # return an empty result instead of failing during tree construction.
    if not text:
        return "", None, {}

    # Calculate frequency of each character
    freq = {}
    for char in text:
        freq[char] = freq.get(char, 0) + 1

    # Build Huffman Tree
    root = build_huffman_tree(freq)

    # Generate Huffman Codes
    codes = {}
    generate_codes(root, "", codes)

    # Encode the text by concatenating each character's code
    encoded_text = "".join(codes[char] for char in text)
    return encoded_text, root, codes


# Step 4: Decode the encoded string
def huffman_decoding(encoded_text, root):
    """Decode a bit string by walking the Huffman tree rooted at ``root``.

    Args:
        encoded_text: Iterable of bit characters; "0" steps to the left
            child, anything else steps to the right child.
        root: Root node of the Huffman tree used for encoding.

    Returns:
        The decoded string. Bits left over after the last complete code are
        ignored.
    """
    symbols = []
    node = root

    for bit in encoded_text:
        if bit == "0":
            node = node.left
        else:
            node = node.right

        # Still on an internal node: keep consuming bits.
        if node.char is None:
            continue

        # Reached a leaf: emit its symbol and restart from the root.
        symbols.append(node.char)
        node = root

    return "".join(symbols)


# ---------------- MAIN DRIVER CODE ----------------
if __name__ == "__main__":
    # Interactive demo: read a line, encode it, decode it back, and report
    # the size of the encoding relative to 8 bits per input character.
    text = input("Enter text to encode: ")

    print("\nOriginal Text:", text)

    if not text:
        # An empty input has no characters to build a tree from, and the
        # ratio below would divide by zero, so report it and stop here.
        print("\nNothing to encode: the input text is empty.")
    else:
        encoded_text, root, codes = huffman_encoding(text)

        print("\nHuffman Codes:", codes)
        print("Encoded Text:", encoded_text)

        decoded_text = huffman_decoding(encoded_text, root)
        print("Decoded Text:", decoded_text)

        # Encoded length in bits as a percentage of len(text) * 8 bits.
        ratio = (len(encoded_text) / (len(text) * 8)) * 100
        print("\nCompression Ratio: {:.2f}%".format(ratio))


Enter text to encode:  huffman



Original Text: huffman

Huffman Codes: {'u': '00', 'f': '01', 'n': '100', 'h': '101', 'a': '110', 'm': '111'}
Encoded Text: 101000101111110100
Decoded Text: huffman

Compression Ratio: 32.14%
