### Q5) Huffman Code Implementation
Node Class: Represents each node in the Huffman Tree, holding character, frequency, and pointers to child nodes.

Huffman Tree Creation: Builds the tree from character frequencies, using a priority queue to always merge the lowest frequency nodes first.

Code Generation: Traverses the tree to assign binary codes to each character based on their position in the tree.

Tree Printing: Visually represents the structure of the Huffman Tree for better understanding.

Average Bits Calculation: Computes the efficiency of the Huffman coding by comparing it to a fixed-length coding scheme.

In [16]:
import heapq  # Import the heapq module for implementing a priority queue (min-heap)

# Node class for Huffman Tree
class HuffmanNode:
    """A single vertex of a Huffman tree.

    Leaves carry a symbol in ``char``; internal nodes are created with
    ``char=None`` and link to their children through ``left`` / ``right``.
    """

    def __init__(self, char=None, freq=0, left=None, right=None):
        # Payload: the symbol (None for internal nodes) and its weight.
        self.char, self.freq = char, freq
        # Children: both stay None unless supplied by the caller.
        self.left, self.right = left, right

    def __lt__(self, other):
        """Order nodes by frequency so heapq can keep the lightest node on top."""
        return self.freq < other.freq

# Function to generate the Huffman Tree from character frequencies
def create_huffman_tree(frequencies):
    """Build a Huffman tree from a mapping of symbol -> frequency.

    Every symbol becomes a leaf in a min-heap keyed on frequency. The two
    lightest nodes are repeatedly removed and merged under a new internal
    node whose frequency is their sum, until a single root remains.

    Args:
        frequencies: Mapping from symbol to its frequency.

    Returns:
        The root ``HuffmanNode`` of the tree, or ``None`` when
        ``frequencies`` is empty.
    """
    heap = []  # Min-heap of nodes, ordered by HuffmanNode.__lt__ (frequency)

    # Step 1: one leaf node per symbol
    for char, freq in frequencies.items():
        heapq.heappush(heap, HuffmanNode(char, freq))

    # No symbols means no tree. Without this guard the final heap[0] raises
    # IndexError; None is the "empty tree" value the traversal helpers accept.
    if not heap:
        return None

    # Step 2: merge the two lightest nodes until only the root is left
    while len(heap) > 1:
        left = heapq.heappop(heap)   # lightest node
        right = heapq.heappop(heap)  # second-lightest node

        # Internal node: no symbol, frequency is the sum of its children
        merged = HuffmanNode(None, left.freq + right.freq, left, right)
        heapq.heappush(heap, merged)

    # The single remaining node is the root
    return heap[0]

# Function to generate Huffman Codes from the tree
def generate_huffman_codes(node, prefix='', code_map=None):
    """Collect the binary code of every symbol reachable from ``node``.

    The tree is walked depth-first; descending left appends '0' to the
    prefix and descending right appends '1'. Any node whose ``char`` is not
    None has the current prefix recorded as its code.

    Args:
        node: Root of the (sub)tree to traverse, or None for an empty tree.
        prefix: Code accumulated on the path from the overall root to ``node``.
        code_map: Dictionary to fill. A fresh one is created when omitted.

    Returns:
        The dictionary mapping each symbol to its code string.
    """
    # A literal `{}` default would be created once and shared by every call,
    # so codes from an earlier tree would leak into later results.
    if code_map is None:
        code_map = {}

    if node is None:
        return code_map

    if node.char is not None:
        code_map[node.char] = prefix

    generate_huffman_codes(node.left, prefix + '0', code_map)
    generate_huffman_codes(node.right, prefix + '1', code_map)
    return code_map

# Function to print the structure of the Huffman Tree
def print_huffman_tree(node, indent=''):
    """Print the tree rooted at ``node``, one node per line, in pre-order.

    Leaves are shown with their symbol and frequency, internal nodes with
    their frequency only. Each level of depth extends the indent with an
    'L-- ' or 'R-- ' marker for the branch taken.
    """
    if node is None:
        return  # Empty subtree: nothing to print

    if node.char is None:
        line = f"{indent}Node ({node.freq})"
    else:
        line = f"{indent}Leaf: {node.char} ({node.freq})"
    print(line)

    # Left subtree first, then right, each tagged with its branch marker
    for marker, child in (('  L-- ', node.left), ('  R-- ', node.right)):
        print_huffman_tree(child, indent + marker)

# Function to calculate the Average Bits per Letter (ABL) for the Huffman codes
def calculate_abl(code_map, frequencies):
    """Return the average number of bits per letter for a code assignment.

    Each symbol's code length is weighted by the symbol's frequency and the
    weighted total is divided by the sum of all frequencies.
    """
    symbol_total = sum(frequencies.values())  # Total number of encoded symbols

    # Bits needed to encode every occurrence of every symbol
    bit_total = sum(
        len(code_map[symbol]) * count for symbol, count in frequencies.items()
    )

    return bit_total / symbol_total

# Frequencies of characters as given in the table
frequencies = {
    'A': 350, 'E': 240, 'C': 225, 'D': 200, 'F': 180, 
    'G': 120, 'H': 95, 'M': 90, 'T': 80, 'P': 65, 
    'Q': 50, 'R': 40, 'X': 20, 'Z': 15
}

# Step 1: Create the Huffman Tree using the given frequencies
huffman_tree = create_huffman_tree(frequencies)

# Step 2: Generate the Huffman Codes from the constructed tree
huffman_codes = generate_huffman_codes(huffman_tree)

# Step 3: Print the structure of the Huffman Tree
print("Huffman Tree Structure:")
print_huffman_tree(huffman_tree)

# Step 4: Print the generated Huffman Codes for each character
print("\nHuffman Codes:")
for char, code in huffman_codes.items():
    print(f"{char}: {code}")  # Print each character and its corresponding Huffman code

# Step 5: Calculate Average Bits per Letter (ABL) for Huffman codes
abl_huffman = calculate_abl(huffman_codes, frequencies)  # Compute ABL for Huffman codes

# A fixed-length code needs ceil(log2(n)) bits for n distinct symbols.
# (n - 1).bit_length() computes that without floating point; for the 14
# symbols in this table the result is 4. At least one bit is always used.
abl_fixed = max(1, (len(frequencies) - 1).bit_length())

# Print the ABL results for Huffman coding and fixed-length coding
print("\nAverage Bits per Letter (Huffman):", abl_huffman)
print(f"Average Bits per Letter (Fixed {abl_fixed}-bits):", abl_fixed)


Huffman Tree Structure:
Node (1770)
  L-- Node (735)
  L--   L-- Leaf: A (350)
  L--   R-- Node (385)
  L--   R--   L-- Node (185)
  L--   R--   L--   L-- Leaf: M (90)
  L--   R--   L--   R-- Leaf: H (95)
  L--   R--   R-- Leaf: D (200)
  R-- Node (1035)
  R--   L-- Node (460)
  R--   L--   L-- Leaf: C (225)
  R--   L--   R-- Node (235)
  R--   L--   R--   L-- Node (115)
  R--   L--   R--   L--   L-- Leaf: Q (50)
  R--   L--   R--   L--   R-- Leaf: P (65)
  R--   L--   R--   R-- Leaf: G (120)
  R--   R-- Node (575)
  R--   R--   L-- Leaf: E (240)
  R--   R--   R-- Node (335)
  R--   R--   R--   L-- Node (155)
  R--   R--   R--   L--   L-- Node (75)
  R--   R--   R--   L--   L--   L-- Node (35)
  R--   R--   R--   L--   L--   L--   L-- Leaf: Z (15)
  R--   R--   R--   L--   L--   L--   R-- Leaf: X (20)
  R--   R--   R--   L--   L--   R-- Leaf: R (40)
  R--   R--   R--   L--   R-- Leaf: T (80)
  R--   R--   R--   R-- Leaf: F (180)

Huffman Codes:
A: 00
M: 0100
H: 0101
D: 011
C: 100
Q: 10

Node Class: Defines a structure for nodes in the Huffman tree, including comparison for priority in the heap.

Huffman Tree Construction: Builds the Huffman tree from character frequencies, merging nodes based on frequency until only one remains.

Code Generation: Traverses the tree to create binary codes for each character.

ABL Calculation: Computes the average bits per letter for both Huffman and fixed-length encoding, giving insight into the efficiency of Huffman coding.

Main Function: Ties everything together, building the tree, generating codes, printing results, and calculating ABL.

In [17]:
import heapq  # Import the heapq module to implement a priority queue (min-heap)
from collections import defaultdict  # Import defaultdict for easier dictionary handling

# Node class to represent the characters and frequencies in the Huffman Tree
class Node:
    """A Huffman tree node: a symbol, its frequency, and two child links."""

    def __init__(self, char, freq):
        # Symbol and weight; internal nodes are created with char=None
        self.char, self.freq = char, freq
        # Children start unset and are attached by the tree builder
        self.left = self.right = None

    def __lt__(self, other):
        """Compare by frequency so heapq orders nodes lightest-first."""
        return self.freq < other.freq

# Function to build the Huffman Tree
def build_huffman_tree(frequency_table):
    """Build a Huffman tree from a mapping of symbol -> frequency.

    A leaf is pushed onto a min-heap for every symbol. The two
    lowest-frequency nodes are then repeatedly popped and joined under a new
    internal node (``char`` is None, frequency is the sum of the children)
    until only the root remains.

    Args:
        frequency_table: Mapping from symbol to its frequency.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``frequency_table``
        is empty.
    """
    heap = []  # Min-heap of nodes, ordered by Node.__lt__ (frequency)

    # Create a leaf node for each character and push it to the priority queue
    for char, freq in frequency_table.items():
        heapq.heappush(heap, Node(char, freq))

    # No symbols means no tree. Without this guard the final heap[0] raises
    # IndexError; generate_huffman_codes already accepts a None root.
    if not heap:
        return None

    # Iterate until the heap contains only one node (the root of the tree)
    while len(heap) > 1:
        left = heapq.heappop(heap)   # lowest frequency
        right = heapq.heappop(heap)  # next-lowest frequency

        # Internal node whose frequency is the sum of its two children
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right

        heapq.heappush(heap, merged)

    # The last remaining node is the root of the Huffman tree
    return heap[0]

# Function to generate Huffman codes by traversing the tree
def generate_huffman_codes(root, current_code, huffman_code):
    """Fill ``huffman_code`` with the code of every leaf under ``root``.

    The tree is visited in pre-order (left before right). Going left appends
    "0" to the code and going right appends "1". A node with a non-None
    ``char`` is treated as a leaf: its code is stored and its children are
    not visited. The function writes into ``huffman_code`` and returns None.
    """
    # Explicit stack instead of recursion; the right child is pushed first so
    # the left child is popped (and therefore recorded) first.
    pending = [(root, current_code)]
    while pending:
        node, code = pending.pop()
        if node is None:
            continue  # Missing child: nothing to record

        if node.char is not None:
            huffman_code[node.char] = code  # Leaf reached: store its code
            continue

        pending.append((node.right, code + "1"))
        pending.append((node.left, code + "0"))

# Function to calculate Average Bits per Letter (ABL) for Huffman and Fixed-length encoding
def calculate_abl(frequency_table, huffman_code):
    """Compute average bits per letter for Huffman and fixed-length coding.

    Args:
        frequency_table: Mapping from symbol to its frequency.
        huffman_code: Mapping from symbol to its Huffman code string; must
            contain every symbol in ``frequency_table``.

    Returns:
        A ``(huffman_abl, fixed_length_abl)`` tuple. ``huffman_abl`` is the
        frequency-weighted mean code length. ``fixed_length_abl`` is the
        number of bits a fixed-length code needs for the number of symbols
        in ``frequency_table`` (4 for the 14-symbol table used in this file).

    Raises:
        KeyError: If a symbol has no entry in ``huffman_code``.
        ZeroDivisionError: If the frequencies sum to zero (including an
            empty table).
    """
    total_frequency = sum(frequency_table.values())  # Total frequency of all characters

    # Total bits required to encode every occurrence with the Huffman codes
    total_bits_huffman = sum(
        freq * len(huffman_code[char]) for char, freq in frequency_table.items()
    )

    # Huffman ABL calculation
    huffman_abl = total_bits_huffman / total_frequency

    # Fixed-length ABL: ceil(log2(n)) bits distinguish n symbols.
    # (n - 1).bit_length() gives that exactly without floating point, and at
    # least one bit is always needed. The previous hard-coded 4 was only
    # correct for alphabets of 9 to 16 symbols.
    fixed_length_abl = max(1, (len(frequency_table) - 1).bit_length())

    return huffman_abl, fixed_length_abl  # Return both ABL values

# Main function
def main():
    """Build the Huffman tree for the sample table and report codes and ABL."""
    # Symbol frequencies for the exercise
    symbol_counts = {
        'A': 350, 'E': 240, 'C': 225, 'D': 200,
        'F': 180, 'G': 120, 'H': 95, 'M': 90,
        'T': 80, 'P': 65, 'Q': 50, 'R': 40,
        'X': 20, 'Z': 15,
    }

    # Construct the tree, then walk it to fill the symbol -> code table
    tree_root = build_huffman_tree(symbol_counts)
    codes = {}
    generate_huffman_codes(tree_root, "", codes)

    # List the codes alphabetically by symbol
    print("Huffman Codes:")
    for symbol, bits in sorted(codes.items()):
        print(f"{symbol}: {bits}")

    # Compare the Huffman average against the fixed-length baseline
    huffman_abl, fixed_length_abl = calculate_abl(symbol_counts, codes)
    print(f"\nAverage Bits per Letter (Fixed-length encoding): {fixed_length_abl} bits")
    print(f"Average Bits per Letter (Huffman encoding): {huffman_abl:.2f} bits")

# Entry point: call main() only when this code is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    main()  # Build the tree, print the codes and the ABL comparison


Huffman Codes:
A: 00
C: 100
D: 011
E: 110
F: 1111
G: 1011
H: 0101
M: 0100
P: 10101
Q: 10100
R: 111001
T: 11101
X: 1110001
Z: 1110000

Average Bits per Letter (Fixed-length encoding): 4 bits
Average Bits per Letter (Huffman encoding): 3.44 bits


: 