In [None]:
## Problem-3 Huffman coding

In [24]:
from functools import total_ordering

@total_ordering
class Node:
    """A Huffman-tree node holding a symbol, its frequency and up to two children.

    Nodes are ordered and compared for equality by ``frequency`` only, which
    lets them be stored directly in a heap. Because ``__eq__`` is defined
    without ``__hash__``, instances are unhashable.
    """

    def __init__(self,data,frequency):
        """Create a childless node carrying ``data`` with weight ``frequency``."""
        self.data = data
        self.frequency = frequency
        self.left_child = None
        self.right_child = None

    def get_frequency(self):
        """Return the frequency stored on this node."""
        return self.frequency

    def set_data(self,data,frequency):
        """Replace both the payload and the frequency of this node."""
        self.data = data
        self.frequency = frequency

    def get_data(self):
        """Return the payload stored on this node."""
        return self.data

    def set_left_child(self,node):
        """Attach ``node`` as the left child."""
        self.left_child = node

    def set_right_child(self,node):
        """Attach ``node`` as the right child."""
        self.right_child = node

    def get_right_child(self):
        """Return the right child, or None when there is none."""
        return self.right_child

    def get_left_child(self):
        """Return the left child, or None when there is none."""
        return self.left_child

    def has_right_child(self):
        """Return True when a right child is attached."""
        # Identity test: `!= None` would be dispatched through the custom __eq__.
        return self.right_child is not None

    def has_left_child(self):
        """Return True when a left child is attached."""
        return self.left_child is not None

    def __eq__(self,other):
        """Two nodes are equal when their frequencies are equal."""
        # Returning NotImplemented lets Python fall back to its default
        # handling, so comparing against None or any non-Node yields False
        # instead of raising AttributeError.
        if not isinstance(other, Node):
            return NotImplemented
        return self.frequency == other.frequency

    def __lt__(self,other):
        """Order nodes by ascending frequency."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.frequency < other.frequency

    def __gt__(self,other):
        """Order nodes by descending frequency."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.frequency > other.frequency

    def __str__(self):
        return "Node({},{})".format(self.data,self.frequency)

    def __repr__(self):
        return "Node({},{})".format(self.data,self.frequency)
        
    

In [25]:
## priority queue implementation using heapq module
import heapq

class PriorityQueue:
    """Minimal min-priority queue backed by a binary heap from ``heapq``."""

    def __init__(self,init_list=None):
        """Create a queue, optionally pre-filled with the items of ``init_list``.

        The items are copied into a new list before being heapified, so the
        caller's sequence is neither reordered nor shared with the queue.
        """
        self.heap = [] if init_list is None else list(init_list)
        heapq.heapify(self.heap)

    def put(self,item):
        """Insert ``item`` into the queue."""
        heapq.heappush(self.heap,item)

    def get(self):
        """Remove and return the smallest item, or None when the queue is empty."""
        if self.heap:
            return heapq.heappop(self.heap)
        return None

    def top(self):
        """Return the smallest item without removing it, or None when empty."""
        if self.heap:
            return self.heap[0]
        return None

    def size(self):
        """Return the number of items currently queued."""
        return len(self.heap)
    
        

In [68]:
## huffman encoding
from collections import Counter
import sys

def huffman_encoding(data):
    """Encode ``data`` with a Huffman code built from its own symbol frequencies.

    Builds the tree, derives the symbol -> bit-string table, prints that
    table, and returns the concatenation of the codes of every symbol in
    ``data`` in order.
    """
    tree_root = build_huffman_tree(data)
    encoded_dict = traverse_to_get_encoded_value(tree_root)
    print(encoded_dict)

    # Look up each symbol's code and stitch the pieces together.
    pieces = []
    for symbol in data:
        pieces.append(encoded_dict[symbol])
    return ''.join(pieces)
    
def build_huffman_tree(data):
    """Build a Huffman tree for the symbols of ``data`` and return its root.

    ``data`` must be iterable (typically a string); each distinct item becomes
    a leaf weighted by its number of occurrences.

    Returns:
        The root ``Node`` of the tree, or None when ``data`` is empty.
        When ``data`` holds a single distinct symbol, the lone leaf is hung as
        the left child of a fresh root so that it still receives a one-bit code.
    """
    # Count occurrences of every distinct symbol.
    frequency_dict = Counter(data)
    # Seed the priority queue with one leaf per symbol.
    list_of_nodes = [Node(letter,freq) for letter,freq in frequency_dict.items()]
    pq = PriorityQueue(list_of_nodes)

    if pq.size() == 0:
        return None

    if pq.size() == 1:
        # A bare leaf would have an empty path from the root; give it a parent.
        only_leaf = pq.get()
        root = Node('',only_leaf.get_frequency())
        root.set_left_child(only_leaf)
        return root

    while pq.size() > 1:
        # The heap yields nodes in ascending frequency, so the first pop is
        # never heavier than the second: lighter goes left, heavier right.
        lighter = pq.get()
        heavier = pq.get()
        parent = Node('',lighter.get_frequency()+heavier.get_frequency())
        parent.set_left_child(lighter)
        parent.set_right_child(heavier)
        pq.put(parent)

    # The single remaining entry is the last merged node, i.e. the root.
    return pq.get()


def traverse_to_get_encoded_value(node):
    """Return a dict mapping each leaf's data to its bit-string code.

    The tree rooted at ``node`` is walked depth-first; taking a left edge
    appends '0' to the path and a right edge appends '1'. A node counts as a
    leaf when it has no children, so symbols that are falsy (such as ``0``)
    are still recorded.

    Special cases:
        * ``node`` is None -> an empty dict is returned.
        * ``node`` is itself a leaf -> its symbol is given the code '0',
          because an empty code could not be represented in the output.
    """
    encoded_mapping = dict()

    if node is None:
        return encoded_mapping

    def _traverse_to_get_encoded_value(node,encoding):
        is_leaf = True

        if node.has_left_child():
            is_leaf = False
            _traverse_to_get_encoded_value(node.get_left_child(),encoding +'0')

        if node.has_right_child():
            is_leaf = False
            _traverse_to_get_encoded_value(node.get_right_child(),encoding +'1')

        if is_leaf:
            # `encoding` is empty only when the root itself is the leaf.
            encoded_mapping[node.get_data()] = encoding or '0'

    _traverse_to_get_encoded_value(node,'')

    return encoded_mapping
    
    
def huffman_decoding(data,tree):
    """Decode the bit string ``data`` by walking the Huffman tree ``tree``.

    Args:
        data: iterable of the characters '0' and '1'.
        tree: root node of the Huffman tree. It is expected to expose the
            ``has_left_child``/``has_right_child``/``get_left_child``/
            ``get_right_child``/``get_data`` accessors, with '0' meaning
            "go left" and '1' meaning "go right". May be None only when
            ``data`` is empty.

    Returns:
        The decoded symbols joined into a string (symbols are assumed to be
        strings).

    Raises:
        ValueError: if ``data`` contains a character other than '0'/'1',
            follows a branch that does not exist, or stops in the middle of
            a code.
    """
    if tree is None:
        if data:
            raise ValueError("cannot decode non-empty data without a tree")
        return ''

    def _is_leaf(node):
        return not (node.has_left_child() or node.has_right_child())

    root_is_leaf = _is_leaf(tree)
    decoded = []
    current = tree

    for bit in data:
        if bit not in ('0', '1'):
            raise ValueError("invalid bit in encoded data: {!r}".format(bit))

        if root_is_leaf:
            # Degenerate one-symbol tree: every '0' stands for that symbol.
            if bit != '0':
                raise ValueError("encoded data does not match the tree")
            decoded.append(tree.get_data())
            continue

        if bit == '0':
            following = current.get_left_child()
        else:
            following = current.get_right_child()
        if following is None:
            raise ValueError("encoded data does not match the tree")

        current = following
        if _is_leaf(current):
            # A full code has been consumed: emit it and restart at the root.
            decoded.append(current.get_data())
            current = tree

    if current is not tree:
        raise ValueError("encoded data ends in the middle of a code")

    return ''.join(decoded)

In [70]:
# Sanity check: compare the encoder's output for a sample string with the
# expected bit string; the cell displays True when they match.
huffman_encoding('AACDDEEEEEE')== '0000010011011111111'




{'A': '00', 'C': '010', 'D': '011', 'E': '1'}


True