## A priority queue must be built, and every character pushed onto it with its frequency as the priority, before we can build the Huffman tree


In [37]:
## Priority queue implementation using an array (a Python list) as the backing data structure

In [38]:
from collections import deque

class EmptyQueueError(Exception):
    """Signal that an item was requested from a queue that holds none."""

class PQNode:
    """Tree/queue node: a payload, its priority, and two child links.

    Attributes:
        data: the payload stored in the node.
        pr: the priority associated with ``data``.
        leftchild: left child node, ``None`` until assigned.
        rightchild: right child node, ``None`` until assigned.
    """

    def __init__(self, data, priority):
        self.data, self.pr = data, priority
        # A freshly created node has no children.
        self.leftchild = self.rightchild = None
        
class PQueue(object):
    """Min-priority queue of nodes, backed by a sorted Python list.

    ``items`` is kept sorted by ascending ``pr`` so the node with the
    smallest priority is always at index 0. The sort is stable, so nodes
    with equal priority leave the queue in the order they were inserted.
    """

    def __init__(self):
        self.items = []   # nodes, ascending by priority
        self.nitems = 0   # always equal to len(self.items)

    def enqueue(self, data, pri):
        """Wrap ``data`` in a new ``PQNode`` with priority ``pri`` and insert it."""
        self.enqueue_node(PQNode(data, pri))

    def enqueue_node(self, node):
        """Insert an existing node (anything with a ``pr`` attribute)."""
        self.items.append(node)
        # Stable sort: the new node lands after existing nodes of equal priority.
        self.items.sort(key=lambda n: n.pr)
        self.nitems = len(self.items)

    def is_empty(self):
        """Return True when the queue holds no nodes."""
        return not self.items

    def dequ(self):
        """Remove and return the node with the smallest priority.

        Raises:
            EmptyQueueError: if the queue is empty.
        """
        if self.is_empty():
            raise EmptyQueueError("dequ() called on an empty priority queue")

        node = self.items.pop(0)
        self.nitems = len(self.items)
        return node

    def size(self):
        """Return the number of nodes currently queued."""
        return len(self.items)

    def display(self):
        """Print the queued ``(data, priority)`` pairs, or "Empty Queue"."""
        if not self.is_empty():
            print([(i.data, i.pr) for i in self.items])
        else:
            print("Empty Queue")

    @staticmethod
    def text_freq_dic(text):
        """Return a dict mapping each character of ``text`` to its count.

        Keys appear in order of first occurrence in ``text``.
        """
        frequency = {}
        for character in text:
            frequency[character] = frequency.get(character, 0) + 1
        return frequency

    @staticmethod
    def PQ_Text(text):
        """Return a list of ``PQNode`` (one per distinct character of
        ``text``, priority = occurrence count) sorted by ascending priority.
        """
        frequency = PQueue.text_freq_dic(text)
        nodes = [PQNode(ch, count) for ch, count in frequency.items()]
        # One stable sort gives the same order as inserting the nodes one at
        # a time into a sorted list, without re-sorting after every insert.
        nodes.sort(key=lambda n: n.pr)
        return nodes

    def build_from_text(self, text):
        """Replace the queue contents with the character nodes of ``text``."""
        self.items = self.PQ_Text(text)
        self.nitems = len(self.items)

    def build_tree(self):
        """Merge the queued nodes in place until at most one node remains.

        Each step removes the two smallest-priority nodes and re-inserts a
        parent node with data ``'**'`` whose priority is the sum of theirs;
        the first node removed becomes the left child, the second the right.
        Afterwards ``items[0]`` (if any) is the root of the merged tree.
        """
        while self.size() > 1:
            n1 = self.dequ()
            n2 = self.dequ()
            parent = PQNode('**', (n1.pr + n2.pr))
            parent.leftchild = n1
            parent.rightchild = n2
            self.enqueue_node(parent)
            
            

In [39]:
## Now let's build a binary tree

In [40]:
class HuffmanTree(object):
    """Binary Huffman tree over nodes exposing ``data``, ``pr``,
    ``leftchild`` and ``rightchild``.

    Internal nodes created while building carry the data ``'**'``; leaves
    carry the characters of the source text.
    """

    def __init__(self):
        self.root = None

    def is_empty(self):
        """Return True when the tree has no root."""
        return self.root is None

    def preorder(self):
        """Print ``(data, pr)`` for every node in preorder (node, left, right)."""
        self._preorder(self.root)

    def _preorder(self, p):
        if p is None:
            return
        print((p.data,p.pr)," ",end='')
        self._preorder(p.leftchild)
        self._preorder(p.rightchild)

    def inorder(self):
        """Print ``(data, pr)`` for every node in inorder (left, node, right)."""
        self._inorder(self.root)

    def _inorder(self, p):
        if p is None:
            return
        self._inorder(p.leftchild)
        print((p.data,p.pr)," ",end='')
        self._inorder(p.rightchild)

    def postorder(self):
        """Print ``(data, pr)`` for every node in postorder (left, right, node)."""
        self._postorder(self.root)

    def _postorder(self, p):
        if p is None:
            return
        self._postorder(p.leftchild)
        self._postorder(p.rightchild)
        print((p.data,p.pr)," ",end='')

    def levelorder(self):
        """Print ``(data, pr)`` for every node level by level, or
        "Tree is Empty" when there is no root.
        """
        if self.root is None:
            print("Tree is Empty")
            return
        qu = deque()
        qu.append(self.root)
        while qu:
            p = qu.popleft()
            print((p.data,p.pr)," ",end='')
            if p.leftchild is not None:
                qu.append(p.leftchild)
            if p.rightchild is not None:
                qu.append(p.rightchild)

    def height(self):
        """Return the number of nodes on the longest root-to-leaf path
        (0 for an empty tree).
        """
        return self._height(self.root)

    def _height(self, p):
        if p is None:
            return 0
        left_height = self._height(p.leftchild)
        right_height = self._height(p.rightchild)
        return 1 + max(left_height, right_height)

    def build_HfTree_from_Txt(self, text):
        """Build the tree for ``text`` and store its root in ``self.root``.

        For empty text, prints "Empty Text!" and leaves the tree untouched.
        """
        if not text:
            print("Empty Text!")
            return None
        PQ = PQueue()
        PQ.build_from_text(text)
        PQ.build_tree()
        top = PQ.items[0]
        if top.leftchild is None and top.rightchild is None:
            # Only one distinct symbol: a lone leaf as root would give that
            # symbol the empty code, which cannot be decoded. Hang the leaf
            # under an internal root so its code becomes "0".
            parent = PQNode('**', top.pr)
            parent.leftchild = top
            top = parent
        self.root = top

    def search_node(self, key):
        """Return the first node (preorder) whose ``data`` equals ``key``,
        or ``None`` if there is none.
        """
        return self._search_node(key, self.root)

    def _search_node(self, key, node):
        if node is None:
            return None
        if node.data == key:
            return node
        found = self._search_node(key, node.leftchild)
        if found is not None:
            return found
        return self._search_node(key, node.rightchild)

    def encode(self, node_s):
        """Return the bit string for ``node_s``: "0" per left step and "1"
        per right step on the path from the root.

        Returns "" when the path has no steps (the node is the root, or no
        node with matching data exists).
        """
        bit_for = {"left": "0", "right": "1"}
        path = self.Path_to_node(node_s)
        # path[0] is the root entry, which contributes no bit.
        return "".join(bit_for[direction] for _, direction in path[1:])

    def Path_to_node(self, node_s):
        """Return the path from the root to the first node (preorder) whose
        ``data`` equals ``node_s.data``.

        The path is a list of ``(data, direction)`` tuples where direction
        is 'root', 'left' or 'right'; it is empty when nothing matches.
        """
        path = []
        self._Path_to_node(self.root, node_s, path, 'root')
        return path

    def _Path_to_node(self, root, node_s, path, direction):
        """Depth-first helper for ``Path_to_node``.

        Appends to ``path`` while descending and pops entries again on
        dead ends. Returns True if the node was found below ``root``.
        """
        # Base case: fell off the tree.
        if root is None:
            return False

        path.append((root.data, direction))

        # Nodes are matched by their data.
        if root.data == node_s.data:
            return True

        if (self._Path_to_node(root.leftchild, node_s, path, 'left')
                or self._Path_to_node(root.rightchild, node_s, path, 'right')):
            return True

        # Not in this subtree: undo the append before backtracking.
        path.pop()
        return False
        

In [41]:
import sys

def huffman_encoding(text):
    """Encode ``text`` with a Huffman tree built from its own characters.

    Args:
        text: the string to encode.

    Returns:
        A tuple ``(encoding, tree)``: the concatenated per-character codes
        and the ``HuffmanTree`` they were derived from. For empty text the
        encoding is "" and the tree has no root.
    """
    b = HuffmanTree()
    b.build_HfTree_from_Txt(text)
    Mapping = {}   # character -> code, filled on first occurrence
    pieces = []
    for ch in text:
        if ch not in Mapping:
            # Walk the tree once per distinct character only.
            Mapping[ch] = b.encode(b.search_node(ch))
        pieces.append(Mapping[ch])

    # Join once instead of growing a string inside the loop.
    return "".join(pieces), b

def huffman_decoding(text_code, tree):
    """Decode a bit string by walking ``tree`` from its root.

    Each "1" moves to the right child; any other character moves to the
    left child. Reaching a leaf emits its ``data`` and restarts at the root.
    If the root itself is a leaf, every character of ``text_code`` decodes
    to the root's ``data``.

    Args:
        text_code: string of code characters to decode.
        tree: object whose ``root`` node exposes ``data``, ``leftchild``
            and ``rightchild``.

    Returns:
        The decoded string ("" when ``text_code`` is empty).

    Raises:
        ValueError: if ``text_code`` is non-empty but the tree is empty, or
            if the code steps onto a missing child.
    """
    root = tree.root
    if root is None:
        if text_code:
            raise ValueError("cannot decode a non-empty code with an empty tree")
        return ""

    root_is_leaf = root.leftchild is None and root.rightchild is None
    symbols = []
    node = root
    for bit in text_code:
        if not root_is_leaf:
            node = node.rightchild if bit == "1" else node.leftchild
            if node is None:
                raise ValueError("code does not match the tree")
        if node.leftchild is None and node.rightchild is None:
            symbols.append(node.data)
            node = root

    return "".join(symbols)

In [42]:
import sys

# Demo: encode and decode a few sample sentences, printing sizes and contents.
if __name__ == "__main__":
    # NOTE(review): not used in this cell; kept in case later cells read it.
    codes = {}

    sentences_to_try = ["", "The bird is the word", "121232", "121212xds"]

    for a_great_sentence in sentences_to_try:
        print("#"*100)
        print("Test Case for a new sentence")
        print("#"*100)

        print ("The size of the data is: {}\n".format(sys.getsizeof(a_great_sentence)))
        print ("The content of the data is: {}\n".format(a_great_sentence))

        encoded_data, tree = huffman_encoding(a_great_sentence)

        # An empty encoding cannot be parsed by int(..., base=2), so skip it.
        if encoded_data != "":
            print ("The size of the encoded data is: {}\n".format(sys.getsizeof(int(encoded_data, base=2))))
            print ("The content of the encoded data is: {}\n".format(encoded_data))

            decoded_data = huffman_decoding(encoded_data, tree)

            if decoded_data != "":
                print ("The size of the decoded data is: {}\n".format(sys.getsizeof(decoded_data)))
                # This line prints the decoded text, so label it as such.
                print ("The content of the decoded data is: {}\n".format(decoded_data))

    


####################################################################################################
Test Case for a new sentence
####################################################################################################
The size of the data is: 49

The content of the data is: 

Empty Text!
####################################################################################################
Test Case for a new sentence
####################################################################################################
The size of the data is: 69

The content of the data is: The bird is the word

The size of the encoded data is: 36

The content of the encoded data is: 0110111011111100111000001010110000100011010011110111111010101011001010

The size of the decoded data is: 69

The content of the encoded data is: The bird is the word

####################################################################################################
Test Case for a new sentence
##################