## Huffman Encoding

### Huffman Tree
<ol>
    <li>Create a leaf node for each unique character and build a min-heap of all leaf nodes.</li>
    <li>Extract the two nodes with the minimum frequency from the min-heap.</li>
    <li>Create a new internal node with a frequency equal to the sum of the two nodes' frequencies.<br> Make the first extracted node its left child and the other extracted node its right child.<br> Add this node to the min-heap.</li>
    <li>Repeat steps #2 and #3 until the heap contains only one node.<br> The remaining node is the root node and the tree is complete.</li>
</ol>

In [1]:
class HuffmanTreeNode:
    """One node of a Huffman tree: a symbol, its frequency and two child links."""

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # A freshly created node has no children; they are attached by the
        # code that merges nodes into a tree.
        self.left = self.right = None

    def __str__(self):
        """Return the symbol and the frequency separated by a single space."""
        return " ".join((self.char, str(self.freq)))

In [2]:
class MinHeap:
    """Array-backed binary min-heap of nodes ordered by their ``freq`` attribute.

    Stored items must expose ``freq`` (used for ordering) and ``char``
    (used only by ``printData``).  Unused slots of ``self.heap`` hold the
    placeholder ``0``.  Operations that cannot be performed return the
    sentinel ``-1`` instead of raising.
    """

    def __init__(self, capacity):
        """Create an empty heap with room for ``capacity`` nodes."""
        self.capacity = capacity
        self.heap = [0 for _ in range(capacity)]
        self.size = 0

    def minHeapify(self, i):
        """Sift the node at index ``i`` down until the subtree is a min-heap."""
        smallest = i    # Root
        l = 2 * i + 1   # Left Child
        r = 2 * i + 2   # Right Child

        # NOTE: ``<=`` (not ``<``) is kept on purpose: it decides how ties
        # between equal frequencies are broken and therefore the tree shape.
        if l < self.size and self.heap[l].freq <= self.heap[smallest].freq:
            smallest = l

        if r < self.size and self.heap[r].freq <= self.heap[smallest].freq:
            smallest = r

        if smallest != i:
            self.heap[smallest], self.heap[i] = self.heap[i], self.heap[smallest]
            self.minHeapify(smallest)

    def _siftUp(self, child):
        """Move the node at index ``child`` up while it is smaller than its parent."""
        parent = (child - 1) // 2
        while child > 0 and self.heap[child].freq < self.heap[parent].freq:
            self.heap[child], self.heap[parent] = self.heap[parent], self.heap[child]
            child = parent
            parent = (child - 1) // 2

    def insertKey(self, node):
        """Insert ``node``; the backing list grows if the heap is already full."""
        if self.size == len(self.heap):
            # Grow instead of failing with IndexError when capacity is reached.
            self.heap.append(node)
            self.capacity = len(self.heap)
        else:
            self.heap[self.size] = node
        self.size += 1
        self._siftUp(self.size - 1)

    def deleteKey(self, i):
        """Remove the node at index ``i``; return -1 if ``i`` is out of range."""
        if i < 0 or self.size <= i:
            return -1

        last = self.size - 1
        self.heap[i] = self.heap[last]
        self.heap[last] = 0
        self.size = last

        if i < self.size:
            # The node moved into slot ``i`` may be smaller than its new
            # parent (it can come from a different subtree), so it may have
            # to travel up rather than down.
            parent = (i - 1) // 2
            if i > 0 and self.heap[i].freq < self.heap[parent].freq:
                self._siftUp(i)
            else:
                self.minHeapify(i)

    def peek(self):
        """Return the minimum node without removing it, or -1 if the heap is empty."""
        if self.size <= 0:
            return -1

        return self.heap[0]

    def extractMin(self):
        """Remove and return the minimum node, or return -1 if the heap is empty."""
        if self.size <= 0:
            return -1

        min_ = self.heap[0]
        self.deleteKey(0)
        return min_

    def printData(self):
        """Print ``char`` and ``freq`` of every stored node in array order.

        Returns -1 when the heap is empty.
        """
        if self.size <= 0:
            return -1

        for i in range(self.size):
            print(self.heap[i].char, " ", self.heap[i].freq)

In [3]:
def createAndBuildMinHeap(char, freq, size):
    """Build a Huffman tree from the first ``size`` symbols and return its root.

    ``char[k]`` is a symbol and ``freq[k]`` its frequency.  Internal nodes
    are created with the placeholder symbol ``'$'``.
    """
    pq = MinHeap(size)

    # One leaf per symbol goes into the priority queue.
    for idx in range(size):
        pq.insertKey(HuffmanTreeNode(char[idx], freq[idx]))

    # Keep merging the two least frequent nodes until a single tree is left.
    while pq.size > 1:
        first = pq.extractMin()
        second = pq.extractMin()

        merged = HuffmanTreeNode('$', first.freq + second.freq)
        merged.left, merged.right = first, second

        pq.insertKey(merged)

    # The only node left in the queue is the root of the Huffman tree.
    return pq.extractMin()

#### Steps for traversing the Huffman Tree
<ol>
    <li>Create an auxiliary array.</li>
    <li>Traverse the tree starting from root node.</li>
    <li>Add 0 to the array while traversing the left child and add 1 to the array while traversing the right child.</li>
    <li>Print the array elements whenever a leaf node is found.</li>
</ol>

In [4]:
def HuffmanCodes(char, freq, size):
    """Build a Huffman tree for the given symbols and print every symbol's code.

    Args:
        char: sequence of symbols.
        freq: sequence of frequencies; ``freq[k]`` belongs to ``char[k]``.
        size: number of symbols to take from ``char`` / ``freq``.
    """
    # Construct the Huffman tree from exactly ``size`` symbols (this used to
    # be hard-coded to 6, which ignored the parameter).
    root = createAndBuildMinHeap(char, freq, size)

    # The scratch array holds the bits of the path currently being walked;
    # it is sized by the value heightOfATree reports for the tree.
    depth = heightOfATree(root)
    arr = [0 for _ in range(depth)]

    printCodes(root, arr, 0)

In [5]:
def heightOfATree(root):
    """Return the number of nodes on the longest path from ``root`` to a leaf.

    An empty tree (``root is None``) has height 0.
    """
    if root is None:
        return 0
    return 1 + max(heightOfATree(root.left), heightOfATree(root.right))

In [6]:
def printCodes(root, arr, top):
    """Print the code of every leaf below ``root``.

    ``arr[:top]`` holds the bits of the path taken so far: 0 for a step to
    the left child, 1 for a step to the right child.
    """
    # Visit the left branch (bit 0) first, then the right branch (bit 1).
    for child, bit in ((root.left, 0), (root.right, 1)):
        if child:
            arr[top] = bit
            printCodes(child, arr, top + 1)

    # A leaf carries a real symbol: emit it together with its path bits.
    if isLeaf(root):
        print(root.char, " : ", end="")
        printArr(arr, top)

In [7]:
def isLeaf(root):
    """Return True when ``root`` has neither a left nor a right child."""
    return not (root.left or root.right)

In [8]:
def printArr(arr, top):
    """Print the first ``top`` elements of ``arr`` back to back, then a newline."""
    pos = 0
    while pos < top:
        print(arr[pos], end="")
        pos += 1
    print()

In [9]:
# Sample alphabet and its frequencies; equal positions belong together.
char = list('abcdef')
freq = [5, 9, 12, 13, 16, 45]

HuffmanCodes(char, freq, len(char))

f  : 0
c  : 100
d  : 101
a  : 1100
b  : 1101
e  : 111


## Huffman Decoding

```
Input Data : AAAAAABCCCCCCDDEEEEE
Frequencies : A: 6, B: 1, C: 6, D: 2, E: 5
Encoded Data : 
0000000000001100101010101011111111010101010
Huffman Tree: '#' is the special character used
              for internal nodes as character field
              is not needed for internal nodes. 
               #(20)
             /       \
        #(12)         #(8)
     /      \        /     \
    A(6)     C(6) E(5)     #(3)
                         /     \
                       B(1)    D(2)  
Code of 'A' is '00', code of 'C' is '01', ..
Decoded Data : AAAAAABCCCCCCDDEEEEE


Input Data : geeksforgeeks
Characters with their codes
e 10, f 1100, g 011, k 00, o 010, r 1101, s 111
Encoded Huffman data :
01110100011111000101101011101000111
Decoded Huffman Data :
geeksforgeeks

```

In [10]:
class HuffmanTreeNode:
    """Huffman tree node storing a symbol, its frequency and its children."""

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # Both child links are empty until the node is wired into a tree.
        self.left = self.right = None

    def __str__(self):
        """Render the node as ``"<char> <freq>"``."""
        parts = (self.char, str(self.freq))
        return " ".join(parts)

In [11]:
def isLeaf(root):
    """Tell whether ``root`` is a leaf, i.e. has no left and no right child."""
    has_child = root.left or root.right
    return not has_child

In [12]:
def decodedString(root, encoded):
    """Decode a string of '0'/'1' characters by walking the Huffman tree.

    Args:
        root: root node of the tree; nodes expose ``left``, ``right``, ``char``.
        encoded: the bit string; '0' steps to the left child, any other
            character steps to the right child.

    Returns:
        The decoded text.  An empty ``encoded`` always decodes to ``""``.
    """
    if not encoded:
        return ""

    # A tree that consists of a single leaf has no branches to follow, so
    # every bit stands for that one symbol (walking would dereference None).
    if root.left is None and root.right is None:
        return root.char * len(encoded)

    pieces = []  # collected symbols; joined once at the end
    curr = root
    for bit in encoded:
        curr = curr.left if bit == '0' else curr.right

        # Reaching a leaf completes one symbol; restart from the root.
        if curr.left is None and curr.right is None:
            pieces.append(curr.char)
            curr = root

    return "".join(pieces)

In [13]:
def printCodes(root, codes, string):
    """Fill ``codes`` with the bit string of every leaf below ``root``.

    Args:
        root: current tree node (or None); nodes expose ``left``, ``right``
            and ``char``.
        codes: dict that receives ``char -> code`` entries.
        string: bits of the path from the tree root to ``root``
            ('0' = left, '1' = right).
    """
    if root is None:
        return

    # Identify leaves structurally rather than by ``char != '$'``: the input
    # text may itself contain '$', and that symbol still needs a code.
    if root.left is None and root.right is None:
        codes[root.char] = string
        return

    printCodes(root.left, codes, string + '0')
    printCodes(root.right, codes, string + '1')

In [14]:
def huffmanCodes(freq, size):
    """Build a Huffman tree from a frequency table and return its root.

    Args:
        freq: mapping ``symbol -> frequency``.
        size: requested heap capacity; the heap is sized to at least the
            number of symbols in ``freq``.

    Returns:
        The root node of the tree, or None when ``freq`` is empty.
    """
    # Nothing to encode: there is no tree to build.
    if not freq:
        return None

    # Create the min-heap and load one leaf per symbol.
    heap = MinHeap(max(size, len(freq)))
    for symbol, count in freq.items():
        heap.insertKey(HuffmanTreeNode(symbol, count))

    # With a single symbol the lone leaf would be the root and its code the
    # empty string.  Hang it under an internal node so it gets the code '0'.
    if heap.size == 1:
        only = heap.extractMin()
        root = HuffmanTreeNode('$', only.freq)
        root.left = only
        return root

    # Merge the two least frequent nodes until one node remains; that node
    # is the root of the Huffman tree.
    while heap.size > 1:
        left = heap.extractMin()
        right = heap.extractMin()

        node = HuffmanTreeNode('$', left.freq + right.freq)
        node.left = left
        node.right = right

        heap.insertKey(node)

    return heap.peek()

In [15]:
# EnCoding and DeCoding 
def HuffmanCoding(string):
    """Encode ``string`` with Huffman codes, decode it again and print each step.

    Prints the per-character code table, the encoded bit string and the
    decoded text.  Returns nothing.
    """
    # Count how often each character occurs.
    freq = {}
    for ch in string:
        freq[ch] = freq.get(ch, 0) + 1

    # Build the tree, then collect every character's code into ``codes``.
    codes = {}
    root = huffmanCodes(freq, len(string))
    printCodes(root, codes, "")

    # Code table (the heading text says "Frequency", the rows show codes).
    print("Character with These Frequency")
    for symbol, code in codes.items():
        print(symbol, " : ", code)

    # Encoded form: the codes of all characters, concatenated in order.
    print()
    encoded = "".join(codes[ch] for ch in string)
    print('Encoded String :', encoded)

    # Decode the bit string again to show the round trip.
    print('Decoded String :', end=" ")
    decode = decodedString(root, encoded)
    print(decode)

In [16]:
# Demo: run the full encode/decode round trip on a sample word.
string = "geeksforgeeks"
HuffmanCoding(string)

Character with These Frequency
g  :  00
r  :  010
k  :  011
e  :  10
f  :  1100
o  :  1101
s  :  111

Encoded String : 00101001111111001101010001010011111
Decoded String : geeksforgeeks
