diff --git a/Competitive Coding/Greedy/Huffman Coding/Huffman_Coding.py b/Competitive Coding/Greedy/Huffman Coding/Huffman_Coding.py new file mode 100644 index 000000000..3273104df --- /dev/null +++ b/Competitive Coding/Greedy/Huffman Coding/Huffman_Coding.py @@ -0,0 +1,58 @@ +string = 'BCAADDDCCACACAC' + + +# Creating tree nodes +class NodeTree(object): + + def __init__(self, left=None, right=None): + self.left = left + self.right = right + + def children(self): + return (self.left, self.right) + + def nodes(self): + return (self.left, self.right) + + def __str__(self): + return '%s_%s' % (self.left, self.right) + + +# Main function implementing huffman coding +def huffman_code_tree(node, left=True, binString=''): + if type(node) is str: + return {node: binString} + (l, r) = node.children() + d = dict() + d.update(huffman_code_tree(l, True, binString + '0')) + d.update(huffman_code_tree(r, False, binString + '1')) + return d + + +# Calculating frequency +freq = {} +for c in string: + if c in freq: + freq[c] += 1 + else: + freq[c] = 1 + +freq = sorted(freq.items(), key=lambda x: x[1], reverse=True) + +nodes = freq + +while len(nodes) > 1: + (key1, c1) = nodes[-1] + (key2, c2) = nodes[-2] + nodes = nodes[:-2] + node = NodeTree(key1, key2) + nodes.append((node, c1 + c2)) + + nodes = sorted(nodes, key=lambda x: x[1], reverse=True) + +huffmanCode = huffman_code_tree(nodes[0][0]) + +print(' Char | Huffman code ') +print('----------------------') +for (char, frequency) in freq: + print(' %-4r |%12s' % (char, huffmanCode[char])) diff --git a/Competitive Coding/Greedy/Huffman Coding/Readme.md b/Competitive Coding/Greedy/Huffman Coding/Readme.md new file mode 100644 index 000000000..767167e53 --- /dev/null +++ b/Competitive Coding/Greedy/Huffman Coding/Readme.md @@ -0,0 +1,18 @@ +Python Implementation of Huffman Coding using Greedy Approach. +Huffman Coding is a technique of compressing data to reduce its size without loss of data. It is generally useful to compress the data in which there are frequently occurring characters. +It follows a Greedy approach since it deals with generating minimum length prefix-free binary codes. +It uses variable-length encoding scheme for assigning binary codes to characters depending on how frequently they occur in the given text. +Priority Queue is used for building the Huffman tree such that the character that occurs most frequently is assigned the smallest code and the one that occurs least frequently gets the largest code. +It follows this procedure: - + +Create a leaf node for each character and build a min heap using all the nodes (The frequency value is used to compare two nodes in min heap) + +Repeat Steps 3 to 5 while heap has more than one node + +Extract two nodes, say x and y, with minimum frequency from the heap + +Create a new internal node z with x as its left child and y as its right child. Also frequency(z)= frequency(x)+frequency(y) + +Add z to min heap + +Last node in the heap is the root of Huffman tree