In [4]:
class BinaryTreeNode:
    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None
        

In [14]:
class PriorityQueueNode:
    def __init__(self,ele,priority):
        self.ele = ele
        self.priority = priority
        
class PriorityQueue:
    def __init__(self):
        self.pq = []
    
    def isEmpty(self):
        return self.getSize() == 0
    
    def getSize(self):
        return len(self.pq)

    def getMin(self):
        if self.isEmpty():
            return None
        return self.pq[0].ele
    
    def __percolateUp(self):
        childIndex = self.getSize() - 1
        while childIndex > 0:
            parentIndex = (childIndex-1)//2
            
            if self.pq[parentIndex].priority > self.pq[childIndex].priority:
                self.pq[parentIndex],self.pq[childIndex] = self.pq[childIndex],self.pq[parentIndex]
                childIndex = parentIndex
            else:
                break
            
    def insert(self,ele,priority):
        pqNode = PriorityQueueNode(ele,priority)
        self.pq.append(pqNode)
        self.__percolateUp()
        
    def removeMin(self):
        if not self.isEmpty():
            temp = self.pq[0]
            self.pq[0], self.pq[-1] = self.pq[-1], self.pq[0]
            self.pq.pop()
            parent = 0
            while parent < self.getSize() - 1:
                childl = 2*parent + 1
                childr = 2*parent + 2
                if childl < self.getSize() - 1:
                    if self.pq[childl].priority < self.pq[parent].priority:
                        self.pq[childl], self.pq[parent] = self.pq[parent], self.pq[childl]
                        parent = childl

                elif childr < self.getSize() - 1:
                    if self.pq[childr].priority > self.pq[parent].priority:
                        self.pq[childr], self.pq[parent] = self.pq[parent], self.pq[childr]
                        parent = childr
                else:
                    break
            return temp
                

In [19]:
import os

class Huff:
    def __init__(self, path):
        self.frequency = PriorityQueue()
        self.path = path
        self.codes = dict()
        
        
    def makefrequencyqueue(self, text):
        temp = dict()
        for character in text:
            temp[character]= temp.get(character, 0) + 1
            
        for key in temp.keys():
            self.frequency.insert(key, temp[key])
    
    def buildtree(self):
        while self.frequency.getSize() > 1:
            x1 = self.frequency.removeMin()
            x2 = self.frequency.removeMin()
            n1 = BinaryTreeNode(x1)
            n2 = BinaryTreeNode(x2)
            newpriority = x1.priority + x2.priority
            x3 = BinaryTreeNode(None, newpriority)
            x3.left = n1
            x3.right = n2
            
            self.frequency.insert(x3)
            
        return
    
    def constructhelper(self, root, current_bits):
        if root == None:
            return
        if root.value != None:
            self.codes[root.value] = current_bits
            return
            
        self.constructhelper(root.left, current_bits+"0")
        self.constructhelper(root.right, current_bits+"1")
    
    def constructcode(self):
        root = self.frequency.removeMin()
        self.constructhelper(root, "")
        
    def getEncodedText(self, text):
        encoded_text = ""
        for char in text:
            encoded_text += self.codes[char]
        return encoded_text  
    
    def getPaddedEncodedText(self, encoded_text):
        # making the encoded text multiple of 8
        # first 8 bits to represent the first 8 bits
        # last bits padded as 0
        padded_amount = 8 - (len(encoded_text)%8)
        for i in range(padded_amount):
            encoded_text += '0'
        
        
        padded_info = "{0:08b}".format(padded_amount)
        
        padded_encoded_text = padded_info + encoded_text
        return padded_encoded_text
        
        
    def getbytesarray(self, padded_encoded_text):
        bytearray = []
        
        for i in range(0, len(padded_encoded_text), 8):
            bytes = padded_encoded_text[i:i+8]
            bytearray.append(int(bytes, 2))
            
        return bytearray
    
                             
        
    def compress(self):
        # get file from path
        # read text from file
        file_name, file_extension = os.path.splitext(self.path)
        output_path = file_name + '.bin'
        
        with open(self.path, 'r+') as file, open(output_path, 'wb') as output:
        
            text = file.read()
            text = text.rstrip()
            
            # make frequency queue using the text
            self.makefrequencyqueue(text)

            # construct tree
            self.buildtree()

            # construct codes from binary tree
            self.constructcode()

            # creating encoded text using the codes
            encoded_text = self.getEncodedText(text)

            # put this encoded text into the binary file

            # pad the encoded text
            padded_encoded_text = self.getPaddedEncodedText(encoded_text)

            # return binary file as output
            bytes_array = self.getbytesarray(padded_encoded_text)
            final_bytes = bytes(bytes_array)

            output.write(final_bytes)
        print("compressed butch")
        return output_path
    
    
                             
path = '/home/abhinavjava/Data_Structures_Algorithms/README.md'
h = Huff(path)
output_path = h.compress()
        
        

KeyboardInterrupt: 