# In [10]:

import os
import heapq
class BinaryTreeNode:
    """Binary tree node that is ordered by its ``freq`` attribute.

    The ordering methods let nodes be stored directly in a ``heapq`` heap,
    where the node with the smallest frequency is popped first.

    Attributes:
        char: Symbol stored in the node (any value supplied by the caller).
        freq: Frequency/weight used for all comparisons.
        left: Left child, ``None`` until assigned by the caller.
        right: Right child, ``None`` until assigned by the caller.
    """

    def __init__(self, char, freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __repr__(self):
        return "{}(char={!r}, freq={!r})".format(
            type(self).__name__, self.char, self.freq
        )

    def __lt__(self, other):
        """Return True when this node's frequency is smaller than *other*'s."""
        if not isinstance(other, BinaryTreeNode):
            # Let Python try the reflected operation / raise TypeError itself
            # instead of failing with an AttributeError on ``other.freq``.
            return NotImplemented
        return self.freq < other.freq

    def __eq__(self, other):
        """Return True when both nodes carry the same frequency."""
        if not isinstance(other, BinaryTreeNode):
            # Comparing with None or any foreign object is simply "not equal".
            return NotImplemented
        return self.freq == other.freq

    # Defining __eq__ already makes instances unhashable; spell it out so the
    # behaviour is visible to readers.
    __hash__ = None


class HuffmanCoding:
    """Huffman-compress the text file at ``path`` and decompress it again.

    The code table lives only in memory (``codes`` / ``reverseCodes``); it is
    not written into the ``.bin`` file. ``decompress`` therefore only works on
    an instance whose ``compress`` has already been run (or whose
    ``reverseCodes`` has been filled in some other way).

    Layout of the compressed file: one header byte holding the number of
    padding bits (1..8 as written by ``compress``), followed by the encoded
    bit stream padded with that many trailing ``0`` bits.
    """

    def __init__(self, path):
        self.path = path
        self.heap = []
        self.codes = {}
        self.reverseCodes = {}

    def __make_frequency_dict(self, text):
        """Return a dict mapping every character of *text* to its count."""
        counts = {}
        for char in text:
            counts[char] = counts.get(char, 0) + 1
        return counts

    def __buildHeap(self, freq_dict):
        """Push one leaf node per ``(char, freq)`` pair onto ``self.heap``."""
        for char, freq in freq_dict.items():
            heapq.heappush(self.heap, BinaryTreeNode(char, freq))

    def __buildTree(self):
        """Merge the two lightest nodes until at most one root is left."""
        while len(self.heap) > 1:
            node1 = heapq.heappop(self.heap)
            node2 = heapq.heappop(self.heap)
            # Internal nodes carry no symbol, only the combined weight.
            merged = BinaryTreeNode(None, node1.freq + node2.freq)
            merged.left = node1
            merged.right = node2
            heapq.heappush(self.heap, merged)

    def __buildCodesHelper(self, root, curr_bits):
        """Walk the tree, recording ``curr_bits`` for every leaf reached."""
        if root is None:
            return
        if root.char is not None:
            self.codes[root.char] = curr_bits
            self.reverseCodes[curr_bits] = root.char
            return

        self.__buildCodesHelper(root.left, curr_bits + "0")
        self.__buildCodesHelper(root.right, curr_bits + "1")

    def __buildCodes(self):
        """Pop the tree root off the heap and fill both code tables.

        An empty heap (empty input text) leaves the tables empty. A root that
        is itself a leaf (only one distinct character) gets the code ``"0"``;
        an empty code would encode nothing and could never be decoded.
        """
        if not self.heap:
            return
        root = heapq.heappop(self.heap)
        if root.left is None and root.right is None:
            self.codes[root.char] = "0"
            self.reverseCodes["0"] = root.char
            return
        self.__buildCodesHelper(root, "")

    def __getEncodedText(self, text):
        """Return *text* as a string of ``0``/``1`` characters.

        Raises:
            KeyError: if *text* contains a character without a code.
        """
        return "".join(self.codes[char] for char in text)

    def __getPaddedEncodedText(self, encoded_text):
        """Pad *encoded_text* to a multiple of 8 bits and prepend the header.

        The padding amount is always in 1..8 (a full extra byte is added when
        the input is already byte-aligned); this keeps the file format
        unchanged. The header is that amount as an 8-bit binary string.
        """
        padded_amount = 8 - (len(encoded_text) % 8)
        padded_info = "{0:08b}".format(padded_amount)
        return padded_info + encoded_text + "0" * padded_amount

    def __getBytesArray(self, padded_encoded_text):
        """Convert a bit string into a list of ints, one per 8-bit group."""
        return [
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        ]

    def compress(self):
        """Compress ``self.path`` into a sibling ``.bin`` file.

        Returns:
            The path of the written binary file.
        """
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + ".bin"

        # Start from a clean state so that a second call on the same instance
        # cannot leave stale codes behind that would corrupt decoding.
        self.heap = []
        self.codes = {}
        self.reverseCodes = {}

        # The input is only read, so plain read mode is enough.
        with open(self.path, "r") as file:
            text = file.read()
        # NOTE(review): stripping makes the round trip lossy for leading and
        # trailing whitespace; kept because it is the existing behaviour.
        text = text.strip()

        # frequency table -> heap -> tree -> code table
        freq_dict = self.__make_frequency_dict(text)
        self.__buildHeap(freq_dict)
        self.__buildTree()
        self.__buildCodes()

        # encode, pad to whole bytes, and serialise
        encoded_text = self.__getEncodedText(text)
        padded_encoded_text = self.__getPaddedEncodedText(encoded_text)
        bytes_array = self.__getBytesArray(padded_encoded_text)

        with open(output_path, "wb") as output:
            output.write(bytes(bytes_array))
        print("Compressed")
        return output_path

    def __removePadding(self, text):
        """Strip the 8-bit header and the trailing padding bits from *text*.

        Raises:
            ValueError: if *text* is too short to contain the header.
        """
        if len(text) < 8:
            raise ValueError("compressed data is missing its padding header")
        extra_padding = int(text[:8], 2)
        payload = text[8:]
        if extra_padding == 0:
            # ``payload[:-0]`` would be empty, so handle "no padding" apart.
            return payload
        return payload[:-extra_padding]

    def __decodeText(self, text):
        """Decode a bit string using ``self.reverseCodes``.

        Bits left over at the end that do not form a complete code are
        ignored.
        """
        decoded = []
        current_bits = ""
        for bit in text:
            current_bits += bit
            if current_bits in self.reverseCodes:
                decoded.append(self.reverseCodes[current_bits])
                current_bits = ""
        return "".join(decoded)

    def decompress(self, input_path):
        """Decode the binary file at *input_path* into a text file.

        The output is written next to ``self.path`` as
        ``<name>_decompressed.txt``.

        Args:
            input_path: Path of a file produced by ``compress``.

        Returns:
            The path of the written text file.
        """
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + "_decompressed" + ".txt"

        # Read the compressed file that was passed in, not ``self.path``
        # (which is the original, uncompressed text).
        with open(input_path, "rb") as file:
            raw = file.read()
        bit_string = "".join(format(byte, "08b") for byte in raw)

        actual_text = self.__removePadding(bit_string)
        decompressed_text = self.__decodeText(actual_text)

        with open(output_path, "w") as output:
            output.write(decompressed_text)
        return output_path
if __name__ == "__main__":
    # Demo run: compress "text.txt" from the current working directory, then
    # hand the path of the produced .bin file to decompress().
    # Guarded so that importing this module does not touch the file system.
    path = "text.txt"
    h = HuffmanCoding(path)
    output_path = h.compress()
    h.decompress(output_path)

# Output: Compressed
