In [13]:
import time
import heapq
import numpy as np
import sys
import pickle
import cv2
import os

# create treenode for huffman coding
class node:
    def __init__(self, weight = 0, item = None, value = None, children = []):
        self.weight = weight
        self.item = item # name node
        self.value = value # code work
        self.children = children

    # comparision on priority queue
    def __lt__(self, other):
        return self.weight < other.weight
    def __gt__(self, other):
        return self.weight > other.weight

    # update codework on tree (start from root)
    def update_value(self):
        for child in self.children:
            if self.value != None:
                child.value = self.value + child.value
            child.update_value()
    
class Compress:
    
    def __init__(self):
        self.img = None
        self.compression = None


    # create frequency of all items in array
    def _frequency(self, arr):
        freq = []
        # sort the array
        arr = sorted(arr)
        
        mark = 0
        item = arr[0]

        i = 0
        l = len(arr)
        while i < l:
            while i < l and arr[i] == item:
                i = i + 1
     
            tup = node(weight = i - mark, item = item)
            freq += [tup]
            mark = i
            if i < l:
                item = arr[i]

        return freq

    # create dictionary for an array
    def _createdict(self, arr):
        # create Heap
        freq = self._frequency(arr)
        heapq.heapify(freq)
        
        encode_arr = []
        encoded_dict = {}
        l = len(freq)

        # pop 2 smallest item and push back the new parent until 1 node left
        while l > 1:

            # node1: left = '0'
            # node2: right = '1'
            node1 = heapq.heappop(freq) # 0
            node2 = heapq.heappop(freq) # 1

            # if they are not list in encode_arr, we push them in
            if node1.item != None:
                encode_arr += [node1]
            if node2.item != None:
                encode_arr += [node2]
            
            node1.value = '0'
            node2.value = '1'

            # create parent node with weight = sum of children weight
            parent = node(weight = node1.weight + node2.weight,
                          item = None,
                          value = None,
                          children = [node1,node2])

            # push the parent node in to heap
            heapq.heappush(freq, parent)
    
            l = l - 1

        # set the root node and update all values (codework)
        root = freq[0]
        root.value = None
        root.update_value()

        # create dict from encoded arr
        for item in encode_arr:
            encoded_dict[item.item] = item.value
        
        return encoded_dict

    # encode the array with dict created
    def _create_encode_text(self, arr):
        encoded_dict = self._createdict(arr)
        encoded_text = ''
        for x in arr:
            encoded_text += str(encoded_dict[x])
        return encoded_text, encoded_dict

    # convert encoded text to bytearray
    # http://bhrigu.me/blog/2017/01/17/huffman-coding-python-implementation/
    def encode_to_binary(self, arr):
        encoded_text, encoded_dict = self._create_encode_text(arr)
        # create padding
        pad = 8 - len(encoded_text) % 8
        encoded_text = encoded_text + '{0:08b}'.format(pad)
        for i in range(pad):
            encoded_text = '0' + encoded_text

        # create byte array as result
        b = bytearray()
        for i in range(0, len(encoded_text), 8):
            byte = encoded_text[i:i+8]
            b.append(int(byte,2))

        # create 2 array for encoded_dict
        name = bytearray()
        codework = []
        for x,y in encoded_dict.items():
            name.append(x)
            codework.append(y)
        return b, name, codework
    
    def uncompress(self, filename, uncompress): # uncompress image
        self.img = cv2.imread(filename)
        with open(uncompress, 'wb') as w:
            pickle.dump(self.img, w)
        return self.img
    def _convert_to_vectors(self):

        w,h,c = self.img.shape
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2YCrCb)
        y, cr, cb = img[:,:,0], img[:,:,1], img[:,:,2]
        y = np.array(y.reshape((1,w*h))[0])
        cr = np.array(cr.reshape((1,w*h))[0])
        cb = np.array(cb.reshape((1,w*h))[0])
        
        return y, cr, cb

    def compress_image(self, filename):
        try:
            w,h,c = self.img.shape
            y, cr, cb = self._convert_to_vectors()
        
            encoded_y = self.encode_to_binary(y)
            encoded_cr = self.encode_to_binary(cr)
            encoded_cb = self.encode_to_binary(cb)

            t = (encoded_y, encoded_cr, encoded_cb, w, h)
            with open(filename, 'wb') as w:
                pickle.dump(t,w)
           
            print('Compress sucessful, file saved as \'%s\'!!!' %(filename))
        except Exception:
            print('failed to compress')

class Decompress:
    
    def __init__(self):
        self.img = None
        self.compressed = None

    def loads(self, filecompressed):
        try:
            with open(filecompressed, 'rb') as r:
                self.compressed = pickle.load(r)
                
        except Exception:
            print('Unable to load compressed file')


    def _decompressHuff(self, encoded):
        
        file, name, codework = encoded
        
        #decode from bytearray
        encoded_text = ''
        for byte in file:
            encoded_text += '{0:08b}'.format(byte)
        
        #get padding and reconstruct encoded_text
        padding = encoded_text[len(encoded_text) - 8:]
        pad = int(padding,2)
        encoded_text = encoded_text[:len(encoded_text) - 8]
        encoded_text = encoded_text[pad:]

        #create dictionary
        encoded_dict = {}
        for i,cw in enumerate(codework):
            encoded_dict[cw] = name[i]

        # decompress
        result = []
        i = 0
        ch = ''
        while i < len(encoded_text):
            ch += encoded_text[i]
            if ch in codework:
                result += [encoded_dict[ch]]
                ch = ''
            i += 1
        return np.array(result)
    
    def _unpack(self):
        if self.compressed:
            encoded_y, encoded_cr, encoded_cb, w, h = self.compressed

            '''
            decompress
            '''
            y = self._decompressHuff(encoded_y)
            cr = self._decompressHuff(encoded_cr)
            cb = self._decompressHuff(encoded_cb)
            
            return y, cr, cb, w, h
        
    def _constructImage(self):
        y, cr, cb, w, h = self._unpack()

        y = y.reshape((w,h))
        cr = cr.reshape((w,h))
        cb = cb.reshape((w,h))

        img = np.zeros((w,h, 3), dtype = np.uint8)

        img[:,:,0], img[:,:,1], img[:,:,2] = y, cr, cb

        self.img = cv2.cvtColor(img, cv2.COLOR_YCrCb2BGR)

    def DecompressImage(self, filecompress, fileimage):
        try:
            self.loads(filecompress)
            self._constructImage()
            with open(fileimage, 'wb') as r:
                pickle.dump(self.img,r)
            
            print('Decompress sucessful!!! Image save as \'%s\'' %(fileimage))
        except Exception:
            print('failed to decompress')



if __name__ == "__main__":

    path_to_original_image = '60243997_292532988294726_977913443016988893_n.jpg'
    #path_s = cv2.imread(path_to_original_image)
    path_to_compressed_file = 'Compressed' 
    path_to_uncompressed_image = 'Uncompressed' 
    path_to_new_image = 'Reconstructed'
    filepath = os.path.join(os.getcwd(), path_to_original_image)
    oldsize = os.stat(filepath).st_size
    
    print(' Tên ảnh   : ', path_to_original_image )
    print(' Kích cỡ file  : ', oldsize ,'\n')

    # compress using huff method
    print(' Tiến hành chuyển về size ảnh gốc')
    x = Compress()
    x.uncompress(path_to_original_image, path_to_uncompressed_image)
    print(' Ảnh gốc của ảnh đã chuyển :', path_to_uncompressed_image)
    new_size = os.stat(path_to_uncompressed_image).st_size
    print('- Size của file ảnh gốc: ', new_size)
    print ('\n--- Tiến hành nén ---')
    start_time = time.time()
    x.compress_image(path_to_compressed_file)
    elapsed_time = time.time() - start_time
    print("Stat: ")
    print('- Thời gian thực thi nén : ', elapsed_time)
    log =  "- Input File: " + str(path_to_uncompressed_image) + "\n- Output File: " + str(path_to_compressed_file)
    print(log)
    new_size = os.stat(path_to_compressed_file).st_size
    print('- Size của file sau khi nén: ', new_size)

    # decompress
    y = Decompress()
    y.loads(path_to_compressed_file)
    print ('\n--- Tiến hành giải nén ---')
    y.DecompressImage(path_to_compressed_file, path_to_new_image)
    elapsed_time = time.time() - start_time
    print("Stat: ")
    print('- Thời gian thực thi giải nén : ', elapsed_time)
    log =  "- Input File: " + str(path_to_compressed_file) + "\n- Output File: " + str(path_to_new_image)
    print(log)
    new_size = os.stat(path_to_new_image).st_size
    print('- Size của file sau khi giải nén: ', new_size)

    
    # show image
    before = pickle.load(open(path_to_uncompressed_image,'rb'))
    after = pickle.load(open(path_to_new_image, 'rb'))
    
    cv2.imshow('Uncompressed', before)
    cv2.imshow('Reconstructed', after)
    cv2.imwrite(path_to_uncompressed_image +'.jpg', before)
    cv2.imwrite(path_to_new_image+'.jpg', after)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


 Tên ảnh   :  bien-dau-thuong-thanh-hieu-qua-1560298882-400.jpg
 Kích cỡ file  :  18256 

 Tiến hành chuyển về size ảnh gốc
 Ảnh gốc của ảnh đã chuyển : Uncompressed
- Size của file ảnh gốc:  554564

--- Tiến hành nén ---
Compress sucessful, file saved as 'Compressed'!!!
Stat: 
- Thời gian thực thi nén :  0.44742727279663086
- Input File: Uncompressed
- Output File: Compressed
- Size của file sau khi nén:  202104

--- Tiến hành giải nén ---
Decompress sucessful!!! Image save as 'Reconstructed'
Stat: 
- Thời gian thực thi giải nén :  3.2880921363830566
- Input File: Compressed
- Output File: Reconstructed
- Size của file sau khi giải nén:  554564
