In [155]:
import sys

class Node:
    '''
    Hybrid linked-list / binary-tree node holding a character together
    with that character's frequency.
    '''
    def __init__(self, char, freq):
        # Payload: the symbol and how often it occurs
        self.char, self.freq = char, freq
        # Binary-tree children
        self.left = self.right = None
        # Linked-list successor
        self.next = None

    def __repr__(self):
        return "({}, {})".format(self.char, self.freq)

class Tree:
    '''
    Wraps the root node of a Huffman tree.

    Leaf nodes carry a symbol in `char`; internal nodes have `char` set to
    None and reach their children through `left` and `right`.
    '''
    def __init__(self, root):
        self.root = root

    def huffman_code(self):
        '''
        Builds the symbol -> bit-string table for this tree.

        Descending to a left child appends '0' to the code and descending
        to a right child appends '1'.

        Returns:
            dict mapping each leaf's `char` to its code. An empty tree
            (root is None) yields an empty dict. A tree that consists of a
            single leaf maps its symbol to the empty string, because no
            branch is taken to reach it.
        '''
        h_codes = dict()

        def traverse(code, node):
            # A missing child (or an empty tree) contributes no codes.
            if node is None:
                return

            # Compare against None instead of relying on truthiness so
            # that falsy symbols are still recognised as leaves.
            if node.char is not None:
                h_codes[node.char] = code
                return

            traverse(code + '0', node.left)
            traverse(code + '1', node.right)

        traverse('', self.root)

        return h_codes
        
class Queue:
    '''
    Priority queue kept as a singly linked list, lowest frequency first.
    Nodes with equal frequency stay in the order they were enqueued.
    '''
    def __init__(self):
        self.head = None
        self.size = 0

    def enqueue(self, new_node):
        '''
        Links new_node in behind every queued node whose frequency is less
        than or equal to its own. A None argument is ignored.
        '''
        if new_node is None:
            return

        front = self.head
        if front is None or front.freq > new_node.freq:
            # Empty queue, or new_node is strictly smaller than the head.
            new_node.next = front
            self.head = new_node
        else:
            # Stop on the last node that does not exceed new_node.
            tail = front
            while tail.next is not None and tail.next.freq <= new_node.freq:
                tail = tail.next
            new_node.next = tail.next
            tail.next = new_node

        self.size += 1

    def dequeue(self):
        '''
        Removes and returns the head node, or returns None when the queue
        is empty.
        '''
        front = self.head
        if front is not None:
            self.head = front.next
            self.size -= 1
        return front

    def __repr__(self):
        parts = []
        cursor = self.head
        while cursor is not None:
            parts.append(str(cursor))
            cursor = cursor.next

        return ', '.join(parts)

def huffman_encoding(data):
    '''
    Huffman-encodes `data`.

    Args:
        data: the string to encode.

    Returns:
        (encoded_data, tree): encoded_data is a string made of '0' and '1'
        characters, and tree is the Tree that huffman_decoding needs to
        reverse the encoding. Empty input returns ('', Tree(None)).
    '''
    # Count frequency
    counts = dict()
    for char in data:
        counts[char] = counts.get(char, 0) + 1

    # Nothing to encode: there is no tree to build and no bits to emit.
    if not counts:
        return '', Tree(None)

    q = Queue()
    for char in counts:
        q.enqueue(Node(char, counts[char]))

    # Only one distinct symbol: a tree made of a lone leaf would give that
    # symbol the empty code, and the encoded text would lose how many times
    # it occurs. Hang the leaf under an internal root so its code is '0'.
    if q.size == 1:
        leaf = q.dequeue()
        root = Node(None, leaf.freq)
        root.left = leaf
        # leaf.freq is the number of symbols in data, one '0' bit each.
        return '0' * leaf.freq, Tree(root)

    # Create tree: repeatedly merge the two least frequent nodes.
    while q.size > 1:
        left = q.dequeue()
        right = q.dequeue()
        merger = Node(None, left.freq + right.freq)
        merger.left = left
        merger.right = right
        q.enqueue(merger)

    # The single node left in the queue is the root.
    tree = Tree(q.head)
    h_codes = tree.huffman_code()
    encoded_data = ''.join(h_codes[char] for char in data)

    return encoded_data, tree

def huffman_decoding(data, tree):
    '''
    Decodes a string of '0'/'1' characters back into text.

    Args:
        data: the encoded bit string. '0' follows the left branch; any
            other character follows the right branch.
        tree: object whose `root` attribute is the root node of the
            Huffman tree used for encoding.

    Returns:
        The decoded string. Trailing bits that end before a leaf is
        reached are ignored.

    Raises:
        ValueError: if the bits lead off the tree, or if the tree is empty
            while `data` is not.
    '''
    root = tree.root

    if root is None:
        if data:
            raise ValueError("cannot decode non-empty data with an empty tree")
        return ''

    # The tree is a single leaf: there is no branch to follow, so every
    # bit stands for one occurrence of that symbol.
    if root.char is not None:
        return ''.join(root.char for _ in data)

    decoded = []
    node = root

    for bit in data:
        node = node.left if bit == '0' else node.right

        if node is None:
            raise ValueError("encoded data does not match the Huffman tree")

        # Compare against None instead of relying on truthiness so that
        # falsy symbols are still recognised as leaves.
        if node.char is not None:
            decoded.append(node.char)
            node = root

    return ''.join(decoded)



In [156]:
a_great_sentence = "abra kadabra alakazam"

# Report the size sys.getsizeof gives for the input string, then its text.
print("The size of the data is: {}\n".format(sys.getsizeof(a_great_sentence)))
print("The content of the data is: {}\n".format(a_great_sentence))

# Encode the sentence; the returned tree is what huffman_decoding needs.
encoded_data, tree = huffman_encoding(a_great_sentence)


The size of the data is: 70

The content of the data is: abra kadabra alakazam



In [157]:
# The bit string is parsed as a base-2 integer before measuring, so the size
# is that of the int object rather than of the '0'/'1' text.
print("The size of the encoded data is: {}\n".format(sys.getsizeof(int(encoded_data, base=2))))
print("The content of the encoded data is: {}\n".format(encoded_data))

The size of the encoded data is: 32

The content of the encoded data is: 011001101011101111010000110011010111001001011110101001011



In [158]:


decoded_data = huffman_decoding(encoded_data, tree)

# Round-trip check: decoding must reproduce the original sentence exactly.
assert decoded_data == a_great_sentence

print("The size of the decoded data is: {}\n".format(sys.getsizeof(decoded_data)))
print("The content of the decoded data is: {}\n".format(decoded_data))

The size of the decoded data is: 70

The content of the encoded data is: abra kadabra alakazam



In [128]:
# Number of bits in the encoding: encoded_data holds one '0'/'1' character per bit.
len(encoded_data)

57

In [129]:
# Number of '1' bits in the encoded string.
sum(int(bit) for bit in encoded_data)

32

In [142]:
# Parse the bit string as a base-2 integer. Leading '0' bits do not change the
# value, so the integer alone does not record how long the bit string was.
n = int(encoded_data, base = 2)
# Size in bytes that sys.getsizeof reports for the resulting int object.
sys.getsizeof(n)

32

In [132]:
# Size of the encoding while it is still a str of '0'/'1' characters, for
# comparison with the size of the integer parsed from the same bits.
sys.getsizeof(encoded_data)

106

In [137]:
# Two five-bit values written as binary literals: 16 and 31.
sixteen = 0b10000
thirtyone = 0b11111
# Size in bytes that sys.getsizeof reports for the smaller of the two.
sys.getsizeof(sixteen)

28

In [138]:
# Same measurement for 31, to compare against the size reported for 16.
sys.getsizeof(thirtyone)

28

In [141]:
# Scratch calculation: 2 raised to the 32nd power.
2**(32)

4294967296