<a href="https://colab.research.google.com/github/deguc/shannon/blob/main/huffman.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import heapq
from collections import defaultdict


def huffman_frequency(data):
  """Count how often each symbol occurs in ``data``.

  Args:
    data: Any iterable of hashable symbols (e.g. a string).

  Returns:
    A plain ``dict`` mapping each symbol to its number of occurrences,
    with keys in order of first appearance.
  """
  counts = {}

  for symbol in data:
    # First sighting starts from zero, later ones bump the tally.
    counts[symbol] = counts.get(symbol, 0) + 1

  return counts


def huffman_tree(data,frequency):
  """Build a Huffman code table from symbol frequencies.

  Args:
    data: Not used by the algorithm; kept so existing callers that pass
      it positionally keep working.
    frequency: Mapping of symbol -> weight (occurrence count). Symbols
      must be mutually orderable, because entries with equal weight are
      compared by symbol and the final table is sorted by symbol.

  Returns:
    A list of ``[symbol, code]`` pairs, where ``code`` is a string of
    "0"/"1" characters, sorted by code length and then by symbol.
    Returns an empty list when ``frequency`` is empty.
  """
  if not frequency:
    # Nothing to encode; popping from an empty heap would raise IndexError.
    return []

  # Each heap entry is [total_weight, [symbol, code], [symbol, code], ...].
  heap = [[weight, [symbol, ""]] for symbol, weight in frequency.items()]
  heapq.heapify(heap)

  if len(heap) == 1:
    # A lone symbol never goes through a merge, so it would keep the empty
    # code and every message would encode to "". Give it a 1-bit code so
    # the encoded length still carries the message length.
    heap[0][1][1] = "0"

  while len(heap) > 1:
    # Merge the two lightest subtrees; the lighter one takes the 0 branch.
    lo = heapq.heappop(heap)
    hi = heapq.heappop(heap)

    for pair in lo[1:]:
      pair[1] = "0" + pair[1]
    for pair in hi[1:]:
      pair[1] = "1" + pair[1]

    heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])

  # The single remaining entry holds the total weight followed by all pairs.
  return sorted(heap[0][1:], key=lambda pair: (len(pair[1]), pair[0]))


def huffman_encode(data,tree):
  """Encode ``data`` as a bit string using a Huffman code table.

  Args:
    data: Iterable of symbols to encode (e.g. a string).
    tree: Iterable of ``[symbol, code]`` pairs, as returned by
      ``huffman_tree``.

  Returns:
    The concatenation of the codes of all symbols in ``data``, as a
    string of "0"/"1" characters.

  Raises:
    KeyError: If ``data`` contains a symbol that has no entry in ``tree``.
  """
  codes = {symbol: code for symbol, code in tree}

  # Join once instead of growing a string with += for every symbol.
  return "".join(codes[symbol] for symbol in data)


def huffman_decode(data,tree):
  """Decode a Huffman bit string back into the original symbols.

  Args:
    data: Iterable of single-character bit strings, normally the
      "0"/"1" string produced by ``huffman_encode``.
    tree: Iterable of ``[symbol, code]`` pairs, as returned by
      ``huffman_tree``.

  Returns:
    The decoded symbols joined into one string. Trailing bits that do
    not complete a code are ignored.
  """
  symbols = {code: symbol for symbol, code in tree}
  decoded = []
  pending = ""

  for bit in data:
    pending += bit
    # Emit a symbol as soon as the accumulated bits match a code, then
    # start collecting the next code from scratch.
    if pending in symbols:
      decoded.append(symbols[pending])
      pending = ""

  # Join once instead of growing a string with += for every symbol.
  return "".join(decoded)


# Demo: push a sample string through the whole pipeline and show each stage.
data = "aaaaaaaabbccef"

# Count symbols, build the code table, then encode and decode again.
frequency = huffman_frequency(data)
tree = huffman_tree(data, frequency)
encoded = huffman_encode(data, tree)
decoded = huffman_decode(encoded, tree)

# Label and value are separate print arguments, so print's default
# separator adds one more space after each label.
print("data : ", data)
print("frequency : ", frequency)
print("huffman_tree : ", tree)
print("encoded : ", encoded)
print("decoded : ", decoded)

data :  aaaaaaaabbccef
frequency :  {'a': 8, 'b': 2, 'c': 2, 'e': 1, 'f': 1}
huffman_tree :  [['a', '1'], ['b', '010'], ['c', '011'], ['e', '000'], ['f', '001']]
encoded :  11111111010010011011000001
decoded :  aaaaaaaabbccef
