Skip to content

Commit

Permalink
more comments and more user-friendly interface
Browse files Browse the repository at this point in the history
  • Loading branch information
fat-crocodile committed May 22, 2014
1 parent cd362bc commit 4576fc5
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 66 deletions.
74 changes: 43 additions & 31 deletions huffman/bounded_huffman.py
Expand Up @@ -4,6 +4,49 @@
"A Fast Algorithm for Optimal Length-Limited Huffman Codes"
Journal of the Association for Computing Machinery, Vol. 37, No. 3, July 1990"""

def make_code_symbols(weights, limit):
    """Symbol-aware front end for make_code.

    Input:
      - list of pairs (symbol, weight); symbols with weight 0 allowed
      - code length limit
    Output:
      - list of pairs (symbol, code length) in the same order
    """
    # strip the symbols: make_code works on bare weights only
    bare_weights = []
    for _, weight in weights:
        bare_weights.append(weight)

    lengths = make_code(bare_weights, limit)

    # re-attach every symbol to the length computed for its position
    paired = []
    for (symbol, _), length in zip(weights, lengths):
        paired.append((symbol, length))
    return paired

def make_code(weights, limit):
    """Compute code lengths that never exceed the given limit.

    Input:
      - symbols weights in alphabetical order (symbols with weight 0 allowed)
      - code length limit
    Output:
      - symbols code lengths in alphabetical order; symbols with weight 0
        get length 0
    Raises:
      - ValueError if there are more non-zero-weighted symbols than
        2**limit, i.e. no code of that length can hold them all
    """

    # sort by weight, exclude zero-weighted symbols and save original symbol position
    positioned_weights = sorted((w, n) for n, w in enumerate(weights) if w > 0)

    if len(positioned_weights) > 2 ** limit:
        raise ValueError('there are no such code')

    coins = []

    for level in range(limit, 0, -1):
        # generate current level coins: one per symbol, remembering its level
        new_coins = [(w, {i: level}) for w, i in positioned_weights]
        # coins, merged pairwise from previous level coins; floor division
        # keeps the count an int on both Python 2 and Python 3 and leaves
        # an unpaired trailing coin out
        pair_count = len(coins) // 2
        prev_coins = [_merge_coins(coins[2 * i], coins[2 * i + 1])
                      for i in range(pair_count)]
        # merge both lists, ordering the coins by weight
        coins = list(_imerge(prev_coins, new_coins, lambda x, y: x[0] < y[0]))

    # got results: a symbol's length is the largest level recorded for it
    # among the first 2*n - 2 coins
    res = [0] * len(weights)

    for i in range(len(positioned_weights) * 2 - 2):
        for k, v in coins[i][1].items():
            if res[k] < v:
                res[k] = v

    return res

def _merge_coins(c1, c2):
"""Merge two coins in one meta-coin. Each coin in pair (weight, {base coin id --> height in tree})"""
w = c1[0] + c2[0]
Expand Down Expand Up @@ -45,34 +88,3 @@ def _imerge(iter1, iter2, less_then = None):
yield i2
i2 = None


def make_code(weights, limit):
    """Compute code lengths that never exceed the given limit.

    Input:
      - list of pairs (symbol, weight) in alphabetical order
        (symbols with weight 0 allowed)
      - code length limit
    Output:
      - list of pairs (symbol, code length) in the same order; symbols with
        weight 0 get length 0
    Raises:
      - ValueError if there are more non-zero-weighted symbols than
        2**limit, i.e. no code of that length can hold them all
    """
    # sort by weight, exclude zero-weighted symbols and save original symbol position
    positioned_weights = sorted((w, n) for n, (_, w) in enumerate(weights) if w > 0)

    if len(positioned_weights) > 2 ** limit:
        raise ValueError('there are no such code')

    coins = []

    for level in range(limit, 0, -1):
        # current level coins: one per symbol, remembering its level
        new_coins = [(w, {i: level}) for w, i in positioned_weights]
        # coins, merged pairwise from previous level coins; floor division
        # keeps the count an int on both Python 2 and Python 3 and leaves
        # an unpaired trailing coin out
        pair_count = len(coins) // 2
        prev_coins = [_merge_coins(coins[2 * i], coins[2 * i + 1])
                      for i in range(pair_count)]
        # merge both lists, ordering the coins by weight
        coins = list(_imerge(prev_coins, new_coins, lambda x, y: x[0] < y[0]))

    # every symbol starts with length 0; the largest level recorded for it
    # among the first 2*n - 2 coins becomes its code length
    res = [(s, 0) for s, _ in weights]

    for i in range(len(positioned_weights) * 2 - 2):
        for k, v in coins[i][1].items():
            if res[k][1] < v:
                res[k] = (res[k][0], v)

    return res

63 changes: 34 additions & 29 deletions huffman/coder.py
Expand Up @@ -4,39 +4,13 @@

_HuffmanRecord = namedtuple("_HuffmanRecord", "length start_code end_code symbols")

def _tables_from_lenghts(lens):
    """Make canonical huffman code tables from list of code lengths.

    Input: list of pairs (symbol, code length), ordered by alphabet;
           symbols with length 0 are left out of the tables
    Output: list of _HuffmanRecord tuples
            (length, start_code, end_code, [symbols in alphabet order]),
            where end_code is start_code plus the number of symbols of
            that length. Tuples are sorted by length"""

    # make dict {len --> [list of symbols in alphabet order]}
    by_len = defaultdict(list)
    for symbol, length in lens:
        if length > 0:
            by_len[length].append(symbol)

    tables = []
    code = 0
    prev_len = 0

    # fill table, walking over the lengths that exist in the code in
    # ascending order; sorted() works on Python 2 and 3, whereas
    # dict.keys() has no .sort() on Python 3
    for length in sorted(by_len):
        symbols = by_len[length]
        # widen the running code to the new length
        code *= 2 ** (length - prev_len)
        tables.append(_HuffmanRecord(length, code, code + len(symbols), symbols))
        code += len(symbols)
        prev_len = length

    return tables

class Decoder(object):
"""Decode symbol from bitsream"""

def __init__(self, lens):
"""Input:
- list or pairs (symbol, code lenght)
- OR list or code lenghts; in this case symbols are just integers"""
if isinstance(lens[0], int):
lens = [(n, l) for n,l in enumerate(lens)]

Expand Down Expand Up @@ -71,6 +45,9 @@ class Encoder(object):
"""Put symbol into bitstream"""

def __init__(self, lens):
"""Input:
- list or pairs (symbol, code lenght)
- OR list or code lenghts; in this case symbols are just integers"""
if isinstance(lens[0], int):
lens = [(n, l) for n,l in enumerate(lens)]

Expand All @@ -86,4 +63,32 @@ def put(self, bs, c):
v, n = self.code[c]
bs.put_be(v, n)

def _tables_from_lenghts(lens):
    """Make canonical huffman code tables from list of code lengths.

    Input: list of pairs (symbol, code length), ordered by alphabet;
           symbols with length 0 are left out of the tables
    Output: list of _HuffmanRecord tuples
            (length, start_code, end_code, [symbols in alphabet order]),
            where end_code is start_code plus the number of symbols of
            that length. Tuples are sorted by length"""

    # make dict {len --> [list of symbols in alphabet order]}
    by_len = defaultdict(list)
    for symbol, length in lens:
        if length > 0:
            by_len[length].append(symbol)

    tables = []
    code = 0
    prev_len = 0

    # fill table, walking over the lengths that exist in the code in
    # ascending order; sorted() works on Python 2 and 3, whereas
    # dict.keys() has no .sort() on Python 3
    for length in sorted(by_len):
        symbols = by_len[length]
        # widen the running code to the new length
        code *= 2 ** (length - prev_len)
        tables.append(_HuffmanRecord(length, code, code + len(symbols), symbols))
        code += len(symbols)
        prev_len = length

    return tables

16 changes: 12 additions & 4 deletions huffman/huffman.py
@@ -1,14 +1,22 @@
"""Implement the classical Huffman algorithm"""

def make_code(weights):
def make_code_symbols(weights):
    """Symbol-aware front end for make_code.

    Input:
      - list of pairs (symbol, weight); symbols with weight 0 are allowed
    Output:
      - list of pairs (symbol, code length) in the same order
    """
    # strip the symbols: make_code works on bare weights only
    bare_weights = []
    for _, weight in weights:
        bare_weights.append(weight)

    lengths = make_code(bare_weights)

    # re-attach every symbol to the length computed for its position
    paired = []
    for (symbol, _), length in zip(weights, lengths):
        paired.append((symbol, length))
    return paired

def make_code(weights):
"""Input:
- list of symbols weights; simbols with weight 0 are allowed
Output:
- list of symbols code lenghts in the same order"""

# each items is:
# (weight, [(symbol1, len1), (symbol2, len2), ... ])
codes = [(w, [(i, 0)]) for i,(_,w) in enumerate(weights) if w > 0]
codes = [(w, [(i, 0)]) for i,w in enumerate(weights) if w > 0]
codes.sort(key=lambda x: x[0], reverse=True)

while len(codes) > 1:
Expand All @@ -26,9 +34,9 @@ def make_code(weights):
codes.insert(i, s)

# now all pairs (symbol, code_len) are contained in codes[0][1]
res = [(s,0) for s,_ in weights]
res = [0] * len(weights)

for i,l in codes[0][1]:
res[i] = (res[i][0], l)
res[i] = l

return res
4 changes: 2 additions & 2 deletions huffman/test_huffman.py
@@ -1,7 +1,7 @@
import sys
from coder import Encoder, Decoder
from huffman import make_code
from bounded_huffman import make_code as make_ll_code
from huffman import make_code_symbols as make_code
from bounded_huffman import make_code_symbols as make_ll_code # ll for lenght-limited

class DummyInputStream(object):
def __init__(self, data):
Expand Down

0 comments on commit 4576fc5

Please sign in to comment.