Permalink
Browse files

more comments and more user-friendly interface

  • Loading branch information...
fat-crocodile committed May 22, 2014
1 parent cd362bc commit 4576fc56c83b39243778eed6a78674ac8614f924
Showing with 91 additions and 66 deletions.
  1. +43 −31 huffman/bounded_huffman.py
  2. +34 −29 huffman/coder.py
  3. +12 −4 huffman/huffman.py
  4. +2 −2 huffman/test_huffman.py
View
@@ -4,6 +4,49 @@
"A Fast Algorithm for Optimal Length-Limited Huffman Codes"
Journal of the Association for Computing Machinery, Vol. 37, No. 3, July 1990"""
def make_code_symbols(weights, limit):
    """Build a length-limited code for explicitly named symbols.

    Input:
      - weights: list of pairs (symbol, weight); symbols with weight 0 are allowed
      - limit: code length limit
    Output:
      - list of pairs (symbol, code length) in the same order as the input
    """
    # Delegate the actual computation to the weight-only interface.
    lengths = make_code([weight for _, weight in weights], limit)
    symbols = [symbol for symbol, _ in weights]
    # Re-attach each symbol to the length computed for its position.
    return list(zip(symbols, lengths))
+
def make_code(weights, limit):
    """Compute length-limited code lengths from a list of symbol weights.

    Input:
      - weights: symbol weights in alphabetical order (symbols with weight 0 allowed)
      - limit: code length limit
    Output:
      - symbol code lengths in alphabetical order; symbols with weight 0
        (and a lone symbol with positive weight) get length 0
    Raises:
      - Exception when more than 2**limit symbols have a positive weight,
        because no code of that maximal length can hold them
    """

    # sort by weight, exclude zero-weighted symbols and save original symbol position
    positioned_weights = sorted((w, n) for n, w in enumerate(weights) if w > 0)

    if len(positioned_weights) > 2**limit:
        raise Exception('there are no such code')

    coins = []

    for level in range(limit, 0, -1):
        # generate current level coins: one per symbol, tagged with this level
        new_coins = [(w, {i: level}) for w, i in positioned_weights]
        # coins merged pairwise from the previous level; an odd trailing coin is dropped.
        # Floor division keeps the pair count an int on both Python 2 and Python 3.
        prev_coins = [_merge_coins(coins[2*i], coins[2*i + 1])
                      for i in range(len(coins) // 2)]
        # merge the two weight-ordered lists into one
        coins = list(_imerge(prev_coins, new_coins, lambda x, y: x[0] < y[0]))

    # collect results: zero stays for symbols that never show up in a chosen coin
    res = [0] * len(weights)

    # the first 2n-2 coins define the code; a symbol's length is the
    # largest level recorded for it among those coins
    for i in range(len(positioned_weights) * 2 - 2):
        for k, v in coins[i][1].items():
            if res[k] < v:
                res[k] = v

    return res
+
def _merge_coins(c1, c2):
 """Merge two coins into one meta-coin. Each coin is a pair (weight, {base coin id --> height in tree})"""
w = c1[0] + c2[0]
@@ -45,34 +88,3 @@ def _imerge(iter1, iter2, less_then = None):
yield i2
i2 = None
-
def make_code(weights, limit):
    """Compute length-limited code lengths for (symbol, weight) pairs.

    Input:
      - weights: list of pairs (symbol, weight) in alphabetical order
        (symbols with weight 0 allowed)
      - limit: code length limit
    Output:
      - list of pairs (symbol, code length) in the same order; symbols with
        weight 0 keep length 0
    Raises:
      - Exception when more than 2**limit symbols have a positive weight
    """
    # (weight, original position) of every symbol that occurs, lightest first
    positioned_weights = sorted((w, n) for n, (_, w) in enumerate(weights) if w > 0)

    if len(positioned_weights) > 2**limit:
        raise Exception('there are no such code')

    coins = []

    for level in range(limit, 0, -1):
        # current level coins
        new_coins = [(w, {i: level}) for w, i in positioned_weights]
        # coins merged from previous level coins; `//` keeps the count an
        # integer on Python 3 as well as Python 2
        pair_count = len(coins) // 2
        prev_coins = [_merge_coins(coins[2*i], coins[2*i + 1]) for i in range(pair_count)]
        # merge lists
        coins = list(_imerge(prev_coins, new_coins, lambda x, y: x[0] < y[0]))

    res = [(s, 0) for s, _ in weights]

    # keep, for every symbol, the largest level seen among the first 2n-2 coins
    for i in range(len(positioned_weights) * 2 - 2):
        for k, v in coins[i][1].items():
            if res[k][1] < v:
                res[k] = (res[k][0], v)

    return res
-
View
@@ -4,39 +4,13 @@
_HuffmanRecord = namedtuple("_HuffmanRecord", "length start_code end_code symbols")
-def _tables_from_lenghts(lens):
- """Make canonical huffman code tables from list of code leghts.
- Input: list of pairs (symbol, code length), ordered by alphabet
- Output: list of tuples (length, start_code, end_code, [symbols in alphabet order])
- Tuples are sorted by length"""
-
- # make dict {len --> [list of symbols in alphabet order]}
- by_len = defaultdict(list)
- for a, l in lens:
- if l > 0:
- by_len[l].append(a)
-
- # lengths that exists in code
- actual_lens = by_len.keys()
- actual_lens.sort()
-
- tables = []
- code = 0
- prev_len = 0
-
- # fill table
- for l in actual_lens:
- code *= 2**(l-prev_len)
- tables.append(_HuffmanRecord(l, code, code + len(by_len[l]), by_len[l]))
- code += len(by_len[l])
- prev_len = l
-
- return tables
-
class Decoder(object):
 """Decode symbol from bitstream"""
def __init__(self, lens):
+ """Input:
+ - list of pairs (symbol, code length)
+ - OR list of code lengths; in this case symbols are just integers"""
if isinstance(lens[0], int):
lens = [(n, l) for n,l in enumerate(lens)]
@@ -71,6 +45,9 @@ class Encoder(object):
"""Put symbol into bitstream"""
def __init__(self, lens):
+ """Input:
+ - list of pairs (symbol, code length)
+ - OR list of code lengths; in this case symbols are just integers"""
if isinstance(lens[0], int):
lens = [(n, l) for n,l in enumerate(lens)]
@@ -86,4 +63,32 @@ def put(self, bs, c):
v, n = self.code[c]
bs.put_be(v, n)
+def _tables_from_lenghts(lens):
+ """Make canonical huffman code tables from list of code leghts.
+ Input: list of pairs (symbol, code length), ordered by alphabet
+ Output: list of tuples (length, start_code, end_code, [symbols in alphabet order])
+ Tuples are sorted by length"""
+
+ # make dict {len --> [list of symbols in alphabet order]}
+ by_len = defaultdict(list)
+ for a, l in lens:
+ if l > 0:
+ by_len[l].append(a)
+
+ # lengths that exists in code
+ actual_lens = by_len.keys()
+ actual_lens.sort()
+
+ tables = []
+ code = 0
+ prev_len = 0
+
+ # fill table
+ for l in actual_lens:
+ code *= 2**(l-prev_len)
+ tables.append(_HuffmanRecord(l, code, code + len(by_len[l]), by_len[l]))
+ code += len(by_len[l])
+ prev_len = l
+
+ return tables
View
@@ -1,14 +1,22 @@
"""Implement classical Huffman algorithm"""
-def make_code(weights):
def make_code_symbols(weights):
    """Build a code for explicitly named symbols.

    Input:
      - list of pairs (symbol, weight); symbols with weight 0 are allowed
    Output:
      - list of pairs (symbol, code length) in the same order
    """
    # Compute lengths from the bare weights, then pair them back with the symbols.
    lengths = make_code([weight for _, weight in weights])
    symbols = [symbol for symbol, _ in weights]
    return list(zip(symbols, lengths))
+
+def make_code(weights):
+ """Input:
+ - list of symbols weights; simbols with weight 0 are allowed
+ Output:
+ - list of symbols code lenghts in the same order"""
# each items is:
# (weight, [(symbol1, len1), (symbol2, len2), ... ])
- codes = [(w, [(i, 0)]) for i,(_,w) in enumerate(weights) if w > 0]
+ codes = [(w, [(i, 0)]) for i,w in enumerate(weights) if w > 0]
codes.sort(key=lambda x: x[0], reverse=True)
while len(codes) > 1:
@@ -26,9 +34,9 @@ def make_code(weights):
codes.insert(i, s)
# now all pairs (symbol, code_len) are contained in codes[0][1]
- res = [(s,0) for s,_ in weights]
+ res = [0] * len(weights)
for i,l in codes[0][1]:
- res[i] = (res[i][0], l)
+ res[i] = l
return res
View
@@ -1,7 +1,7 @@
import sys
from coder import Encoder, Decoder
-from huffman import make_code
-from bounded_huffman import make_code as make_ll_code
+from huffman import make_code_symbols as make_code
+from bounded_huffman import make_code_symbols as make_ll_code # ll for lenght-limited
class DummyInputStream(object):
def __init__(self, data):

0 comments on commit 4576fc5

Please sign in to comment.