more comments and more user-friendly interface

fat-crocodile · May 22, 2014 · 4576fc5 · 4576fc5
1 parent cd362bc
commit 4576fc5
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 66 deletions.
diff --git a/huffman/bounded_huffman.py b/huffman/bounded_huffman.py
@@ -4,6 +4,49 @@
 "A Fast Algorithm for Optimal Length-Limited Huffman Codes"
 Journal of the Association for Computing Machinery, Vol. 37, No. 3, July 1990"""
 
+def make_code_symbols(weights, limit):
+    """Length-limited code lengths for a list of (symbol, weight) pairs.
+        Input:  list of pairs (symbol, weight), zero weights allowed; code length limit
+        Output: list of pairs (symbol, code length) in the same order
+    """
+    # compute lengths from the bare weights, then re-attach the symbols
+    lengths = make_code([weight for _, weight in weights], limit)
+    symbols = [symbol for symbol, _ in weights]
+    return list(zip(symbols, lengths))
+
+def make_code(weights, limit):
+    """Length-limited code lengths, built with the coin scheme of the paper cited in the module docstring.
+        Input:
+        - symbols weights in alphabetical order (symbols with weight 0 allowed)
+        - code length limit
+        Output:
+        - symbols code lengths in alphabetical order; 0 for zero-weighted symbols
+          (and for a single positive-weight symbol, since no coins are taken then)
+        Raises Exception when more than 2**limit symbols have a positive weight.
+    """
+    # sort by weight, exclude zero-weighted symbols and save original symbol position
+    positioned_weights = sorted((w, n) for n, w in enumerate(weights) if w > 0)
+
+    if len(positioned_weights) > 2**limit:
+        raise Exception('there are no such code')
+
+    coins = []
+    for level in range(limit, 0, -1):
+        # generate current level coins
+        new_coins = [(w, {i: level}) for w, i in positioned_weights]
+        # coins, merged pairwise from previous level coins; `//` keeps the pair
+        # count an integer on Python 3 too (an unpaired last coin is dropped)
+        prev_coins = [_merge_coins(coins[2*i], coins[2*i+1]) for i in range(len(coins) // 2)]
+        # merge lists, comparing coins by weight
+        coins = list(_imerge(prev_coins, new_coins, lambda x, y: x[0] < y[0]))
+
+    # take the first 2n-2 coins; a symbol's length is the largest height among them
+    res = [0] * len(weights)
+    for i in range(len(positioned_weights) * 2 - 2):
+        for k, v in coins[i][1].items():
+            if res[k] < v: res[k] = v
+    return res
+
 def _merge_coins(c1, c2):
     """Merge two coins in one meta-coin. Each coin in pair (weight, {base coin id --> height in tree})"""
     w = c1[0] + c2[0]
@@ -45,34 +88,3 @@ def _imerge(iter1, iter2, less_then = None):
             yield i2
             i2 = None
 
-
-def make_code(weights, limit):
-    """Input: 
-        - symbols weights in alphabetical order (symbols with weight 0 allowed)
-        - code lenght limit
-        Output:
-        - symbols code lenghts in alphabetical order
-    """
-    positioned_weights = sorted((w, n) for n,(_,w) in enumerate(weights) if w > 0)
-
-    if len(positioned_weights) > 2**limit:
-        raise Exception('there are no such code')
-
-    coins = []
-
-    for level in range(limit, 0, -1):
-        # current level coins
-        new_coins = [(w, {i:level}) for w,i in positioned_weights]
-        # coins, merged from previous level coins
-        prev_coins = [_merge_coins(coins[2*i], coins[2*i+1]) for i in range(len(coins) / 2)]
-        # merge lists
-        coins = list(_imerge(prev_coins, new_coins, lambda x,y: x[0] < y[0]))
-
-    res = [(s,0) for s,_ in weights]
-
-    for i in range(len(positioned_weights) * 2 - 2):
-        for k,v in coins[i][1].items():
-            if res[k][1] < v: res[k] = (res[k][0], v)
-
-    return res
-
diff --git a/huffman/coder.py b/huffman/coder.py
@@ -4,39 +4,13 @@
 
 _HuffmanRecord = namedtuple("_HuffmanRecord", "length start_code end_code symbols")
 
-def _tables_from_lenghts(lens):
-    """Make canonical huffman code tables from list of code leghts.
-    Input:  list of pairs (symbol, code length), ordered by alphabet
-    Output: list of tuples (length, start_code, end_code, [symbols in alphabet order])
-    Tuples are sorted by length"""
-
-    # make dict {len --> [list of symbols in alphabet order]}
-    by_len = defaultdict(list)
-    for a, l in lens:
-        if l > 0:
-            by_len[l].append(a)
-
-    # lengths that exists in code
-    actual_lens = by_len.keys()
-    actual_lens.sort()
-
-    tables = []
-    code = 0
-    prev_len = 0
-
-    # fill table
-    for l in actual_lens:
-        code *= 2**(l-prev_len)
-        tables.append(_HuffmanRecord(l, code, code + len(by_len[l]), by_len[l]))
-        code += len(by_len[l])
-        prev_len = l
-
-    return tables
-
 class Decoder(object):
     """Decode symbol from bitsream"""
 
     def __init__(self, lens):
+        """Input: 
+        - list of pairs (symbol, code length)
+        - OR list of code lengths; in this case symbols are just integers"""
         if isinstance(lens[0], int):
             lens = [(n, l) for n,l in enumerate(lens)]
 
@@ -71,6 +45,9 @@ class Encoder(object):
     """Put symbol into bitstream""" 
 
     def __init__(self, lens):
+        """Input: 
+        - list of pairs (symbol, code length)
+        - OR list of code lengths; in this case symbols are just integers"""
         if isinstance(lens[0], int):
             lens = [(n, l) for n,l in enumerate(lens)]
 
@@ -86,4 +63,32 @@ def put(self, bs, c):
         v, n = self.code[c]
         bs.put_be(v, n)
 
+def _tables_from_lenghts(lens):
+    """Make canonical huffman code tables from list of code lengths.
+    Input:  list of pairs (symbol, code length), ordered by alphabet;
+            pairs whose length is not positive are skipped
+    Output: list of tuples (length, start_code, end_code, [symbols in alphabet order])
+    Tuples are sorted by length; end_code = start_code + number of symbols"""
+
+    # make dict {len --> [list of symbols in alphabet order]}
+    by_len = defaultdict(list)
+    for a, l in lens:
+        if l > 0:
+            by_len[l].append(a)
+
+    tables = []
+    code = 0
+    prev_len = 0
+
+    # fill table, going through the lengths that exist in the code in ascending
+    # order; sorted() is used because dict.keys() has no sort() on Python 3
+    for l in sorted(by_len):
+        symbols = by_len[l]
+        # widen the running code to the new length (append l - prev_len zero bits)
+        code *= 2**(l-prev_len)
+        tables.append(_HuffmanRecord(l, code, code + len(symbols), symbols))
+        code += len(symbols)
+        prev_len = l
+
+    return tables
 
diff --git a/huffman/huffman.py b/huffman/huffman.py
@@ -1,14 +1,22 @@
 """Implement classical Huffman alghorithm"""
 
-def make_code(weights):
+def make_code_symbols(weights):
     """Input:
         - list of pairs (symbol, weight), simbols with weight 0 are allowed
        Output:
         - list of pairs (symbol, code lenght) in the same order"""
+    lengths = make_code([weight for _, weight in weights])
+    return list(zip([symbol for symbol, _ in weights], lengths))
+
+def make_code(weights):
+    """Input:
+        - list of symbols weights; simbols with weight 0 are allowed
+       Output:
+        - list of symbols code lenghts in the same order"""
 
     # each items is:
     # (weight, [(symbol1, len1), (symbol2, len2), ...  ])
-    codes = [(w, [(i, 0)]) for i,(_,w) in enumerate(weights) if w > 0]
+    codes = [(w, [(i, 0)]) for i,w in enumerate(weights) if w > 0]
     codes.sort(key=lambda x: x[0], reverse=True)
 
     while len(codes) > 1:
@@ -26,9 +34,9 @@ def make_code(weights):
         codes.insert(i, s)
 
     # now all pairs (symbol, code_len) are contained in codes[0][1]
-    res = [(s,0) for s,_ in weights]
+    res = [0] * len(weights)
 
     for i,l in codes[0][1]:
-        res[i] = (res[i][0], l)
+        res[i] = l
 
     return res
diff --git a/huffman/test_huffman.py b/huffman/test_huffman.py
@@ -1,7 +1,7 @@
 import sys
 from coder import Encoder, Decoder
-from huffman import make_code 
-from bounded_huffman import make_code as make_ll_code
+from huffman import make_code_symbols as make_code 
+from bounded_huffman import make_code_symbols as make_ll_code  # ll for lenght-limited
 
 class DummyInputStream(object):
     def __init__(self, data):