In [3]:
from multiset import Multiset
import compressors.rANS as rANS

In [4]:
# One symbol-count table per test case; the position in this list is what
# pairs a table with its entry in `params_list` below.
freqs_list = [
    Frequencies(symbol_counts)
    for symbol_counts in (
        {"A": 1, "B": 1, "C": 2},
        {"A": 12, "B": 34, "C": 1, "D": 45},
        {"A": 34, "B": 35, "C": 546, "D": 1, "E": 13, "F": 245},
        {"A": 5, "B": 5, "C": 5, "D": 5, "E": 5, "F": 5},
        {"A": 1, "B": 3},
    )
]

# rANS parameters for each table above. An empty override dict means the
# parameters are built from the frequencies alone; the last three cases
# each override a single keyword argument.
params_list = [
    rANSParams(case_freqs, **overrides)
    for case_freqs, overrides in zip(
        freqs_list,
        (
            {},
            {},
            {"NUM_BITS_OUT": 8},
            {"RANGE_FACTOR": 1 << 12},
            {"RANGE_FACTOR": 1 << 4},
        ),
    )
]

In [5]:
# Round-trip check: for every (frequencies, params) pair, generate random
# data, run it through the rANS encoder and decoder, require the result to
# be lossless, and print the per-symbol code length next to the average
# negative log-probability of the data.
DATA_SIZE, SEED = 10000, 0
for freq, rans_params in zip(freqs_list, params_list):
    # random test data generated from this case's probability distribution
    prob_dist = freq.get_prob_dist()
    data_block = get_random_data_block(prob_dist, DATA_SIZE, seed=SEED)
    avg_log_prob = get_avg_neg_log_prob(prob_dist, data_block)

    # encoder and decoder are built from the same parameter object
    encoder = rANS.rANSEncoder(rans_params)
    decoder = rANS.rANSDecoder(rans_params)

    # encode then decode; the third returned value is not needed here
    is_lossless, encode_len, _ = try_lossless_compression(
        data_block,
        encoder,
        decoder,
        add_extra_bits_to_encoder_output=True,
    )
    assert is_lossless

    # average code length per data element, ignoring the bits used to
    # signal the number of data elements
    avg_codelen = encode_len / data_block.size
    print(f"rANS coding: avg_log_prob={avg_log_prob:.3f}, rANS codelen: {avg_codelen:.3f}")


rANS coding: avg_log_prob=1.499, rANS codelen: 1.504
rANS coding: avg_log_prob=1.484, rANS codelen: 1.489
rANS coding: avg_log_prob=1.430, rANS codelen: 1.435
rANS coding: avg_log_prob=2.585, rANS codelen: 2.590
rANS coding: avg_log_prob=0.815, rANS codelen: 0.819
