In [1]:
from bitarray import bitarray
import numpy as np
import pandas as pd
import math

# Utilities

In [2]:
def read_data(filename):
    '''
    Reads a dataset stored as one integer per line.

    The file is looked up at ./data/lesson02/<filename>.txt (relative to the
    current working directory).

    Arguments:
    filename -- dataset name without directory and without extension

    Returns:
    list of ints in the order they appear in the file

    Raises:
    ValueError -- if a non-blank line cannot be parsed as an integer
    OSError -- if the file cannot be opened
    '''
    prefix = './data/lesson02/'
    extension = '.txt'
    with open(prefix + filename + extension) as file:
        # int() already ignores surrounding whitespace (including the newline).
        # Blank lines, e.g. an extra empty line at the end of the file, are
        # skipped instead of crashing on int('').
        return [int(line) for line in file if line.strip()]

In [3]:
class BitFile(object):
    '''
    Stores a bit sequence in a binary file behind a fixed-size length header.

    File layout: the number of content bits written as an unsigned binary
    number (most significant bit first) occupying exactly `header_length`
    bytes, followed by the content bits. `load` uses the header to return
    exactly the saved bits and to drop anything that follows them in the file.
    '''
    def __init__(self, header_length=4):
        '''
        Arguments:
        header_length -- length of header in bytes

        Raises:
        ValueError -- if header_length is smaller than 1
        '''
        if header_length < 1:
            raise ValueError('header_length must be at least 1 byte')
        # Kept in bits because all slicing below works on bit positions.
        self.header_length_bits = header_length * 8

    def save(self, path, bit_array):
        '''
        Writes the header followed by `bit_array` to `path` (overwrites).

        Arguments:
        path -- destination file path
        bit_array -- bitarray with the content bits

        Raises:
        ValueError -- if the content length cannot be stored in the header
        '''
        content_length = len(bit_array)

        # zfill() only pads, it never truncates: a length that needs more bits
        # than the header has would silently shift the content and make the
        # file unreadable. Fail loudly before anything is written.
        if content_length >= 1 << self.header_length_bits:
            raise ValueError(
                f'content of {content_length} bits does not fit '
                f'into a {self.header_length_bits}-bit header'
            )

        header_bits = bitarray(format(content_length, f'0{self.header_length_bits}b'))

        bits = header_bits + bit_array
        with open(path, 'bw') as file:
            bits.tofile(file)

    def load(self, path):
        '''
        Reads a file written by `save` and returns its content bits.

        Arguments:
        path -- source file path

        Returns:
        bitarray holding exactly the number of bits recorded in the header

        Raises:
        ValueError -- if the file is shorter than its header or than the
                      content length recorded in the header
        '''
        bits = bitarray()
        with open(path, 'br') as file:
            bits.fromfile(file)

        if len(bits) < self.header_length_bits:
            raise ValueError(
                f'file holds {len(bits)} bits, '
                f'expected a header of {self.header_length_bits} bits'
            )

        content_length = int(bits[:self.header_length_bits].to01(), 2)
        content_end = self.header_length_bits + content_length

        # Slicing past the end would silently return fewer bits than recorded.
        if content_end > len(bits):
            raise ValueError(
                f'header announces {content_length} content bits, '
                f'file holds only {len(bits) - self.header_length_bits}'
            )

        return bits[self.header_length_bits:content_end]

In [4]:
def test_array_equality(expected, actual):
    if len(expected) != len(actual):
        raise Exception('Lengths does not match')
        
    for i in range(len(expected)):
        if expected[i] != actual[i]:
            raise Exception(f"Index {i} (expected value '{expected[i]}' got '{actual[i]}')")

# Read data

In [5]:
# Datasets with the `_8` suffix, loaded in the same order as before.
uniform_8, gausian_8, exponential_8 = (
    read_data(name) for name in ('uniform_8', 'gausian_8', 'exponential_8')
)

# Datasets with the `_16` suffix.
uniform_16, gausian_16, exponential_16 = (
    read_data(name) for name in ('uniform_16', 'gausian_16', 'exponential_16')
)

# Fibonacci coding

In [6]:
class FibonacciEncoder(object):
    '''
    Encodes positive integers with Fibonacci coding.

    Every number is written as a sum of Fibonacci numbers taken from the
    sequence 1, 2, 3, 5, ... (chosen greedily, largest first). Bit i of a
    codeword is 1 when the i-th number of that sequence is part of the sum,
    and one extra 1 bit is appended after the highest used position.
    '''
    def encode(self, numbers):
        '''
        Arguments:
        numbers -- iterable of integers >= 1

        Returns:
        bitarray with the concatenated codewords (empty for empty input)

        Raises:
        ValueError -- if any number is smaller than 1
        '''
        # Materialise once: the input is scanned several times below, which
        # would silently produce nothing for a one-shot iterator.
        numbers = list(numbers)
        if len(numbers) == 0:
            return bitarray()
        if min(numbers) < 1:
            raise ValueError('Fibonacci coding supports only integers >= 1')

        self.fib = self.get_fibonacci_sequence(max(numbers))

        bit_array = bitarray()
        for number in numbers:
            bit_array += self.indices_to_binary(self.find_fibonacci_sum_indexes(number))

        return bit_array

    def get_fibonacci_sequence(self, max_number):
        ''' Returns fibonacci sequence starting with single 1 (1, 2, 3, 5, ...)

        The sequence is extended until its last element is >= max_number.
        '''
        sequence = [1, 1]
        while sequence[-1] < max_number:
            sequence.append(sequence[-1] + sequence[-2])

        # Drop the duplicated leading 1.
        return sequence[1:]

    def index_of_largest_lesser_or_equal(self, number):
        '''
        Returns (index, value) of the largest element of self.fib that is
        <= number. Raises IndexError when there is no such element.
        '''
        # self.fib is increasing, so the first hit when scanning from the end
        # is the largest candidate; no intermediate list is needed.
        for i in range(len(self.fib) - 1, -1, -1):
            if self.fib[i] <= number:
                return i, self.fib[i]
        raise IndexError(f'no Fibonacci number is less than or equal to {number}')

    def find_fibonacci_sum_indexes(self, number):
        '''
        Greedily decomposes `number` into elements of self.fib.

        Returns the indexes of the used elements in ascending order
        (an empty list when number <= 0).
        '''
        indexes = []
        while number > 0:
            i, n = self.index_of_largest_lesser_or_equal(number)
            number = number - n
            indexes.append(i)

        # Collected largest-first; callers expect ascending order.
        return indexes[::-1]

    def indices_to_binary(self, fibonacci_sum_indices):
        '''
        Builds the codeword for the given sum indexes.

        Raises ValueError for an empty index list (nothing to encode).
        '''
        if len(fibonacci_sum_indices) == 0:
            raise ValueError('cannot build a codeword from an empty Fibonacci sum')

        # Highest index + 1 data bits, plus one terminating bit.
        length = max(fibonacci_sum_indices) + 2
        bit_array = bitarray(length)
        bit_array.setall(0)

        for i in fibonacci_sum_indices:
            bit_array[i] = 1
        bit_array[-1] = 1

        return bit_array

In [7]:
class FibonacciDecoder(object):
    '''
    Decodes a stream of Fibonacci codewords back into integers.

    A codeword ends at the first pair of adjacent 1 bits; the bits before the
    final 1 select which numbers of the sequence 1, 2, 3, 5, ... are summed.
    '''
    def __init__(self, max_number=100000):
        '''
        Arguments:
        max_number -- the Fibonacci table is precomputed up to the first
                      element >= max_number; longer codewords extend the
                      table on demand
        '''
        self.fib = self.get_fibonacci_sequence(max_number)

    def decode(self, bit_array):
        '''
        Arguments:
        bit_array -- indexable, sliceable sequence of bits

        Returns:
        list of decoded integers; bits after the last complete codeword
        are ignored
        '''
        numbers = []

        start = 0
        i = 1
        total = len(bit_array)
        while i < total:
            if bit_array[i-1] and bit_array[i]:
                # bit i is the terminator, the data bits are start..i-1
                numbers.append(self.sum_fibonacci_indices(bit_array[start:i]))
                start = i + 1
                # the next pair to test is (start, start + 1)
                i += 2
            else:
                i += 1

        return numbers

    def sum_fibonacci_indices(self, bit_array):
        '''Returns the sum of Fibonacci numbers whose positions hold a set bit.'''
        # A codeword may be longer than the precomputed table; grow the table
        # instead of failing with IndexError.
        self._extend_fibonacci_sequence(len(bit_array))
        return sum(self.fib[i] for i, bit in enumerate(bit_array) if bit)

    def _extend_fibonacci_sequence(self, length):
        '''Makes sure self.fib holds at least `length` elements.'''
        while len(self.fib) < length:
            if len(self.fib) < 2:
                # the sequence starts 1, 2 (single leading 1)
                self.fib.append(len(self.fib) + 1)
            else:
                self.fib.append(self.fib[-1] + self.fib[-2])

    def get_fibonacci_sequence(self, max_number):
        ''' Returns fibonacci sequence starting with single 1 (1, 2, 3, 5, ...)

        The sequence is extended until its last element is >= max_number.
        '''
        sequence = [1, 1]
        while sequence[-1] < max_number:
            sequence.append(sequence[-1] + sequence[-2])

        # Drop the duplicated leading 1.
        return sequence[1:]

In [8]:
# Round trip: Fibonacci-encode the dataset, store it in a file, load it back
# and decode it.
data = uniform_8
encoder = FibonacciEncoder()
encoded = encoder.encode(data)

bitfile = BitFile()
bitfile.save('output/fib.bin', encoded)

loaded = bitfile.load('output/fib.bin')

decoder = FibonacciDecoder()
decoded = decoder.decode(loaded)

# Compare against `data` (not a hard-coded dataset) so the check stays valid
# when a different dataset is selected at the top of the cell.
test_array_equality(data, decoded)

# Raw binary coding

In [9]:
class RawBinaryEncoderDecoder(object):
    '''
    Fixed-width binary codec: every number occupies exactly `int_size` bits,
    most significant bit first.
    '''
    def __init__(self, int_size=32):
        '''
        Arguments:
        int_size -- number of bits used for every number

        Raises:
        ValueError -- if int_size is smaller than 1
        '''
        if int_size < 1:
            raise ValueError('int_size must be a positive number of bits')
        self.int_size = int_size

    def encode(self, numbers):
        '''
        Arguments:
        numbers -- iterable of integers in range 0 .. 2**int_size - 1

        Returns:
        bitarray of len(numbers) * int_size bits

        Raises:
        ValueError -- if a number is negative or needs more than int_size bits
        '''
        limit = 1 << self.int_size
        words = []
        for number in numbers:
            # zfill() never truncates, so an oversized value would emit a
            # longer word and shift every following number in the stream.
            if not 0 <= number < limit:
                raise ValueError(
                    f'{number} cannot be stored in {self.int_size} unsigned bits'
                )
            words.append(bin(number)[2:].zfill(self.int_size))

        joined = ''.join(words)
        return bitarray(joined)

    def decode(self, bit_array):
        '''
        Arguments:
        bit_array -- bit sequence whose length is a multiple of int_size

        Returns:
        list of decoded integers

        Raises:
        ValueError -- if the length is not a multiple of int_size
        '''
        if len(bit_array) % self.int_size != 0:
            raise ValueError('Wrong input')

        return [
            int(bit_array[start:start + self.int_size].to01(), 2)
            for start in range(0, len(bit_array), self.int_size)
        ]

In [10]:
# Round trip: fixed-width encode the dataset, store it in a file, load it back
# and decode it.
data = uniform_8
encoder = RawBinaryEncoderDecoder(int_size=9) # one extra bit because data contains number 256
encoded = encoder.encode(data)

bitfile = BitFile()
bitfile.save('output/raw.bin', encoded)

loaded = bitfile.load('output/raw.bin')

# The decoder must use the same word width as the encoder.
decoder = RawBinaryEncoderDecoder(int_size=9)
decoded = decoder.decode(loaded)

# Compare against `data` (not a hard-coded dataset) so the check stays valid
# when a different dataset is selected at the top of the cell.
test_array_equality(data, decoded)

# Elias code (gamma)

In [11]:
class EliasEncoder(object):
    '''
    Encodes positive integers with the Elias gamma code: the binary form of
    the number preceded by (number of binary digits - 1) zero bits.
    '''
    def encode(self, numbers):
        '''
        Arguments:
        numbers -- iterable of integers >= 1

        Returns:
        bitarray with the concatenated codewords

        Raises:
        ValueError -- if any number is smaller than 1
        '''
        bit_array = bitarray()

        for number in numbers:
            bit_array += self.get_leading_zeros(number) + self.get_binary(number)

        return bit_array

    def get_leading_zeros(self, number):
        '''
        Returns the zero prefix of the codeword for `number`.

        Raises ValueError if number is smaller than 1.
        '''
        if number < 1:
            raise ValueError('Elias gamma coding supports only integers >= 1')

        # bin() yields '0b' followed by the binary digits; the prefix is one
        # zero shorter than the digit count. Counting digits is exact, whereas
        # int(math.log2(number)) rounds up for large values just below a power
        # of two (e.g. 2**50 - 1) and would emit one zero too many.
        number_of_zeros = len(bin(number)) - 3
        bit_array = bitarray(number_of_zeros)
        bit_array.setall(0)

        return bit_array

    def get_binary(self, number):
        '''Returns the plain binary representation of `number` as a bitarray.'''
        return bitarray(bin(number)[2:])

In [12]:
class EliasDecoder(object):
    '''
    Decodes a stream of Elias gamma codewords: M zero bits followed by an
    (M + 1)-bit binary number.
    '''
    def decode(self, bit_array):
        '''
        Arguments:
        bit_array -- bit sequence holding concatenated codewords

        Returns:
        list of decoded integers

        Raises:
        ValueError -- if the input ends in the middle of a codeword
        '''
        numbers = []
        total = len(bit_array)
        i = 0
        while i < total:
            zeros = 0  # length of the zero prefix (M)
            # Bounded scan: trailing zeros must not run past the end.
            while i < total and not bit_array[i]:
                zeros += 1
                i += 1

            end = i + zeros + 1
            # A short slice would silently decode to a wrong, smaller number.
            if end > total:
                raise ValueError('input ends in the middle of an Elias gamma codeword')

            numbers.append(int(bit_array[i:end].to01(), 2))
            i = end

        return numbers

In [13]:
# Round trip: Elias-gamma-encode the dataset, store it in a file, load it back
# and decode it.
data = uniform_8
encoder = EliasEncoder()
encoded = encoder.encode(data)

bitfile = BitFile()
bitfile.save('output/elias.bin', encoded)

loaded = bitfile.load('output/elias.bin')

decoder = EliasDecoder()
decoded = decoder.decode(loaded)

# Compare against `data` (not a hard-coded dataset) so the check stays valid
# when a different dataset is selected at the top of the cell.
test_array_equality(data, decoded)

# Results

In [14]:
# Codecs under comparison, one entry per codec: (display name, encoder, decoder).
# NOTE(review): the 17-bit width for the raw codec presumably mirrors the
# 9-bit choice made for uniform_8 (one bit more than the nominal 16, so the
# largest value of the *_16 datasets fits) -- confirm against the data.
encoders_decoders = [
    ('Elias (gamma)', EliasEncoder(), EliasDecoder()),
    ('Fibonacci', FibonacciEncoder(), FibonacciDecoder()),
    ('Raw binary', RawBinaryEncoderDecoder(17), RawBinaryEncoderDecoder(17)),
]

# Datasets to benchmark, one entry per dataset: (display name, list of numbers).
datasets = [
    ('uniform_8', uniform_8),
    ('gausian_8', gausian_8),
    ('exponential_8', exponential_8),
    ('uniform_16', uniform_16),
    ('gausian_16', gausian_16),
    ('exponential_16', exponential_16),
]

In [15]:
# Result tables: one row per codec, one column per dataset.
bits_per_file = np.zeros((len(encoders_decoders), len(datasets)), dtype=int)
bits_per_symbol = np.zeros(bits_per_file.shape)

# The same file helper (and the same scratch file) serves every measurement.
bitfile = BitFile()

for row, (encoder_name, encoder, decoder) in enumerate(encoders_decoders):
    for col, (dataset_name, dataset) in enumerate(datasets):
        # encode -> store -> load -> decode
        encoded = encoder.encode(dataset)
        bitfile.save('output/data.bin', encoded)
        loaded = bitfile.load('output/data.bin')
        decoded = decoder.decode(loaded)

        # Only a lossless round trip is worth measuring.
        test_array_equality(dataset, decoded)

        bits_per_file[row, col] = len(loaded)
        bits_per_symbol[row, col] = len(loaded) / len(dataset)

### Bits per file

In [16]:
# Total encoded size in bits: rows are codecs, columns are datasets.
df = pd.DataFrame(
    bits_per_file,
    index=[codec_name for codec_name, _, _ in encoders_decoders],
    columns=[dataset_name for dataset_name, _ in datasets],
)
df.style.format("{:,}")

Unnamed: 0,uniform_8,gausian_8,exponential_8,uniform_16,gausian_16,exponential_16
Elias (gamma),856640,917064,454900,3800032,3932248,1138872
Fibonacci,699069,730716,404520,2902780,3011773,973968
Raw binary,1114112,1114112,1114112,2228224,2228224,2228224


### Bits per symbol

In [17]:
# Average encoded bits per input number: rows are codecs, columns are datasets.
df = pd.DataFrame(
    bits_per_symbol,
    index=[codec_name for codec_name, _, _ in encoders_decoders],
    columns=[dataset_name for dataset_name, _ in datasets],
)
df.style.format("{:.2f}")

Unnamed: 0,uniform_8,gausian_8,exponential_8,uniform_16,gausian_16,exponential_16
Elias (gamma),13.07,13.99,6.94,28.99,30.0,8.69
Fibonacci,10.67,11.15,6.17,22.15,22.98,7.43
Raw binary,17.0,17.0,17.0,17.0,17.0,17.0
