# Exercise 2

## Install Necessary Packages

In [15]:
%pip install tabulate matplotlib numpy

Note: you may need to restart the kernel to use updated packages.




## Import Packages

In [16]:
import os
import struct
from tqdm.auto import tqdm
import numpy as np
from matplotlib import pyplot as plt
import tabulate

## Encoding and Decoding Functions

In [17]:
def rice_encode(number, k) -> str:
    """Encode a non-negative integer as a Rice codeword with parameter k.

    The number is split into a quotient (``number >> k``) and a remainder
    (the low ``k`` bits of ``number``). The quotient is written in unary as
    that many '1' characters closed by a single '0'; the remainder follows as
    exactly ``k`` binary digits.

    Args:
        number: non-negative integer to encode.
        k: non-negative number of remainder bits.

    Returns:
        The codeword as a string of '0'/'1' characters.

    Raises:
        ValueError: if ``number`` or ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative, got {}".format(k))
    if number < 0:
        # a negative quotient would make '1' * quotient collapse to '' and
        # silently produce a codeword that decodes to a different value
        raise ValueError("number must be non-negative, got {}".format(number))

    quotient = number >> k
    remainder = number & ((1 << k) - 1)

    # format(0, '00b') yields '0' rather than '', so for k == 0 the remainder
    # field has to be skipped explicitly or every codeword carries a stray bit
    remainder_bits = format(remainder, '0' + str(k) + 'b') if k else ''
    return '1' * quotient + '0' + remainder_bits

In [18]:
def rice_decode(code, k) -> int:
    """Decode the Rice codeword that starts at the beginning of a bit string.

    The number of '1' characters before the first '0' is the quotient; the
    ``k`` characters right after that '0' are the remainder in binary. Any
    characters following the codeword are ignored.

    Args:
        code: string of '0'/'1' characters beginning with a Rice codeword.
        k: non-negative number of remainder bits used by the encoder.

    Returns:
        The decoded integer, ``(quotient << k) + remainder``.

    Raises:
        ValueError: if ``k`` is negative, if ``code`` contains no '0'
            terminator, or if fewer than ``k`` bits follow the terminator.
    """
    if k < 0:
        raise ValueError("k must be non-negative, got {}".format(k))

    quotient = code.find('0')
    if quotient == -1:
        # find() returning -1 would otherwise be used as a (negative) quotient
        raise ValueError("no '0' terminator found in Rice code")

    remainder_bits = code[quotient + 1:quotient + 1 + k]
    if len(remainder_bits) < k:
        raise ValueError("Rice code is truncated: expected {} remainder bits, got {}".format(k, len(remainder_bits)))

    # int('', 2) raises, so the empty remainder of k == 0 is handled explicitly
    remainder = int(remainder_bits, 2) if k else 0
    return (quotient << k) + remainder

## Test Encoding and Decoding Functions

In [19]:
# round-trip sanity check for the Rice coder: encoding a value and decoding
# the resulting codeword must give back the value we started from
codeword = rice_encode(100, 3)
assert rice_decode(codeword, 3) == 100

In [20]:
# the decoder must look at the first codeword only, so filling the string up
# to 100 characters with zeros or with ones must not change the decoded value
for filler in ('0', '1'):
    assert rice_decode(rice_encode(100, 3).ljust(100, filler), 3) == 100

## Helper Functions

In [21]:
def bits_to_bytes(bit_string:str) -> bytes:
    """Pack a string of '0'/'1' characters into a bytes object.

    If the length is not a multiple of 8, '0' characters are appended on the
    right until it is; each group of 8 characters then becomes one byte,
    most significant bit first.
    """
    leftover = len(bit_string) % 8
    if leftover:
        bit_string = bit_string + '0' * (8 - leftover)

    octets = []
    for offset in range(0, len(bit_string), 8):
        octets.append(int(bit_string[offset:offset + 8], 2))
    return bytes(octets)

In [22]:
def bytes_to_bits(byte_data:bytearray) -> str:
    """Render every byte of the input as 8 binary digits and concatenate them."""
    pieces = []
    for octet in byte_data:
        # zero-padded to a fixed width of 8 so byte boundaries stay aligned
        pieces.append('{:08b}'.format(octet))
    return ''.join(pieces)

In [23]:
def is_files_equal(file_1:str, file_2:str) -> bool:
    """Return True when the two files hold exactly the same bytes.

    Both files are opened in binary mode and read completely into memory
    before being compared.
    """
    with open(file_1, 'rb') as first_handle:
        with open(file_2, 'rb') as second_handle:
            first_content = first_handle.read()
            second_content = second_handle.read()
    return first_content == second_content

## Test Helper Functions

In [24]:
# sample value for the round trip; any positive integer works here
number = 199600

# binary digits of the number, without the '0b' prefix
bit_string = format(number, 'b')

# left-pad with zeros up to a multiple of 8 so that the numeric value is unchanged
# and bits_to_bytes() has nothing to append on the right
padding = -len(bit_string) % 8
bit_string = '0' * padding + bit_string
bits = bytes_to_bits(bits_to_bytes(bit_string))

# bits -> bytes -> bits must reproduce the original number
assert number == int(bits, 2)

In [25]:
# two different recordings must be reported as different,
# while a file compared with itself must be reported as equal
assert not is_files_equal("Sound1.wav", "Sound2.wav")
assert is_files_equal("Sound1.wav", "Sound1.wav")

## Encode File

In [26]:
def encode_file_rice(input_file, output_file, k):
    """Rice-encode every byte of input_file with parameter k and write the result to output_file.

    The codewords of all input bytes are concatenated into one bit string.
    That string is filled up with '1' characters to a multiple of 8 bits and
    then packed into bytes.
    """
    with open(input_file, 'rb') as source:
        raw_bytes = source.read()

    # one Rice codeword per input byte, joined into a single bit string
    codewords = []
    for byte_value in raw_bytes:
        codewords.append(rice_encode(byte_value, k))
    bit_string = ''.join(codewords)

    # fill the last byte with '1's: a run of ones without a closing '0'
    # never forms a complete codeword
    leftover = len(bit_string) % 8
    if leftover:
        bit_string += '1' * (8 - leftover)
    payload = bits_to_bytes(bit_string)

    with open(output_file, 'wb') as sink:
        sink.write(payload)

In [27]:
def decoded_file_rice(input_file, output_file, k):
    """Decode a Rice-encoded file (parameter k) and write the decoded bytes to output_file.

    The input is expanded into a bit string and consumed one codeword at a
    time. Decoding stops at the first position from which no complete
    codeword can be read.
    """
    with open(input_file, 'rb') as source:
        bit_string = bytes_to_bits(source.read())

    total_bits = len(bit_string)
    symbols = []
    cursor = 0
    while cursor < total_bits:
        # the unary part of the next codeword ends at the next '0'
        terminator = bit_string.find('0', cursor)
        if terminator == -1:
            # only '1's are left, so there is no further codeword
            break

        # index one past the last remainder bit of this codeword
        codeword_end = terminator + k + 1
        if codeword_end > total_bits:
            # the remainder would run past the end of the data
            break

        symbols.append(rice_decode(bit_string[cursor:codeword_end], k))

        # the next codeword begins right after this one
        cursor = codeword_end

    with open(output_file, 'wb') as sink:
        sink.write(bytes(symbols))

## Processing

In [28]:
FILES = ["Sound1.wav", "Sound2.wav"]

table_data = []

for file in FILES:
    # all artefacts of a sample are named after the input's base name (extension dropped)
    sample_name = os.path.splitext(os.path.basename(file))[0]
    original_size = os.path.getsize(file)
    output_file_name = '{}_Enc.ex2'.format(sample_name)

    # round trip with k = 4
    encode_file_rice(file, output_file_name, 4)
    encoded_4_size = os.path.getsize(output_file_name)
    decoded_output_file_name = '{}_Enc_Dec.wav'.format(sample_name)
    decoded_file_rice(output_file_name, decoded_output_file_name, 4)

    # the decoded copy must match the original input byte for byte
    assert is_files_equal(file, decoded_output_file_name)

    # round trip with k = 2; the same two output files are overwritten
    encode_file_rice(file, output_file_name, 2)
    encoded_2_size = os.path.getsize(output_file_name)
    decoded_file_rice(output_file_name, decoded_output_file_name, 2)

    # the decoded copy must match the original input byte for byte
    assert is_files_equal(file, decoded_output_file_name)

    # size reduction relative to the original in percent
    # (negative when the encoded file is larger than the input)
    compression_percentage_4 = 100 * (1 - encoded_4_size / original_size)
    compression_percentage_2 = 100 * (1 - encoded_2_size / original_size)

    # one result row per input file
    table_data.append([
        file,
        original_size,
        encoded_4_size,
        encoded_2_size,
        compression_percentage_4,
        compression_percentage_2,
    ])

## Show Results

The results for the k parameters proposed in the task description are as follows (a negative compression percentage means the encoded file is larger than the original):

In [29]:
# render the collected rows as an HTML table; the first column (file name) has no header
table = tabulate.tabulate(
    table_data,
    headers=[
        "",
        "Original size",
        "Rice (K = 4 bits)",
        "Rice (K = 2 bits)",
        "% Compression (K = 4 bits)",
        "% Compression (K = 2 bits)",
    ],
    tablefmt='html',
)
table

Unnamed: 0,Original size,Rice (K = 4 bits),Rice (K = 2 bits),% Compression (K = 4 bits),% Compression (K = 2 bits)
Sound1.wav,1002088,1516265,4115718,-51.3106,-310.714
Sound2.wav,1008044,1575347,4348595,-56.2776,-331.389
