# Testing Run Length Encoding


## Imports


In [1]:
from collections import deque
from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader
from scipy.io import wavfile
from glob import glob
import numpy as np

# Load the "encode" script as a module. Its path carries no ".py" suffix,
# so an explicit SourceFileLoader is used to read it as Python source.
encode_loader = SourceFileLoader("encode", "../../encode")
spec = spec_from_loader("encode", encode_loader)
encode = module_from_spec(spec)
encode_loader.exec_module(encode)

# Load the "decode" script the same way.
decode_loader = SourceFileLoader("decode", "../../decode")
spec = spec_from_loader("decode", decode_loader)
decode = module_from_spec(spec)
decode_loader.exec_module(decode)

In [2]:
data_dir = "../../data/"
# glob() returns its matches in an arbitrary, file-system dependent order;
# sort them so that the same recording is selected on every run.
data_file_l = sorted(glob(data_dir + "*.wav"))
if not data_file_l:
    # Fail with an explicit message instead of a bare IndexError below.
    raise FileNotFoundError(f"No .wav files found in {data_dir!r}")
current_file = data_file_l[0]

In [3]:
rate, data = wavfile.read(current_file)

In [4]:
data_bytes = data.tobytes()

In [5]:
compressed_file_path = "../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav.brainwire"

In [6]:
# Reuse the path variable defined earlier rather than repeating the literal,
# so the file written here is always the one that is read back afterwards.
encode.huffman_encoding(
    input_data=data_bytes,
    compressed_file_path=compressed_file_path,
)

In [7]:
# Read the compressed file back as raw bytes. The with-statement closes the
# file on exit, so no explicit close() call is needed.
with open(compressed_file_path, "rb") as fp:
    data_huffman_encoded_bytes = fp.read()

In [8]:
type(data_huffman_encoded_bytes)

bytes

In [9]:
# Preview the first twelve bytes of the encoded stream as integers.
leading_bytes = data_huffman_encoded_bytes[:12]
for byte_value in leading_bytes:
    print(byte_value)

128
4
149
135
5
0
0
0
0
0
0
125


In [10]:
data_huffman_encoded_bytes_sub_string = data_huffman_encoded_bytes

In [11]:
# Build the deque straight from the byte string: iterating over bytes
# yields integers, and no loop variable is left behind to overwrite the WAV
# samples held in `data`.
rle_l_raw = deque(data_huffman_encoded_bytes_sub_string)

In [12]:
rle_l_encoded = []

In [13]:
rle_l_raw

deque([128,
       4,
       149,
       135,
       5,
       0,
       0,
       0,
       0,
       0,
       0,
       125,
       148,
       40,
       140,
       2,
       50,
       48,
       148,
       140,
       4,
       48,
       48,
       48,
       48,
       148,
       140,
       2,
       48,
       56,
       148,
       140,
       5,
       48,
       48,
       48,
       49,
       48,
       148,
       140,
       2,
       48,
       101,
       148,
       140,
       8,
       48,
       48,
       48,
       49,
       49,
       48,
       48,
       48,
       148,
       140,
       2,
       53,
       101,
       148,
       140,
       8,
       48,
       48,
       48,
       49,
       49,
       48,
       48,
       49,
       148,
       140,
       2,
       48,
       102,
       148,
       140,
       9,
       48,
       48,
       48,
       49,
       49,
       48,
       49,
       48,
       48,
       148,
       140,
       2,


In [28]:
def rle_algorithm(rle_l_raw: list, *, location_wrap: int = 65530) -> tuple:
    """Run-length encode a sequence of integers.

    The input is scanned from left to right. A value that is not
    followed by an identical value is copied to the output unchanged.
    A run of two or more identical values is written as the pair
    ``value, count`` where ``count`` is the number of repeats that
    follow the first occurrence (the run length minus one).

    Because single values and ``value, count`` pairs share one output
    list, the position of every pair is recorded separately. To keep
    each recorded position storable in an unsigned 16-bit integer, the
    running position is reduced by ``location_wrap`` whenever it has
    grown beyond that value, so every recorded location equals the
    true index in the output minus a multiple of ``location_wrap``.
    The default of 65530 is an arbitrary value below 65536.

    Args:
        rle_l_raw (list): The integer values to be encoded. Any
                          iterable of integers (list, deque, bytes)
                          is accepted.
        location_wrap (int): Threshold above which the running
                             position is reduced, and the amount it
                             is reduced by. Must be at least 2.

    Returns:
        index_array (list): This is the list of run length encoded
                            values.
        rle_locations (list): The (wrapped) positions within
                              index_array at which a ``value, count``
                              pair starts.

    Raises:
        ValueError: If location_wrap is smaller than 2, in which case
                    the running position could not be kept bounded.
    """
    if location_wrap < 2:
        raise ValueError("location_wrap must be at least 2")

    # Copy into a list once: indexing a deque away from its ends is
    # O(n), which would make the scan below quadratic.
    values = list(rle_l_raw)
    total = len(values)

    index_array = []
    rle_locations = []
    rle_location_count = 0

    initial_index = 0
    while initial_index < total:
        current = values[initial_index]

        # Advance to the first element that differs from the current
        # one. The bounds test comes first so that a run reaching the
        # end of the input never indexes past the last element.
        second_index = initial_index + 1
        while second_index < total and values[second_index] == current:
            second_index += 1
        count = second_index - initial_index - 1

        index_array.append(current)
        if count:
            # The location is recorded before any wrap is applied, so
            # a recorded value can be at most location_wrap + 2.
            rle_locations.append(rle_location_count)
            index_array.append(count)
            emitted = 2
        else:
            emitted = 1

        # Wrap identically for single values and for runs so that the
        # counter always stays in step with the output position.
        if rle_location_count > location_wrap:
            rle_location_count -= location_wrap
        rle_location_count += emitted

        initial_index = second_index

    return index_array, rle_locations

In [23]:
index_array, rle_locations = rle_algorithm(rle_l_raw)

break


In [None]:
index_array

In [None]:
# Report the first recorded run: the repeated value, then the count that
# is stored immediately after it in index_array.
first_run_at = rle_locations[0]
print(f"The value that is run length encoded: {index_array[first_run_at]}")
print(
    "The number of values detected for run length encoding: "
    f"{index_array[first_run_at + 1]}"
)

In [18]:
rle_locations = np.array(rle_locations)

In [19]:
index_array_1 = index_array[:65536]
index_array_2 = index_array[65536:]

In [20]:
import pandas as pd

In [21]:
rle_locations_pd = pd.DataFrame(rle_locations)

In [22]:
rle_locations_pd.columns = ["Value"]

In [23]:
# Select the locations that are at least 65536: where() replaces the rows
# that fail the condition with NaN, and dropna() then discards those rows.
# NOTE(review): the NaN fill presumably upcasts the column to float --
# confirm the dtype before any later integer cast.
rle_locations_2 = (
    rle_locations_pd.where(rle_locations_pd["Value"] >= 65536).dropna().values
)

In [24]:
rle_locations_2 = rle_locations_2.reshape(rle_locations_2.shape[0])

In [25]:
# Rebase the selected locations so that they start counting from zero.
rle_locations_2 = rle_locations_2 - 65536
# Store as unsigned 16-bit: a signed int16 tops out at 32767 and would turn
# larger offsets into negative numbers.
rle_locations_2 = np.array(rle_locations_2, dtype=np.uint16)

In [None]:
rle_locations_2

In [84]:
# Keep only the rows whose location is below 65536: where() replaces the
# other rows with NaN and dropna() removes them.
values_pd = rle_locations_pd.where(rle_locations_pd["Value"] < 65536).dropna()

In [85]:
sample_value = values_pd["Value"].iloc[-1]

In [None]:
np.array(sample_value, dtype=np.uint16)

In [None]:
sample_value

In [None]:
# Preview the first 65536 locations as unsigned 16-bit integers; a signed
# int16 cannot represent values above 32767.
np.array(rle_locations_pd["Value"][:65536], dtype=np.uint16)

In [72]:
# Select the locations that are below 65536: where() replaces the rows that
# fail the condition with NaN, and dropna() then discards those rows.
# NOTE(review): the NaN fill presumably upcasts the column to float --
# confirm the dtype before any later integer cast.
rle_locations_1 = (
    rle_locations_pd.where(rle_locations_pd["Value"] < 65536).dropna().values
)

In [75]:
rle_locations_1 = rle_locations_1.reshape(rle_locations_1.shape[0])

In [78]:
# These locations range up to 65535, which only fits an unsigned 16-bit
# integer; a signed int16 would wrap everything above 32767 to a negative
# number.
rle_locations_1 = np.array(rle_locations_1, dtype=np.uint16)

In [None]:
rle_locations_1

In [18]:
# Create a list of indices marking the positions of the run-length-encoded
# elements within the array of values.

In [None]:
# Collect every odd index of index_array whose entry is greater than one.
rle_positions = [
    position
    for position in range(1, len(index_array), 2)
    if index_array[position] > 1
]

In [None]:
rle_positions

In [None]:
# Print the index_array entry stored at each collected position. The
# positions are iterated directly because no running counter is needed.
for position in rle_positions:
    print(index_array[position])

In [None]:
# Update the array so that it stores only individual values, plus a marker
# signifying any value that occurs more than once.