# Testing Run Length Encoding


## Imports


In [40]:
from collections import deque
from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader
from scipy.io import wavfile
from glob import glob
import numpy as np
import pandas as pd

# Load ../../encode by explicit path and expose it as the module `encode`
encode_loader = SourceFileLoader("encode", "../../encode")
spec = spec_from_loader("encode", encode_loader)
encode = module_from_spec(spec)
spec.loader.exec_module(encode)

# Load ../../decode the same way and expose it as the module `decode`
decode_loader = SourceFileLoader("decode", "../../decode")
spec = spec_from_loader("decode", decode_loader)
decode = module_from_spec(spec)
spec.loader.exec_module(decode)

## Function Definitions


In [None]:
def rle_algorithm(rle_l_raw: list):
    """Run-length encode a sequence of integers.

    A value that is immediately repeated is written once, followed by
    the number of *additional* repeats (run length minus one); six
    consecutive zeros are therefore encoded as ``0, 5``. A value that
    is not repeated is written as-is. For every run, the position of
    its value inside the encoded output is recorded separately.

    The running position is reduced by 65530 whenever it exceeds 65530
    so that each recorded location fits in an unsigned 16-bit integer
    (2 bytes). 65530 is an arbitrary value below the maximum of an
    unsigned 16-bit integer (65535). After a reduction, the recorded
    locations are offsets relative to the last reduction rather than
    absolute indices.

    Args:
        rle_l_raw (list): The integer values to be encoded. Any
                          iterable of integers (list, deque, bytes) is
                          accepted; it is copied into a list once so
                          that indexing is O(1).

    Returns:
        index_array (list): The run length encoded values.
        rle_locations (list): The (wrapped) positions within
                              index_array at which a repeated value
                              is stored; the repeat count follows at
                              the next position.
    """
    # A deque has O(n) indexed access away from its ends; copy once.
    values = list(rle_l_raw)
    total = len(values)
    wrap_at = 65530

    initial_index = 0
    second_index = 1
    index_array = []
    rle_locations = []
    rle_location_count = 0

    while second_index < total:
        current = values[initial_index]
        if current == values[second_index]:
            index_array.append(current)
            rle_locations.append(rle_location_count)

            # Count the additional repeats; the bound check must be
            # strict so a run reaching the end does not index past it.
            count = 0
            while second_index < total and values[second_index] == current:
                count += 1
                second_index += 1
            index_array.append(count)
            step = 2  # value + count
        else:
            index_array.append(current)
            step = 1  # value only

        # Keep the running position within the unsigned 16-bit range.
        if rle_location_count > wrap_at:
            rle_location_count -= wrap_at
        rle_location_count += step

        initial_index = second_index
        second_index += 1

    # The loop stops one element early unless a run consumed the tail.
    # Decide by position (not by value) whether the last element is
    # still pending, so it is never dropped or duplicated.
    if initial_index < total:
        index_array.append(values[initial_index])
    return index_array, rle_locations

## Import Data & Huffman Encode Values


In [41]:
# Pick the first .wav file found in the data directory.
# NOTE(review): glob does not guarantee a sorted order, so "first" may differ
# between machines — confirm whether a specific recording is intended.
data_dir = "../../data/"
data_file_l = glob(data_dir + "*.wav")
current_file = data_file_l[0]

# Read the sample rate and samples, then flatten the samples to raw bytes.
rate, data = wavfile.read(current_file)

data_bytes = data.tobytes()

# NOTE(review): this output path is hard-coded to one recording and is not
# derived from current_file — confirm they refer to the same file.
compressed_file_path = "../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav.brainwire"

# Huffman-encode the raw bytes to the compressed file (single source of
# truth for the path: the variable above).
encode.huffman_encoding(
    input_data=data_bytes,
    compressed_file_path=compressed_file_path,
)

# Read the compressed file back as bytes; the with-block closes the file.
with open(compressed_file_path, "rb") as fp:
    data_huffman_encoded_bytes = fp.read()

type(data_huffman_encoded_bytes)

In [91]:
# Print the first 12 bytes of the compressed data, one integer per line.
for byte in data_huffman_encoded_bytes[:12]:
    print(byte)

128
4
149
135
5
0
0
0
0
0
0
125


In [50]:
data_huffman_encoded_bytes_sub_string = data_huffman_encoded_bytes

# Iterating a bytes object yields integers, so the deque is built directly
# from it. This avoids a loop variable named `data`, which would overwrite
# the sample array returned by wavfile.read.
rle_l_raw = deque(data_huffman_encoded_bytes_sub_string)
rle_l_raw

In [54]:
# Run-length encode the byte values; returns the encoded list and the
# recorded positions of the runs within it.
index_array, rle_locations = rle_algorithm(rle_l_raw)

In [101]:
# Preview the first 30 entries of the encoded output.
index_array[:30]

[128,
 4,
 149,
 135,
 5,
 0,
 5,
 125,
 148,
 40,
 140,
 2,
 50,
 48,
 148,
 140,
 4,
 48,
 3,
 148,
 140,
 2,
 48,
 56,
 148,
 140,
 5,
 48,
 2,
 49]

In [56]:
# Check the container type of the recorded run locations.
type(rle_locations)

list

In [57]:
# Store the run locations as unsigned 16-bit integers, then wrap them in a
# single-column DataFrame and preview the first rows.
rle_locations = np.array(rle_locations, dtype=np.uint16)
rle_locations_pd = pd.DataFrame(rle_locations)
rle_locations_pd.head()

## Recreating the array of indices


In [21]:
# reconstructed_array

In [60]:
# Largest recorded run location.
rle_locations.max()

65404

In [61]:
# Entry stored right after the first recorded run location, i.e. the repeat
# count that rle_algorithm appends after a repeated value.
index_array[rle_locations[0] + 1]

5

In [62]:
# Display the recorded run locations.
rle_locations

array([    5,    17,    27,    39,    41,    43,    53,    55,    57,
          68,    70,    74,    84,    86,   100,   102,   105,   116,
         118,   121,   126,   131,   133,   143,   155,   157,   168,
         170,   182,   208,   219,   221,   232,   236,   247,   262,
         265,   276,   288,   290,   301,   314,   325,   336,   348,
         362,   375,   389,   404,   406,   420,   422,   435,   449,
         455,   463,   466,   479,   482,   485,   498,   501,   509,
         517,   520,   522,   535,   551,   553,   566,   585,   592,
         605,   607,   619,   633,   643,   645,   655,   657,   669,
         671,   673,   683,   696,   699,   709,   711,   721,   733,
         735,   745,   747,   758,   760,   763,   773,   775,   780,
         790,   792,   797,   808,   810,   817,   827,   829,   836,
         848,   850,   857,   859,   869,   871,   888,   890,   896,
         906,   908,   912,   922,   924,   926,   937,   939,   941,
         943,   953,

In [63]:
# Report the first run: the repeated value and the count stored after it.
first_run_location = rle_locations[0]
print(f"The value that is run length encoded: {index_array[first_run_location]}")
print(
    "The number of values detected for run length encoding: "
    f"{index_array[first_run_location + 1]}"
)

The value that is run length encoded: 0
The number of values detected for run length encoding: 5


In [64]:
# Copy rle_locations into a new NumPy array; no dtype is passed here.
rle_locations = np.array(rle_locations)

In [65]:
# Split the encoded values into the first 65536 entries and the remainder.
# NOTE(review): rle_algorithm wraps its location counter at 65530, not
# 65536 — confirm this split point matches the intended decoding scheme.
index_array_1 = index_array[:65536]
index_array_2 = index_array[65536:]

In [66]:
# Rebuild the DataFrame of run locations with a named column.
rle_locations_pd = pd.DataFrame(rle_locations)
rle_locations_pd.columns = ["Value"]

# Keep only the rows whose location is >= 65536 (other rows become NaN in
# where() and are removed by dropna()).
# NOTE(review): rle_locations was cast to uint16 in the In [57] cell, whose
# maximum is 65535, so this selection looks like it is always empty —
# confirm the intent.
rle_locations_2 = (
    rle_locations_pd.where(rle_locations_pd["Value"] >= 65536).dropna().values
)

# Flatten to 1-D, shift the locations down by 65536 and store them as int16.
# NOTE(review): int16 is signed; verify the shifted values cannot exceed its
# range.
rle_locations_2 = rle_locations_2.reshape(rle_locations_2.shape[0])
rle_locations_2 = rle_locations_2[:] - 65536
rle_locations_2 = np.array(rle_locations_2, dtype=np.int16)

In [87]:
# Keep the locations below 65536 and take the last one as a sample value.
values_pd = rle_locations_pd.where(rle_locations_pd["Value"] < 65536).dropna()
sample_value = values_pd["Value"].iloc[-1]

In [74]:
# Cast the sample value to uint16 to inspect the result of the conversion.
np.array(sample_value, dtype=np.uint16)

array(64714, dtype=uint16)

In [88]:
# Display the sample value before any cast, for comparison.
sample_value

64714

In [84]:
# Cast the first 65536 entries of the "Value" column to uint16 for inspection.
np.array(rle_locations_pd["Value"][:65536], dtype=np.uint16)

array([    5,    17,    27,    39,    41,    43,    53,    55,    57,
          68,    70,    74,    84,    86,   100,   102,   105,   116,
         118,   121,   126,   131,   133,   143,   155,   157,   168,
         170,   182,   208,   219,   221,   232,   236,   247,   262,
         265,   276,   288,   290,   301,   314,   325,   336,   348,
         362,   375,   389,   404,   406,   420,   422,   435,   449,
         455,   463,   466,   479,   482,   485,   498,   501,   509,
         517,   520,   522,   535,   551,   553,   566,   585,   592,
         605,   607,   619,   633,   643,   645,   655,   657,   669,
         671,   673,   683,   696,   699,   709,   711,   721,   733,
         735,   745,   747,   758,   760,   763,   773,   775,   780,
         790,   792,   797,   808,   810,   817,   827,   829,   836,
         848,   850,   857,   859,   869,   871,   888,   890,   896,
         906,   908,   912,   922,   924,   926,   937,   939,   941,
         943,   953,

In [77]:
# Keep only the rows whose location is below 65536 (other rows become NaN in
# where() and are removed by dropna()).
rle_locations_1 = (
    rle_locations_pd.where(rle_locations_pd["Value"] < 65536).dropna().values
)
# Flatten to 1-D and store as unsigned 16-bit integers.
rle_locations_1 = rle_locations_1.reshape(rle_locations_1.shape[0])
rle_locations_1 = np.array(rle_locations_1, dtype=np.uint16)

In [86]:
# Display the locations that are below 65536.
rle_locations_1

array([    5,    17,    27,    39,    41,    43,    53,    55,    57,
          68,    70,    74,    84,    86,   100,   102,   105,   116,
         118,   121,   126,   131,   133,   143,   155,   157,   168,
         170,   182,   208,   219,   221,   232,   236,   247,   262,
         265,   276,   288,   290,   301,   314,   325,   336,   348,
         362,   375,   389,   404,   406,   420,   422,   435,   449,
         455,   463,   466,   479,   482,   485,   498,   501,   509,
         517,   520,   522,   535,   551,   553,   566,   585,   592,
         605,   607,   619,   633,   643,   645,   655,   657,   669,
         671,   673,   683,   696,   699,   709,   711,   721,   733,
         735,   745,   747,   758,   760,   763,   773,   775,   780,
         790,   792,   797,   808,   810,   817,   827,   829,   836,
         848,   850,   857,   859,   869,   871,   888,   890,   896,
         906,   908,   912,   922,   924,   926,   937,   939,   941,
         943,   953,

In [18]:
# Create a list of indices marking the positions of the run-length-encoded
# (RLE) elements within the array of encoded values.

In [90]:
# Collect every odd index of index_array whose entry is greater than 1.
# NOTE(review): unrepeated values occupy a single slot in index_array, so odd
# indices are not guaranteed to hold repeat counts — verify against
# rle_locations, which records the run positions directly.
rle_positions = [
    position
    for position in range(1, len(index_array), 2)
    if index_array[position] > 1
]
rle_positions

[1,
 3,
 7,
 9,
 11,
 13,
 15,
 17,
 19,
 21,
 23,
 25,
 27,
 29,
 31,
 33,
 35,
 37,
 39,
 41,
 43,
 45,
 47,
 49,
 51,
 53,
 55,
 57,
 59,
 61,
 63,
 65,
 67,
 69,
 73,
 77,
 79,
 81,
 83,
 85,
 89,
 91,
 93,
 95,
 97,
 99,
 101,
 105,
 107,
 109,
 111,
 113,
 115,
 117,
 121,
 123,
 125,
 129,
 131,
 133,
 135,
 137,
 139,
 141,
 143,
 145,
 147,
 149,
 151,
 153,
 155,
 157,
 159,
 161,
 163,
 165,
 167,
 171,
 173,
 175,
 177,
 179,
 181,
 185,
 187,
 189,
 191,
 193,
 195,
 197,
 199,
 201,
 203,
 205,
 207,
 211,
 213,
 215,
 217,
 219,
 221,
 223,
 225,
 227,
 229,
 231,
 235,
 239,
 241,
 243,
 245,
 247,
 249,
 251,
 253,
 255,
 257,
 259,
 261,
 265,
 267,
 269,
 271,
 273,
 275,
 277,
 279,
 281,
 283,
 285,
 287,
 289,
 293,
 295,
 297,
 299,
 301,
 303,
 305,
 307,
 309,
 311,
 313,
 315,
 317,
 319,
 321,
 323,
 325,
 327,
 329,
 331,
 333,
 335,
 337,
 339,
 341,
 343,
 345,
 347,
 351,
 353,
 355,
 357,
 359,
 361,
 363,
 365,
 367,
 369,
 371,
 373,
 375,
 377,
 379,


In [83]:
# Print the encoded entry found at each collected position; the enumerate
# index was never used, so iterate over the positions directly.
for position in rle_positions:
    print(index_array[position])

4
135
125
40
2
48
140
48
148
2
56
140
48
49
148
2
101
140
48
49
48
148
2
101
140
48
49
48
49
140
48
148
9
2
49
140
54
148
9
2
49
49
140
101
148
9
2
49
48
140
49
148
9
2
49
148
2
140
48
49
148
2
48
140
48
49
148
2
49
140
48
49
48
140
97
148
5
2
140
100
148
4
49
140
101
148
5
49
49
148
2
100
140
48
48
140
53
148
5
49
48
148
2
99
140
48
49
140
54
148
7
49
48
48
148
2
98
140
48
49
148
2
102
140
48
2
148
2
101
140
48
3
140
102
148
7
49
48
148
2
97
140
48
4
140
102
148
5
48
148
2
54
140
49
2
148
2
48
140
49
148
2
50
140
49
49
7
140
53
148
11
48
48
49
140
49
148
11
48
48
49
148
2
51
140
49
49
5
148
2
50
140
49
49
5
2
140
97
148
9
48
48
49
140
57
148
8
48
48
49
140
49
148
10
48
48
49
2
140
97
148
12
48
48
49
48
148
2
52
140
49
49
2
48
49
49
140
100
148
11
48
48
49
140
102
148
9
48
48
49
49
140
102
148
8
48
48
49
148
2
99
140
49
49
148
2
51
140
49
49
49
140
48
148
5
48
148
2
98
140
49
49
48
148
2
50
140
49
49
48
148
2
57
140
49
49
148
2
102
140
49
48
148
2
49
140
49
48
49
148
2
55
140
49
48
49
