# Comparing and Contrasting Different Compression Methods


## Library Imports


In [1]:
# Imports
from glob import glob
from scipy.io import wavfile
from signal_processing_utilities import process_signal
import time
import os
import numpy as np

import zlib
import gzip
import bz2
import lzma

from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader

import pandas as pd
import matplotlib.pyplot as plt

# The `encode` script has no `.py` extension, so it is loaded through an
# explicit SourceFileLoader rather than a regular import statement.
encode_loader = SourceFileLoader("encode", "../.././encode")
spec = spec_from_loader("encode", encode_loader)
encode = module_from_spec(spec)
encode_loader.exec_module(encode)

# Same procedure for the extension-less `decode` script.
decode_loader = SourceFileLoader("decode", "../.././decode")
spec = spec_from_loader("decode", decode_loader)
decode = module_from_spec(spec)
decode_loader.exec_module(decode)

## Function Definitions


In [14]:
def compare_compression_ratio(file, compressed_file: str, method: str = None):
    """Print the sizes before and after compression and the percent saved.

    The percentage is ``(1 - compressed_size / original_size) * 100``, so a
    negative value means the compressed form is larger than the input.

    Args:
        file (numpy.ndarray): This is the array of amplitudes before
                              compression. Its size is measured as
                              ``len(file.tobytes())``.
        compressed_file (str): This is the compressed representation of
                               the amplitudes after the method of
                               compression has been applied. Only its
                               ``len()`` is used, so a ``bytes`` object
                               works despite the ``str`` annotation.
        method (str):  This is the string representing the method of
                       compression. Defaults to None, in which case no
                       method line is printed.
    """
    # Serialize once: tobytes() copies the whole array on every call.
    original_size = len(file.tobytes())
    compressed_size = len(compressed_file)

    if method is not None:
        print(f"\nMethod of Compression: {method}")
    else:
        print("\n")
    print(f"Initial file size: {original_size} bytes.")
    print(f"Compressed File Size: {compressed_size} bytes.")
    if original_size:
        percent_compression = (1 - (compressed_size / original_size)) * 100
        print(f"Percent of Compression: {percent_compression:.2f}%")
    else:
        # An empty input has no meaningful ratio; avoid ZeroDivisionError.
        print("Percent of Compression: undefined (input is empty)")
    print("\n")

In [15]:
def print_compression_efficiency_metrics_wrapper(
    file: str, compressed_file: str, start_time: int, stop_time: int, method: str
):
    """Report the size and timing results for one compression run.

    The size comparison is printed first through
    ``compare_compression_ratio``; the elapsed time is then printed through
    ``process_signal.print_time_each_function_takes_to_complete_processing``.

    Args:
        file (str): The data before compression. It is forwarded unchanged
                    to ``compare_compression_ratio``, which calls
                    ``.tobytes()`` on it, so an array is expected despite
                    the ``str`` annotation.
        compressed_file (str): The compressed representation of the data,
                               forwarded unchanged to
                               ``compare_compression_ratio``.
        start_time (int): The time, in nanoseconds, at which the
                          compression started.
        stop_time (int): The time, in nanoseconds, at which the
                         compression finished.
        method (str): The label of the compression method. It is printed
                      with the size report and passed to the timing
                      report as ``executed_line``.
    """
    # Size report first so the timing line is printed beneath it.
    compare_compression_ratio(file, compressed_file, method)

    report_elapsed_time = (
        process_signal.print_time_each_function_takes_to_complete_processing
    )
    report_elapsed_time(
        start_time=start_time, stop_time=stop_time, executed_line=method
    )

## Data Import & Formatting


In [16]:
# Directory holding the .wav recordings, relative to this notebook.
data_dir = "../../data/"
# Every .wav file directly inside data_dir.
data_file_list = glob(f"{data_dir}*.wav")
# current_file = data_file_list[0]

In [17]:
# The two recordings used throughout the comparison.
current_file = "../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav"
debug_file = "../../data/0052503c-2849-4f41-ab51-db382103690c.wav"

# Output paths: the source path with a ".brainwire" suffix appended.
current_compressed_file = f"{current_file}.brainwire"
debug_compressed_file = f"{debug_file}.brainwire"

In [18]:
# Current file: expected to hold fewer than 256 unique amplitudes.
print(f"current_file: \n\t{current_file}\n")
sr, current_file_data = wavfile.read(current_file)
current_unique_count = np.unique(current_file_data).size
print(f"Number of unique elements: \n{current_unique_count}\n")

# Debug file: expected to hold more than 256 unique amplitudes.
print(f"debug_file: \n\t{debug_file}\n")
sr, debug_file_data = wavfile.read(debug_file)
debug_unique_count = np.unique(debug_file_data).size
print(f"Number of unique elements: \n{debug_unique_count}\n")

current_file: 
	../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav

Number of unique elements: 
158

debug_file: 
	../../data/0052503c-2849-4f41-ab51-db382103690c.wav

Number of unique elements: 
266



## Brainwire


### Testing methods of compression where the number of unique amplitudes is > 256.


In [None]:
# Debug data: the number of unique amplitudes is > 256.
# method_of_compression == 'w'
start_time = time.time_ns()
data_brainwire = encode.compress(debug_file, quick=True)
stop_time = time.time_ns()

# The baseline must be the data that was actually compressed (debug_file);
# comparing against current_file_data reports a meaningless ratio.
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=debug_file_data,
    compressed_data=data_brainwire,
    method="unique_amplitudes_l > 256; encode.compress(debug_file)",
)

In [None]:
# Debug data: the number of unique amplitudes is > 256.
# method_of_compression == 'n'

start_time = time.time_ns()
parser = encode.initialize_argument_parser()
args = parser.parse_args([debug_file, debug_compressed_file, "-m=n"])
encode.main(args)
stop_time = time.time_ns()

# Read the result back after the clock has stopped. The file is only read,
# so it is opened read-only, and the `with` block closes it on exit.
with open(debug_compressed_file, "rb") as fp:
    debug_compressed_file_data = fp.read()

# The label names the code path that was actually timed (encode.main, -m=n).
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=debug_file_data,
    compressed_data=debug_compressed_file_data,
    method="unique_amplitudes_l > 256; encode.main(); method of compression = 'n'",
)

### Testing methods of compression where the number of unique amplitudes is < 256.


In [None]:
# Current file: fewer than 256 unique amplitudes.
# Method of compression == 'u'

# Time only the call to encode.compress.
start_time = time.time_ns()
current_file_data_brainwire = encode.compress(current_file)
stop_time = time.time_ns()

process_signal.print_compression_efficiency_metrics_wrapper(
    original_data=current_file_data,
    compressed_data=current_file_data_brainwire,
    method="unique_amplitudes_l < 256; encode.compress(current_file_data)",
    start_time=start_time,
    stop_time=stop_time,
)

In [None]:
# Unique Amplitudes are < 256
# Method of Compression == 'n'

start_time = time.time_ns()
parser = encode.initialize_argument_parser()
compressed_file = current_file + ".brainwire"
args = parser.parse_args([current_file, compressed_file, "-m=n"])
encode.main(args)
# Stop the clock before reading the result back, so the measurement does not
# include the file read (the > 256 'n' cell stops the clock at this point too).
stop_time = time.time_ns()

# Read-only open; the `with` block closes the file on exit.
with open(compressed_file, "rb") as fp:
    compressed_file_data = fp.read()

process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=current_file_data,
    compressed_data=compressed_file_data,
    method="unique_amplitudes_l < 256; encode.main(); method of compression = 'n'",
)

In [None]:
# Unique Amplitudes are < 256
# Method of Compression == 'h'

start_time = time.time_ns()
parser = encode.initialize_argument_parser()
compressed_file = current_file + ".brainwire"
args = parser.parse_args([current_file, compressed_file, "-m=h"])
encode.main(args)
# Stop the clock before reading the result back, so the measurement does not
# include the file read.
stop_time = time.time_ns()

# Read-only open; the `with` block closes the file on exit.
with open(compressed_file, "rb") as fp:
    compressed_file_data = fp.read()

# The label names the code path that was actually timed (encode.main, -m=h).
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=current_file_data,
    compressed_data=compressed_file_data,
    method="unique_amplitudes_l < 256; encode.main(); method of compression = 'h'",
)

In [None]:
# Unique Amplitudes are < 256
# Method of Compression == 'u'

start_time = time.time_ns()
parser = encode.initialize_argument_parser()
compressed_file = current_file + ".brainwire"
args = parser.parse_args([current_file, compressed_file, "-m=u"])
encode.main(args)
# Stop the clock before reading the result back, so the measurement does not
# include the file read.
stop_time = time.time_ns()

# Read-only open; the `with` block closes the file on exit.
with open(compressed_file, "rb") as fp:
    compressed_file_data = fp.read()

# The label names the code path that was actually timed (encode.main, -m=u).
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=current_file_data,
    compressed_data=compressed_file_data,
    method="unique_amplitudes_l < 256; encode.main(); method of compression = 'u'",
)

In [14]:
# Decompress the output of encode.compress(current_file) and time the call.
start_time = time.time_ns()
decompressed_output = decode.decompress(current_file_data_brainwire)
stop_time = time.time_ns()

# decode.decompress yields the sample rate followed by the amplitudes.
sample_rate, data_brainwire_amplitude_current_file = decompressed_output

In [None]:
# Check that the round trip is lossless. np.array_equal compares the shapes
# as well as every element, so a decompressed array that is shorter or longer
# than the original is reported as a mismatch instead of raising IndexError
# or passing unnoticed.
not_equal = not np.array_equal(
    current_file_data, data_brainwire_amplitude_current_file
)

# Prints False when the decompressed data matches the original exactly.
print(not_equal)

In [16]:
# Wrap the decompressed amplitudes in a single-column DataFrame.
data_brainwire_amplitude_pd = pd.DataFrame(
    data=data_brainwire_amplitude_current_file,
    columns=["Amplitude"],
)

In [None]:
# Plot the original raw neural data (current_file_data) with the decompressed
# reconstruction overlaid, to inspect the round trip visually.

# Select the style before creating the figure: a figure picks up its
# rcParams at creation time, so a later style change would miss it.
plt.style.use("ggplot")
plt.figure(figsize=(12, 4))

plt.title("Reconstructed Compressed Neural Data of Detected Neural Spikes")
plt.plot(current_file_data, color="purple", linewidth=1, label="Raw Neural Data")
# The reconstruction is drawn thinner so the raw trace stays visible under it.
plt.plot(
    data_brainwire_amplitude_pd,
    linewidth=0.5,
    color="teal",
    label="Decompressed Neural Data",
)
plt.grid(True)
plt.xlabel("Sample")
plt.ylabel("Amplitude")
plt.legend()
plt.axhline(y=0, color="black")
plt.show()

In [18]:
# Destination for the test output, under the current working directory.
file_path = f"{os.getcwd()}/data/test_compression.brainwire"

In [19]:
# Write the compressed data to disk. The file is only written, so "wb" is
# sufficient, and the `with` block closes it on exit.
with open(file_path, "wb") as fp:
    # fp.write returns the number of bytes written.
    written_data = fp.write(data_brainwire)

In [None]:
# Display the value returned by fp.write() when the compressed data was saved.
written_data

## zlib


In [None]:
# Baseline: compress the raw amplitudes with zlib, timing only the call.
start_time = time.time_ns()
data_zlib = zlib.compress(current_file_data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    file=current_file_data,
    compressed_file=data_zlib,
    method="zlib.compress(current_file_data)",
    start_time=start_time,
    stop_time=stop_time,
)

## Gzip


In [None]:
# Baseline: compress the raw amplitudes with gzip, timing only the call.
start_time = time.time_ns()
data_gz = gzip.compress(current_file_data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    file=current_file_data,
    compressed_file=data_gz,
    method="gzip.compress(current_file_data)",
    start_time=start_time,
    stop_time=stop_time,
)

## bz2


In [None]:
# Baseline: compress the raw amplitudes with bz2, timing only the call.
start_time = time.time_ns()
data_bz2 = bz2.compress(current_file_data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    file=current_file_data,
    compressed_file=data_bz2,
    method="bz2.compress(current_file_data)",
    start_time=start_time,
    stop_time=stop_time,
)

## lzma


In [None]:
# Baseline: compress the raw amplitudes with lzma, timing only the call.
start_time = time.time_ns()
data_lzma = lzma.compress(current_file_data)
stop_time = time.time_ns()

# Report the lzma output; passing data_bz2 here would print the bz2 size
# under the lzma label.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=current_file_data,
    compressed_file=data_lzma,
    method="lzma.compress(current_file_data)",
)