# Comparing and Contrasting Different Compression Methods


In [27]:
# Imports
from glob import glob
from scipy.io import wavfile
from signal_processing_utilities import process_signal
import time
import os
import numpy as np

import zlib
import gzip
import bz2
import lzma

from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader

# Load the `encode` script (its path carries no `.py` suffix) as a module
# by handing an explicit source loader to importlib.
encode_loader = SourceFileLoader("encode", "../.././encode")
spec = spec_from_loader("encode", encode_loader)
encode = module_from_spec(spec)
spec.loader.exec_module(encode)

# Load the `decode` script the same way. `spec` is rebound here, so after
# this cell it refers to the decode module's spec.
decode_loader = SourceFileLoader("decode", "../.././decode")
spec = spec_from_loader("decode", decode_loader)
decode = module_from_spec(spec)
spec.loader.exec_module(decode)

In [2]:
def compare_compression_ratio(
    file: np.ndarray, compressed_file: bytes, method: str = None
):
    """Print the raw size, the compressed size and the percent saved.

    Args:
        file (numpy.ndarray): The array of amplitudes before compression.
                              Its size is measured as the length of
                              ``file.tobytes()``.
        compressed_file (bytes): The compressed representation of the
                                 amplitudes after the method of
                                 compression has been applied. Only its
                                 length is used.
        method (str): The string naming the method of compression.
                      Defaults to None, in which case no method line is
                      printed.
    """
    # tobytes() copies the whole buffer, so serialize once and reuse the size.
    initial_size = len(file.tobytes())
    compressed_size = len(compressed_file)

    # An empty input has nothing to compress; report 0% rather than
    # dividing by zero.
    if initial_size:
        percent_compression = (1 - (compressed_size / initial_size)) * 100
    else:
        percent_compression = 0.0

    if method is not None:
        print(f"\nMethod of Compression: {method}")
    else:
        print("\n")
    print(f"Initial file size: {initial_size} bytes.")
    print(f"Compressed File Size: {compressed_size} bytes.")
    print(f"Percent of Compression: {percent_compression:.2f}%")
    print("\n")

In [3]:
def print_compression_efficiency_metrics_wrapper(
    file: np.ndarray,
    compressed_file: bytes,
    start_time: int,
    stop_time: int,
    method: str,
):
    """Print the compression ratio followed by the elapsed-time report.

    Args:
        file (numpy.ndarray): The array of amplitudes before compression.
                              It is forwarded to
                              ``compare_compression_ratio``, which calls
                              ``file.tobytes()`` on it.
        compressed_file (bytes): The compressed representation of the
                                 amplitudes after the method of
                                 compression has been applied.
        start_time (int): The time in nanoseconds taken just before the
                          method of compression ran.
        stop_time (int): The time in nanoseconds taken just after the
                         method of compression finished.
        method (str): The string naming the method of compression. It is
                      used as the label in both printed reports.
    """
    # Size report first, then the timing report, both labelled with `method`.
    compare_compression_ratio(file=file, compressed_file=compressed_file, method=method)
    process_signal.print_time_each_function_takes_to_complete_processing(
        start_time=start_time, stop_time=stop_time, executed_line=method
    )

In [4]:
data_dir = "../../data"
# Every .wav recording found in the data directory (glob order is arbitrary).
data_file_list = glob(data_dir + "/*.wav")
# The notebook is pinned to one specific recording. The former
# `data_file_list[0]` assignment was overwritten immediately and raised
# IndexError whenever the directory held no .wav files, so it is dropped.
current_file = data_dir + "/102b47d9-371e-412a-8995-0dc6115ab2bb.wav"

In [5]:
# current_file = "../../data/0ab237b7-fb12-4687-afed-8d1e2070d621.wav"

In [None]:
# Display the path of the recording currently selected for compression.
current_file

In [20]:
# Read the selected recording: `rate` is the sample rate and `data` the
# array of samples returned by scipy's wavfile reader.
rate, data = wavfile.read(current_file)

In [25]:
# Path of a second recording used by the debugging cells.
debug_file = "../../data/0052503c-2849-4f41-ab51-db382103690c.wav"

In [28]:
# NOTE: this rebinds `rate` and `data` to the debug recording, replacing
# whatever was previously read from `current_file`.
rate, data = wavfile.read(debug_file)

In [None]:
# Number of distinct sample values present in `data`.
len(np.unique(data))

In [40]:
# Size in bytes of the raw sample buffer (the uncompressed baseline).
len(data.tobytes())

197378

In [42]:
# Length of the compressed payload held in `data_brainwire`.
len(data_brainwire)

144725

In [43]:
# Display the compressed payload itself for inspection.
data_brainwire

b'fee1a364141d0b21fdf71ef61217e61befa69a9b065ff9a200031fde5e131c659d04010560020e5c66a7e7dbf111ff18e85beda4f4e2fa61070afb9f20dd63f89ee5a51aea5999f00fa0fc2223100962e4265ada19dc9c16f30de3f5080c1525e9ecf25d242767689828d8a8d9ebeea1e0df091x031x061x021x051x021x0x1x031x0212031x0x1x0312021x041x0x1x041x0x1x0x1x021x0x12031x0x13031x0x14041x0x140x1x031x0x140x12021x0x15031x0x16041x0x160x1x021x0x17021203130x1x051x031x0x1x021x0x1x0x1x021x0x120x120412021x02120212031202120x1x021202120x120x1202130x120x1x0x130215041x0312021x031x021x021x031x021x021x021x031x021x021x021x0212021x021x021x0x12021x021x0213021x021x0x12021x0213021x0x1x0x1x021x0x12031x021x0x12021x021x021x0x12021x0x12021x0x120213021x0x120x12021x0x1402120x1x02140x1x031x0x1x02120x1x0x1x0x1x0x1x0x130x120x1x0x15061205130413031x0x12031203120312021303120x1x021203120x1x0x1x0x1203120x1x0x120x1203120x1x0x1503120x14031502130x1x02120x1x0x1x0x120x1x0x120x120x1x0x150x120x120x130x120x1402120x140x1x04120x140x1x03130x140x1x02130x140x1x0x130x140x140x1502120x150x130x

In [41]:
# Time the quick-mode compression of the debug recording. perf_counter_ns()
# is a monotonic, high-resolution clock intended for measuring durations;
# only the difference between the two readings is meaningful.
start_time = time.perf_counter_ns()
data_brainwire = encode.compress(debug_file, quick=True)
stop_time = time.perf_counter_ns()

# Report the size reduction relative to `data` and the elapsed time.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_brainwire,
    method="encode.compress(debug_file)",
)


Method of Compression: encode.compress(debug_file)
Initial file size: 197378 bytes.
Compressed File Size: 144725 bytes.
Percent of Compression: 26.68%



Executed Line: encode.compress(debug_file)...
Time Δ Nanoseconds: 532091000
Time Δ Microseconds: 532091.0
Time Δ Milliseconds: 532.091
Time Δ Seconds: 0.532091




## Brainwire


In [37]:
# Display the raw sample array currently bound to `data`.
data

array([-1570, -2018, -2338, ..., -2210, -2402, -2338], dtype=int16)

In [None]:
# Re-read the recording that is about to be compressed so that `data` (the
# uncompressed baseline used here and by the cells below) matches
# `current_file`; earlier debugging cells rebind `data` to another file.
rate, data = wavfile.read(current_file)

# Time the compression with a monotonic, high-resolution clock; the file
# read above is deliberately kept outside the timed region.
start_time = time.perf_counter_ns()
data_brainwire = encode.compress(current_file)
stop_time = time.perf_counter_ns()

# The label names the argument that is actually passed to encode.compress.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_brainwire,
    method="encode.compress(current_file)",
)

In [9]:
# Time the decompression with a monotonic, high-resolution clock.
start_time = time.perf_counter_ns()
sample_rate, data_brainwire_amplitude = decode.decompress(data_brainwire)
stop_time = time.perf_counter_ns()

# Report the measurement; previously the two timestamps were taken but
# never printed, and later cells overwrite them.
process_signal.print_time_each_function_takes_to_complete_processing(
    start_time=start_time,
    stop_time=stop_time,
    executed_line="decode.decompress(data_brainwire)",
)

In [10]:
import pandas as pd
import matplotlib.pyplot as plt

In [11]:
# Wrap the decompressed amplitudes in a DataFrame with one "Amplitude"
# column so they can be passed to the plotting cell.
data_brainwire_amplitude_pd = pd.DataFrame(data=data_brainwire_amplitude, columns=["Amplitude"])

In [None]:
# Overlay the decompressed signal on the original raw neural data so the
# two traces can be compared visually.

plt.figure(figsize=(12, 4))
plt.style.use("ggplot")

plt.title("Reconstructed Compressed Neural Data of Detected Neural Spikes")

# Both traces share the same thin line width.
trace_style = {"linewidth": 0.5}
plt.plot(data, color="purple", label="Raw Neural Data", **trace_style)
plt.plot(
    data_brainwire_amplitude_pd,
    color="teal",
    label="Decompressed Neural Data",
    **trace_style,
)

plt.grid(True)
plt.legend()
# Horizontal reference line at zero amplitude.
plt.axhline(y=0, color="black")
plt.show()

In [13]:
# Destination of the compressed payload: <cwd>/data/test_compression.brainwire.
# os.path.join is used instead of string concatenation so the separator is
# correct on every platform.
file_path = os.path.join(os.getcwd(), "data", "test_compression.brainwire")

In [14]:
# Write the compressed payload to disk. The `with` block closes the file on
# exit, so no explicit close() is needed, and write-only binary mode is
# enough because nothing is read back. `written_data` holds the byte count
# returned by write().
with open(file_path, "wb") as fp:
    written_data = fp.write(data_brainwire)

In [None]:
# Display the value returned by fp.write() for the compressed payload.
written_data

## zlib


In [None]:
# Time zlib on the raw sample buffer. perf_counter_ns() is a monotonic,
# high-resolution clock intended for measuring durations.
start_time = time.perf_counter_ns()
data_zlib = zlib.compress(data)
stop_time = time.perf_counter_ns()

# Report the size reduction and the elapsed time for zlib.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_zlib,
    method="zlib.compress(data)",
)

## Gzip


In [None]:
# Time gzip on the raw sample buffer. perf_counter_ns() is a monotonic,
# high-resolution clock intended for measuring durations.
start_time = time.perf_counter_ns()
data_gz = gzip.compress(data)
stop_time = time.perf_counter_ns()

# Report the size reduction and the elapsed time for gzip.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_gz,
    method="gzip.compress(data)",
)

## bz2


In [None]:
# Time bz2 on the raw sample buffer. perf_counter_ns() is a monotonic,
# high-resolution clock intended for measuring durations.
start_time = time.perf_counter_ns()
data_bz2 = bz2.compress(data)
stop_time = time.perf_counter_ns()

# Report the size reduction and the elapsed time for bz2.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_bz2,
    method="bz2.compress(data)",
)

## lzma


In [None]:
# Time lzma on the raw sample buffer. perf_counter_ns() is a monotonic,
# high-resolution clock intended for measuring durations.
start_time = time.perf_counter_ns()
data_lzma = lzma.compress(data)
stop_time = time.perf_counter_ns()

# Report the lzma result itself; the cell previously passed `data_bz2`, so
# the printed size and ratio belonged to bz2 rather than lzma.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_lzma,
    method="lzma.compress(data)",
)