# Comparing and Contrasting Different Compression Methods


In [1]:
# Imports
from glob import glob
from scipy.io import wavfile
from signal_processing_utilities import process_signal
import time
import os
import numpy as np

import zlib
import gzip
import bz2
import lzma

from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader

import pandas as pd
import matplotlib.pyplot as plt

# The `encode` and `decode` scripts are referenced by paths without a ".py"
# suffix, so each one is loaded explicitly through a SourceFileLoader.

# Load the encoder script as the module `encode`.
encode_loader = SourceFileLoader("encode", "../.././encode")
spec = spec_from_loader("encode", encode_loader)
encode = module_from_spec(spec)
encode_loader.exec_module(encode)

# Load the decoder script as the module `decode`.
decode_loader = SourceFileLoader("decode", "../.././decode")
spec = spec_from_loader("decode", decode_loader)
decode = module_from_spec(spec)
decode_loader.exec_module(decode)

In [2]:
def compare_compression_ratio(file, compressed_file: str, method: str = None):
    """Print the size of the data before and after compression.

    Reports the original size in bytes, the compressed size in bytes, and
    the percentage by which compression reduced the size.

    Args:
        file (numpy.ndarray): The array of amplitudes before compression.
            Its size is measured as the length of ``file.tobytes()``.
        compressed_file (bytes): The compressed representation of the
            amplitudes. Only its length is used. (The ``str`` annotation
            is kept unchanged for compatibility.)
        method (str): Label naming the method of compression. When None,
            a blank line is printed in place of the label. Defaults to
            None.
    """
    # Serialize once: the original size feeds both the ratio and the report.
    original_size = len(file.tobytes())
    compressed_size = len(compressed_file)

    # An empty input has no meaningful ratio; report NaN instead of
    # raising ZeroDivisionError.
    if original_size:
        percent_compression = (1 - (compressed_size / original_size)) * 100
    else:
        percent_compression = float("nan")

    if method is not None:
        print(f"\nMethod of Compression: {method}")
    else:
        print("\n")
    print(f"Initial file size: {original_size} bytes.")
    print(f"Compressed File Size: {compressed_size} bytes.")
    print(f"Percent of Compression: {percent_compression:.2f}%")
    print("\n")

In [3]:
def print_compression_efficiency_metrics_wrapper(
    file: str, compressed_file: str, start_time: int, stop_time: int, method: str
):
    """Report the size reduction and the elapsed time of one compression run.

    The size report is printed first, followed by the timing report.

    Args:
        file (str): The array of amplitudes before compression. It is
                    forwarded unchanged to `compare_compression_ratio`.
        compressed_file (str): The compressed representation of the
                               amplitudes after the method of
                               compression has been applied.
        start_time (int): The time, in nanoseconds, at which the method
                          of compression started.
        stop_time (int): The time, in nanoseconds, at which the method
                         of compression finished.
        method (str): The label of the method of compression; it is used
                      both in the size report and as the `executed_line`
                      of the timing report.
    """
    # Size report.
    compare_compression_ratio(file, compressed_file, method)

    # Timing report, delegated to the signal_processing_utilities helper.
    report_elapsed_time = (
        process_signal.print_time_each_function_takes_to_complete_processing
    )
    report_elapsed_time(
        start_time=start_time, stop_time=stop_time, executed_line=method
    )

In [4]:
# Directory holding the sample recordings, and every WAV file found in it.
data_dir = "../../data/"
data_file_list = glob(data_dir + "*.wav")

# The notebook analyses one fixed recording. It is named directly instead of
# first indexing data_file_list[0], which was immediately overwritten and
# raised IndexError whenever the glob matched nothing.
current_file = data_dir + "102b47d9-371e-412a-8995-0dc6115ab2bb.wav"

In [5]:
# current_file = "../../current_file_data/0ab237b7-fb12-4687-afed-8d1e2070d621.wav"

In [6]:
current_file

'../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav'

In [7]:
rate, current_file_data = wavfile.read(current_file)

In [8]:
debug_file = "../../data/0052503c-2849-4f41-ab51-db382103690c.wav"

In [9]:
rate, debug_data = wavfile.read(debug_file)

In [10]:
len(np.unique(debug_data))

266

In [11]:
len(np.unique(current_file_data))

158

## Brainwire


In [12]:
current_file_data

array([-352, -416, -288, ...,  287,  223, -288], dtype=int16)

In [13]:
# debug_file: the number of unique amplitudes is > 256.
# method_of_compression == 'w'

# Time only the compress call; the reporting below is outside the timed span.
start_time = time.time_ns()
data_brainwire = encode.compress(debug_file, quick=True)
stop_time = time.time_ns()

# Report against the samples of the file that was actually compressed
# (debug_file -> debug_data), not the unrelated current_file_data.
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=debug_data,
    compressed_data=data_brainwire,
    method="unique_amplitudes_l > 256; encode.compress(debug_file)",
)


Method of Compression: unique_amplitudes_l > 256; encode.compress(debug_file)
Initial File Size: 197398 bytes.
Compressed File Size: 114178 bytes.
Reduction in File Size: 83220 bytes.
Percent of Reduction: 42.16%
The file was reduced in size by 42.16% of the original size of the file. 
Percent of Compression: 57.84%
The compressed file size is 57.84% of the original file size.
Compression Ratio: 1.73



Executed Line: unique_amplitudes_l > 256; encode.compress(debug_file)...
Time Δ Nanoseconds: 504710000
Time Δ Microseconds: 504710.0
Time Δ Milliseconds: 504.71
Time Δ Seconds: 0.50471




In [15]:
current_file_data

array([-352, -416, -288, ...,  287,  223, -288], dtype=int16)

In [16]:
data_brainwire

b'0063babe6f6cbdd0cc44475dd7bb0a1702fcf8192030467ca38192aa69687aaebc4e4d60809e706ddce6f1fe2ad6c45e91829f90cbcf01f3e82dead3b46577898c843432db0efff9fa3ed8c65ab3afc1dd142be7f0082621284cc9c8579b9d8a8b6eab96ced53b43e02cd1cadf37c25b3c3a40557ead05091e1a07fbf712232e04161149c76a8e95765245e12fda546ba1758f7da478867fa22210e5183633d9cdc0b651c5e4e2d4484f9897a66774b14b1b0dee0313edfdf639f2ec295679b2ac71c35358b75cbf728594a061a8998d6483de0f2724d20b1c1d1f7b3559b05f66a762b542e9eff5f4152531eb060c413887937350b8a99a889c4a3de33fb9a50x1x071x061x0x1x06130512041x0x1x0412031x0412021x0x1x0412021304120x1x0x1x04120x1304130x1x0414021x04140x120415041x041503120415021x0x1x04150212021x041502120x120415021404150x1x0x1x04150x130417031x021x031x0x1203120x1x0314021x041x021x0312021x0212021x0x1x021x021x0x1x0x12021x0x12021x021x0x120x1x0x1x021x0x120x13021x0x140212021x02120x1202130x1x02140x1x0215041x0215031x0x1x02150312021x021503120x12021503140215021202150x1202170x1x041x0x1x03120x1x021x0x1x0x1x02130x1x0x1x061x0x1x0x1x05120x1x0x1x04

In [17]:
# current_file: the number of unique amplitudes is < 256.
# Method of compression == 'u'

# Time only the compress call; the reporting below is outside the timed span.
start_time = time.time_ns()
current_file_data_brainwire = encode.compress(current_file)
stop_time = time.time_ns()
# NOTE(review): the `method` label names `current_file_data`, but the call
# above is given the path `current_file` — confirm which name the label
# should show.
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=current_file_data,
    compressed_data=current_file_data_brainwire,
    method="unique_amplitudes_l < 256; encode.compress(current_file_data)",
)


Method of Compression: unique_amplitudes_l < 256; encode.compress(current_file_data)
Initial File Size: 197398 bytes.
Compressed File Size: 80483 bytes.
Reduction in File Size: 116915 bytes.
Percent of Reduction: 59.23%
The file was reduced in size by 59.23% of the original size of the file. 
Percent of Compression: 40.77%
The compressed file size is 40.77% of the original file size.
Compression Ratio: 2.45



Executed Line: unique_amplitudes_l < 256; encode.compress(current_file_data)...
Time Δ Nanoseconds: 177667000
Time Δ Microseconds: 177667.0
Time Δ Milliseconds: 177.667
Time Δ Seconds: 0.177667




In [19]:
# Run the encoder through its command-line entry point: build the argument
# parser, pass the input path, the output path and the "-m=n" option (its
# meaning is defined by the encode script's parser), then call main().
start_time = time.time_ns()
parser = encode.initialize_argument_parser()
compressed_file = current_file + ".brainwire"
args = parser.parse_args([current_file, compressed_file, "-m=n"])
encode.main(args)

# Read back what the encoder wrote. Read-only binary mode is sufficient, and
# the `with` block closes the file, so no explicit close() is needed.
with open(compressed_file, "rb") as fp:
    compressed_file_data = fp.read()

In [None]:
compressed_file_data

In [20]:
# NOTE(review): start_time was captured before the parser setup, encode.main
# and the file read-back, while stop_time is only taken here — so the reported
# delta covers more than the compression itself (including any delay between
# running the cells).
stop_time = time.time_ns()
# NOTE(review): this label is the same string used for the direct
# encode.compress(current_file) run, but compressed_file_data here was
# produced by encode.main with "-m=n" — confirm the intended label.
process_signal.print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    original_data=current_file_data,
    compressed_data=compressed_file_data,
    method="unique_amplitudes_l < 256; encode.compress(current_file_data)",
)


Method of Compression: unique_amplitudes_l < 256; encode.compress(current_file_data)
Initial File Size: 197398 bytes.
Compressed File Size: 88473 bytes.
Reduction in File Size: 108925 bytes.
Percent of Reduction: 55.18%
The file was reduced in size by 55.18% of the original size of the file. 
Percent of Compression: 44.82%
The compressed file size is 44.82% of the original file size.
Compression Ratio: 2.23



Executed Line: unique_amplitudes_l < 256; encode.compress(current_file_data)...
Time Δ Nanoseconds: 4301394000
Time Δ Microseconds: 4301394.0
Time Δ Milliseconds: 4301.394
Time Δ Seconds: 4.301394




In [16]:
# Decompress the bytes produced by encode.compress(current_file) and time the
# call. The result is unpacked as (sample_rate, amplitudes).
start_time = time.time_ns()
sample_rate, data_brainwire_amplitude_current_file = decode.decompress(
    current_file_data_brainwire
)
stop_time = time.time_ns()

In [None]:
# Round-trip check: the decompressed samples must match the original exactly.
# np.array_equal also compares shapes, so a decoded array that is shorter or
# longer than the original is reported as a mismatch instead of raising
# IndexError (shorter) or going unnoticed (longer).
not_equal = not np.array_equal(
    current_file_data, data_brainwire_amplitude_current_file
)

print(not_equal)

In [18]:
data_brainwire_amplitude_pd = pd.DataFrame(
    data_brainwire_amplitude_current_file, columns=["Amplitude"]
)

In [None]:
# Plot the decompressed (reconstructed) signal on top of the original raw
# signal so the two can be compared visually.

plt.figure(figsize=(12, 4))
# NOTE(review): the style is selected after the figure is created — confirm
# the figure picks up the intended "ggplot" settings.
plt.style.use("ggplot")

plt.title("Reconstructed Compressed Neural Data of Detected Neural Spikes")
# The raw data is drawn first with a thicker line; the decompressed trace is
# drawn over it with a thinner line.
plt.plot(current_file_data, color="purple", linewidth=1, label="Raw Neural Data")
plt.plot(
    data_brainwire_amplitude_pd,
    linewidth=0.5,
    color="teal",
    label="Decompressed Neural Data",
)
plt.grid(True)
plt.xlabel("Sample")
plt.ylabel("Amplitude")
plt.legend()
# Horizontal reference line at zero amplitude.
plt.axhline(y=0, color="black")
plt.show()

In [20]:
file_path = os.getcwd() + "/data/test_compression.brainwire"

In [21]:
# Persist the compressed bytes. Write-only binary mode is sufficient, and the
# `with` block closes the file, so no explicit close() is needed.
with open(file_path, "wb") as fp:
    # fp.write returns the number of bytes written, not the data itself.
    written_data = fp.write(data_brainwire)

In [None]:
written_data

## zlib


In [None]:
# Baseline: time one zlib.compress call on the raw samples. No level argument
# is passed, so zlib's default compression level applies.
start_time = time.time_ns()
data_zlib = zlib.compress(current_file_data)
stop_time = time.time_ns()

# Report size reduction and elapsed time with the notebook's local wrapper.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=current_file_data,
    compressed_file=data_zlib,
    method="zlib.compress(current_file_data)",
)

## Gzip


In [None]:
# Baseline: time one gzip.compress call on the raw samples. No compresslevel
# argument is passed, so gzip's default applies.
start_time = time.time_ns()
data_gz = gzip.compress(current_file_data)
stop_time = time.time_ns()

# Report size reduction and elapsed time with the notebook's local wrapper.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=current_file_data,
    compressed_file=data_gz,
    method="gzip.compress(current_file_data)",
)

## bz2


In [None]:
# Baseline: time one bz2.compress call on the raw samples. No compresslevel
# argument is passed, so bz2's default applies.
start_time = time.time_ns()
data_bz2 = bz2.compress(current_file_data)
stop_time = time.time_ns()

# Report size reduction and elapsed time with the notebook's local wrapper.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=current_file_data,
    compressed_file=data_bz2,
    method="bz2.compress(current_file_data)",
)

## lzma


In [None]:
# Baseline: time one lzma.compress call on the raw samples. No format, preset
# or filter arguments are passed, so lzma's defaults apply.
start_time = time.time_ns()
data_lzma = lzma.compress(current_file_data)
stop_time = time.time_ns()

# Report size reduction and elapsed time with the notebook's local wrapper.
# The lzma output (data_lzma) is reported here; the original passed data_bz2,
# which made this cell repeat the bz2 size figures.
print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=current_file_data,
    compressed_file=data_lzma,
    method="lzma.compress(current_file_data)",
)