# Comparing and Contrasting Different Compression Methods


In [2]:
# Imports
from glob import glob
from scipy.io import wavfile
from signal_processing_utilities import process_signal
import time

import zlib
import gzip
import bz2
import lzma

In [25]:
def compare_compression_ratio(file, compressed_file: str, method: str = None):
    """This function prints the compression ratio of two files.

    Args:
        file (numpy.ndarray): This is the array of amplitudes before
                              compression.
        compressed_file (str): This is the compressed representation of
                               the amplitudes after the method of
                               compression has been applied.
        method (str):  This is the string representing the method of
                       compression. Defaults to None.
    """
    percent_compression = (len(compressed_file) / len(file.tobytes())) * 100
    if method != None:
        print(f"\nMethod of Compression: {method}")
    else:
        print("\n")
    print(f"Initial file size: {len(file.tobytes())} bytes.")
    print(f"Compressed File Size: {len(compressed_file)} bytes.")
    print(f"Percent of Compression: {percent_compression:.2f}%")
    print(f"\n")

In [29]:
def print_compression_efficiency_metrics_wrapper(
    file: str, compressed_file: str, start_time: int, stop_time: int, method: str
):
    """This is a wrapper function to print the start and stop times as
       well as the ratio of compression.

    Args:
        file (str): This is the array of amplitudes before compression
        compressed_file (str): This is the compressed representation of
                               the amplitudes after the method of
                               compression has been applied.
        start_time (int): This is the initial starting time in
                          nanoseconds.
        stop_time (int): This is the final time in nanoseconds of the
                         chosen method of compression.
        method (str): This is the string representing the
                                method of compression.
    """
    compare_compression_ratio(file=file, compressed_file=compressed_file, method=method)
    process_signal.print_time_each_function_takes_to_complete_processing(
        start_time=start_time, stop_time=stop_time, executed_line=method
    )

In [30]:
data_dir = "../../data"
data_file_list = glob(data_dir + "/*.wav")

In [31]:
current_file = data_file_list[0]

In [32]:
current_file

'../../data/102b47d9-371e-412a-8995-0dc6115ab2bb.wav'

In [33]:
rate, data = wavfile.read(current_file)

## zlib


In [39]:
start_time = time.time_ns()
data_zlib = zlib.compress(data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_zlib,
    method="zlib.compress(data)",
)


Executed Line: zlib.compress(data)...
Time Δ Nanoseconds: 40383000
Time Δ Microseconds: 40383.0
Time Δ Milliseconds: 40.383
Time Δ Seconds: 0.040383



Method of Compression: zlib.compress(data)
Initial file size: 197398 bytes.
Compressed File Size: 87243 bytes.
Percent of Compression: 44.20%




## Gzip


In [40]:
start_time = time.time_ns()
data_gz = gzip.compress(data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_gz,
    method="gzip.compress(data)",
)


Executed Line: gzip.compress(data)...
Time Δ Nanoseconds: 55205000
Time Δ Microseconds: 55205.0
Time Δ Milliseconds: 55.205
Time Δ Seconds: 0.055205



Method of Compression: gzip.compress(data)
Initial file size: 197398 bytes.
Compressed File Size: 86176 bytes.
Percent of Compression: 43.66%




## bz2


In [41]:
start_time = time.time_ns()
data_bz2 = bz2.compress(data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_bz2,
    method="bz2.compress(data)",
)


Executed Line: bz2.compress(data)...
Time Δ Nanoseconds: 49975000
Time Δ Microseconds: 49975.0
Time Δ Milliseconds: 49.975
Time Δ Seconds: 0.049975



Method of Compression: bz2.compress(data)
Initial file size: 197398 bytes.
Compressed File Size: 63435 bytes.
Percent of Compression: 32.14%




## lzma


In [43]:
start_time = time.time_ns()
data_lzma = lzma.compress(data)
stop_time = time.time_ns()

print_compression_efficiency_metrics_wrapper(
    start_time=start_time,
    stop_time=stop_time,
    file=data,
    compressed_file=data_bz2,
    method="lzma.compress(data)",
)


Executed Line: lzma.compress(data)...
Time Δ Nanoseconds: 59149000
Time Δ Microseconds: 59149.0
Time Δ Milliseconds: 59.149
Time Δ Seconds: 0.059149



Method of Compression: lzma.compress(data)
Initial file size: 197398 bytes.
Compressed File Size: 63435 bytes.
Percent of Compression: 32.14%


