# Example

## Imports

In [None]:
import os
import shutil
import pandas as pd

import dc_semester_project.compressor as compressor
import dc_semester_project.decompressor as decompressor
import dc_semester_project.checker as checker

## Variables

In [14]:
# Folder holding the input sample files that the example loops iterate over
samples_folder = "../data/samples"
# Output folders; clean_folders() empties both (or creates them if missing)
compressed_folder = "../data/compressed"
decompressed_folder = "../data/decompressed"


## Clean folders

In [None]:
def clean_folders():
    """Empty the compressed and decompressed output folders.

    Reads the module-level ``compressed_folder`` and ``decompressed_folder``
    paths. A folder that does not exist yet is created (and reported);
    an existing folder has its regular files and symlinks unlinked and its
    sub-directories removed recursively. The folder itself is kept.
    """
    for target_dir in (compressed_folder, decompressed_folder):
        if not os.path.exists(target_dir):
            # Nothing to clean yet: create the folder so later cells can write to it
            os.makedirs(target_dir)
            print(f"Created folder: {target_dir}")
            continue

        for entry_name in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry_name)
            removable_file = os.path.isfile(entry_path) or os.path.islink(entry_path)
            if removable_file:
                os.unlink(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)

    print("Data folders cleaned successfully!\n")

In [16]:
# Empty (or create) the compressed/decompressed folders before running the examples
clean_folders()

Data folders cleaned successfully!



## Examination of Entropy

In [4]:
# Report the entropy of every sample file via checker.entropy().
# os.listdir() returns names in arbitrary order, so sort them to keep the
# order of the printed results stable from run to run.
for filename in sorted(os.listdir(samples_folder)):
    # Skip the .DS_Store metadata file that may sit next to the samples
    if filename.endswith(".DS_Store"):
        continue
    print(f"Processing file: {filename}\n")
    original_file_path = os.path.join(samples_folder, filename)
    checker.entropy(original_file_path)
    print("\n")

Processing file: sample1.data

Entropy: -0.00 bits per byte


Processing file: sample.bmp

Entropy: 2.29 bits per byte


Processing file: sample1b.data

Entropy: 0.00 bits per byte


Processing file: sample7.data

Entropy: 7.69 bits per byte


Processing file: sample.RW2

Entropy: 7.53 bits per byte


Processing file: sample5b.data

Entropy: 8.00 bits per byte


Processing file: sample6.data

Entropy: 8.00 bits per byte


Processing file: sample5.data

Entropy: 8.00 bits per byte


Processing file: sample4.data

Entropy: 0.40 bits per byte


Processing file: sample3.data

Entropy: 1.00 bits per byte


Processing file: sample2.data

Entropy: 8.00 bits per byte




## Tests with LZ77

In [44]:
# Algorithm name passed to compress_file/decompress_file; also used as the
# extension of the compressed output file in the cells below
algorithm = "lz77"

### Testing on provided .data types

In [None]:
# Start from empty output folders so leftovers from earlier runs are not mixed in
clean_folders()

# os.listdir() returns names in arbitrary order; sort so the samples are
# processed (and reported) in the same order on every run.
for filename in sorted(os.listdir(samples_folder)):
    # Only the provided .data samples are exercised in this cell
    if not filename.endswith(".data"):
        continue

    print(f"Processing file: {filename}\n")

    # Construct file paths. The compressed file name does not depend on the
    # sample, so every iteration reuses the same compressed output path.
    original_file_path = os.path.join(samples_folder, filename)
    compressed_file_path = os.path.join(compressed_folder, f"compressed.{algorithm}")
    decompressed_file_path = os.path.join(decompressed_folder, f"decompressed_{filename}")

    # Compression
    compressor.compress_file(original_file_path, algorithm=algorithm, output_path=compressed_file_path)

    # Decompression
    decompressor.decompress_file(compressed_file_path, output_path=decompressed_file_path, algorithm=algorithm)

    # Verification: compare the decompressed file against the original
    checker.checker(original_file_path, decompressed_file_path)

    # Compression ratio
    checker.compression_ratio(original_file_path, compressed_file_path)
    print("\n")

Data folders cleaned successfully!

Processing file: sample1.data

File compressed and saved to: ../data/compressed/compressed.lz77
File decompressed and saved to: ../data/decompressed/decompressed_sample1.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 381142 bytes
Compression ratio: 2.62


Processing file: sample1b.data

File compressed and saved to: ../data/compressed/compressed.lz77
File decompressed and saved to: ../data/decompressed/decompressed_sample1b.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 381142 bytes
Compression ratio: 2.62


Processing file: sample7.data

File compressed and saved to: ../data/compressed/compressed.lz77
File decompressed and saved to: ../data/decompressed/decompressed_sample7.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 6881406 bytes
Compression ratio: 0.15


Processing file: sample5b.da

### Testing on .RW2

In [None]:
# Reset the output folders before the round trip
clean_folders()

# Paths for the single .RW2 sample
ext = ".RW2"
compressed_file_path = f"{compressed_folder}/compressed.{algorithm}"
decompressed_file_path = f"{decompressed_folder}/decompressed{ext}"
original_file_path = f"{samples_folder}/sample{ext}"

# Compress the sample with the selected algorithm
compressor.compress_file(
    original_file_path,
    algorithm=algorithm,
    output_path=compressed_file_path,
)

# Restore it from the compressed file
decompressor.decompress_file(
    compressed_file_path,
    output_path=decompressed_file_path,
    algorithm=algorithm,
)

# Compare the restored file against the original
checker.checker(original_file_path, decompressed_file_path)

# Report the compression ratio for the original/compressed pair
checker.compression_ratio(original_file_path, compressed_file_path)

Data folders cleaned successfully!

File compressed and saved to: ../data/compressed/compressed.lz77
File decompressed and saved to: ../data/decompressed/decompressed.RW2
The original and decompressed files are identical.
Original size: 36087808 bytes
Compressed size: 253434491 bytes
Compression ratio: 0.14


### Testing on .bmp

In [None]:
# Reset the output folders before the round trip
clean_folders()

# Paths for the single .bmp sample
ext = ".bmp"
compressed_file_path = f"{compressed_folder}/compressed.{algorithm}"
decompressed_file_path = f"{decompressed_folder}/decompressed{ext}"
original_file_path = f"{samples_folder}/sample{ext}"

# Compress the sample with the selected algorithm
compressor.compress_file(
    original_file_path,
    algorithm=algorithm,
    output_path=compressed_file_path,
)

# Restore it from the compressed file
decompressor.decompress_file(
    compressed_file_path,
    output_path=decompressed_file_path,
    algorithm=algorithm,
)

# Compare the restored file against the original
checker.checker(original_file_path, decompressed_file_path)

# Report the compression ratio for the original/compressed pair
checker.compression_ratio(original_file_path, compressed_file_path)


Data folders cleaned successfully!

File compressed and saved to: ../data/compressed/compressed.lz77
File decompressed and saved to: ../data/decompressed/decompressed.bmp
The original and decompressed files are identical.
Original size: 28125138 bytes
Compressed size: 10719076 bytes
Compression ratio: 2.62


## Tests with LZ78

In [45]:
# Algorithm name passed to compress_file/decompress_file; also used as the
# extension of the compressed output file in the cells below
algorithm = "lz78"

### Testing on provided .data types

In [None]:
# Start from empty output folders so leftovers from earlier runs are not mixed in
clean_folders()

# os.listdir() returns names in arbitrary order; sort so the samples are
# processed (and reported) in the same order on every run.
for filename in sorted(os.listdir(samples_folder)):
    # Only the provided .data samples are exercised in this cell
    if not filename.endswith(".data"):
        continue

    print(f"Processing file: {filename}\n")

    # Construct file paths. The compressed file name does not depend on the
    # sample, so every iteration reuses the same compressed output path.
    original_file_path = os.path.join(samples_folder, filename)
    compressed_file_path = os.path.join(compressed_folder, f"compressed.{algorithm}")
    decompressed_file_path = os.path.join(decompressed_folder, f"decompressed_{filename}")

    # Compression
    compressor.compress_file(original_file_path, algorithm=algorithm, output_path=compressed_file_path)

    # Decompression
    decompressor.decompress_file(compressed_file_path, output_path=decompressed_file_path, algorithm=algorithm)

    # Verification: compare the decompressed file against the original
    checker.checker(original_file_path, decompressed_file_path)

    # Compression ratio
    checker.compression_ratio(original_file_path, compressed_file_path)
    print("\n")

Data folders cleaned successfully!

Processing file: sample1.data

File compressed and saved to: ../data/compressed/compressed.lz78
File decompressed and saved to: ../data/decompressed/decompressed_sample1.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 9662 bytes
Compression ratio: 103.50


Processing file: sample1b.data

File compressed and saved to: ../data/compressed/compressed.lz78
File decompressed and saved to: ../data/decompressed/decompressed_sample1b.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 9671 bytes
Compression ratio: 103.40


Processing file: sample7.data

File compressed and saved to: ../data/compressed/compressed.lz78
File decompressed and saved to: ../data/decompressed/decompressed_sample7.data
The original and decompressed files are identical.
Original size: 1000000 bytes
Compressed size: 3478848 bytes
Compression ratio: 0.29


Processing file: sample5b.da

### Testing on .RW2

In [None]:
# Reset the output folders before the round trip
clean_folders()

# Paths for the single .RW2 sample
ext = ".RW2"
compressed_file_path = f"{compressed_folder}/compressed.{algorithm}"
decompressed_file_path = f"{decompressed_folder}/decompressed{ext}"
original_file_path = f"{samples_folder}/sample{ext}"

# Compress the sample with the selected algorithm
compressor.compress_file(
    original_file_path,
    algorithm=algorithm,
    output_path=compressed_file_path,
)

# Restore it from the compressed file
decompressor.decompress_file(
    compressed_file_path,
    output_path=decompressed_file_path,
    algorithm=algorithm,
)

# Compare the restored file against the original
checker.checker(original_file_path, decompressed_file_path)

# Report the compression ratio for the original/compressed pair
checker.compression_ratio(original_file_path, compressed_file_path)


Data folders cleaned successfully!

File compressed and saved to: ../data/compressed/compressed.lz78
File decompressed and saved to: ../data/decompressed/decompressed.RW2
The original and decompressed files are identical.
Original size: 36087808 bytes
Compressed size: 95511455 bytes
Compression ratio: 0.38


### Testing on .bmp

In [46]:
# Reset the output folders before the round trip
clean_folders()

# Paths for the single .bmp sample
ext = ".bmp"
compressed_file_path = f"{compressed_folder}/compressed.{algorithm}"
decompressed_file_path = f"{decompressed_folder}/decompressed{ext}"
original_file_path = f"{samples_folder}/sample{ext}"

# Compress the sample with the selected algorithm
compressor.compress_file(
    original_file_path,
    algorithm=algorithm,
    output_path=compressed_file_path,
)

# Restore it from the compressed file
decompressor.decompress_file(
    compressed_file_path,
    output_path=decompressed_file_path,
    algorithm=algorithm,
)

# Compare the restored file against the original
checker.checker(original_file_path, decompressed_file_path)

# Report the compression ratio for the original/compressed pair
checker.compression_ratio(original_file_path, compressed_file_path)


Data folders cleaned successfully!

File compressed and saved to: ../data/compressed/compressed.lz78
File decompressed and saved to: ../data/decompressed/decompressed.bmp
The original and decompressed files are identical.
Original size: 28125138 bytes
Compressed size: 171387 bytes
Compression ratio: 164.10


## Results (.data and .RW2)

In [11]:
# Load and display the saved summary table (per-file entropy and LZ77/LZ78 compression ratios)
pd.read_csv("../data/entropy_compression - Sheet1.csv")


Unnamed: 0,File,Entropy (bits per byte),Compression Ratio (LZ77),Compression Ratio (LZ78)
0,sample1.data,0.0,2.62,103.5
1,sample1b.data,0.0,2.62,103.4
2,sample2.data,8.0,0.12,5.24
3,sample3.data,1.0,0.67,2.05
4,sample4.data,0.4,1.4,4.69
5,sample5.data,8.0,0.13,0.32
6,sample5b.data,8.0,2.58,4.45
7,sample6.data,8.0,0.14,0.3
8,sample7.data,7.69,0.15,0.29
9,sample.RW2,7.53,0.14,0.38
