# 1. Importing the libraries used

In [1]:
import wave
import numpy as np
from collections import Counter
import math
import matplotlib.pyplot as plt

# When truthy, the analysis loops below print the first ten tuples of every split.
DEBUG = False

# 2. Defining the helper functions

In [2]:
def split_in_tuples(file, tuple_size, debug=False):
    """Split a sequence into consecutive, non-overlapping chunks.

    Each chunk is converted with ``str()`` so that it is hashable and can be
    counted (e.g. with ``collections.Counter``).

    Args:
        file: Sliceable sequence that supports ``len()``.
        tuple_size: Number of elements per chunk. The final chunk is shorter
            when ``len(file)`` is not a multiple of ``tuple_size``.
        debug: When true, print every chunk together with its start offset.

    Returns:
        List with the string representation of every chunk, in input order.
    """
    output = []
    for i in range(0, len(file), tuple_size):
        output.append(str(file[i:i+tuple_size]))
        if debug:
            # `i` is the offset into `file`, not a position in `output`;
            # the chunk that was just appended is always the last element.
            print(f"{i}: {output[-1]}")

    return output

In [3]:
def calculate_entropy(count):
    """Return the Shannon entropy, in bits, of a frequency table.

    Args:
        count: Mapping from symbol to its number of occurrences.

    Returns:
        ``-sum(p * log2(p))`` over all symbols, where ``p`` is the symbol's
        share of the total count. Symbols whose share is zero are skipped.
    """
    total_count = sum(count.values())

    weighted_log_sum = 0
    for occurrences in count.values():
        probability = occurrences / total_count
        # log2(0) is undefined; a zero-probability symbol contributes nothing.
        if probability != 0:
            weighted_log_sum += probability * math.log2(probability)

    return -weighted_log_sum
    

# 3. Calculating the entropies of the text files

In [None]:
# Per-byte entropy of the text sample for tuple lengths 1..5, computed for
# three on-disk variants. The three variants go through exactly the same
# pipeline, so they are driven from one table instead of three copies.
entropies_text_uncompressed = {}
entropies_text_zip = {}
entropies_text_lzip = {}

# (file path, label used in the printed report, dict that receives the results)
text_inputs = (
    ("./datoteke/besedilo.txt", "uncompressed", entropies_text_uncompressed),
    ("./datoteke/besedilo.zip", "unlocked zip", entropies_text_zip),
    ("./datoteke/besedilo_locked.zip", "locked zip", entropies_text_lzip),
)

for position, (path, label, entropies) in enumerate(text_inputs):
    if position:
        # Separator between consecutive reports (none after the last one).
        print("\n")

    # Read the file as a flat array of raw bytes.
    text = np.fromfile(path, dtype="uint8")

    for order in range(1, 6):
        text_split = split_in_tuples(text, order)
        if DEBUG:
            print(text_split[:10])
        frequencies_split = Counter(text_split)
        entropy_split = calculate_entropy(frequencies_split)
        # Normalise by the tuple length: entropy per byte, not per tuple.
        entropies[order] = entropy_split / order

    for key, value in entropies.items():
        print(f"{key} order entropy({label}): {value}")


In [None]:
# Plot entropy against tuple length for the three text variants.
plt.figure()

text_series = (
    (entropies_text_uncompressed, 'Uncompressed'),
    (entropies_text_zip, '.zip (unlocked)'),
    (entropies_text_lzip, '.zip (locked)'),
)
for series_data, series_label in text_series:
    plt.plot(
        series_data.keys(),
        series_data.values(),
        marker='o',
        linestyle='-',
        label=series_label,
    )

# Axis labels, title, grid and legend
plt.xlabel('Order (n)', fontsize=12)
plt.ylabel('Entropy (H)', fontsize=12)
plt.title('Entropy vs. Order', fontsize=14)
plt.grid(True)
plt.legend()
plt.show()

# 4. Calculating the entropies of the audio files

In [None]:
# NOTE(review): this rebinds the module-level flag, so every later cell that
# checks DEBUG prints its tuple preview as well — confirm this is intended.
DEBUG = True

# Per-byte entropy of the audio sample for tuple lengths 1..5, computed for
# the WAV, MP3 and FLAC files. Each report iterates the dict that was just
# filled for that file (the previous code referenced an undefined
# `entropies_sound` and raised NameError before printing anything).
entropies_sound_wav = {}
entropies_sound_mp3 = {}
entropies_sound_flac = {}

# (file path, label used in the printed report, dict that receives the results)
sound_inputs = (
    ("./datoteke/posnetek.wav", "uncompressed", entropies_sound_wav),
    ("./datoteke/posnetek.mp3", "mp3", entropies_sound_mp3),
    ("./datoteke/posnetek.flac", "flac", entropies_sound_flac),
)

for position, (path, label, entropies) in enumerate(sound_inputs):
    if position:
        # Separator between consecutive reports (none after the last one).
        print("\n")

    # Read the file as a flat array of raw bytes.
    sound = np.fromfile(path, dtype="uint8")

    for order in range(1, 6):
        sound_split = split_in_tuples(sound, order)
        if DEBUG:
            print(sound_split[:10])
        frequencies_split = Counter(sound_split)
        entropy_split = calculate_entropy(frequencies_split)
        # Normalise by the tuple length: entropy per byte, not per tuple.
        entropies[order] = entropy_split / order

    for key, value in entropies.items():
        print(f"{key} order entropy({label}): {value}")

In [None]:
# Plot entropy against tuple length for the three audio formats.
plt.figure()

sound_series = (
    (entropies_sound_wav, '.WAV'),
    (entropies_sound_mp3, '.mp3'),
    (entropies_sound_flac, '.flac'),
)
for series_data, series_label in sound_series:
    plt.plot(
        series_data.keys(),
        series_data.values(),
        marker='o',
        linestyle='-',
        label=series_label,
    )

# Axis labels, title, grid and legend
plt.xlabel('Order (n)', fontsize=12)
plt.ylabel('Entropy (H)', fontsize=12)
plt.title('Entropy vs. Order', fontsize=14)
plt.grid(True)
plt.legend()
plt.show()

# 5. Calculating the entropies of the image files

In [None]:
# Per-byte entropy of the image sample for tuple lengths 1..5, computed for
# the BMP, JPG and PNG files. The three files go through exactly the same
# pipeline, so they are driven from one table instead of three copies.
entropies_image_bmp = {}
entropies_image_jpg = {}
entropies_image_png = {}

# (file path, label used in the printed report, dict that receives the results)
image_inputs = (
    ("./datoteke/slika.bmp", "uncompressed", entropies_image_bmp),
    ("./datoteke/slika.jpg", "jpg", entropies_image_jpg),
    ("./datoteke/slika.png", "png", entropies_image_png),
)

for position, (path, label, entropies) in enumerate(image_inputs):
    if position:
        # Separator between consecutive reports (none after the last one).
        print("\n")

    # Read the file as a flat array of raw bytes.
    image = np.fromfile(path, dtype="uint8")

    for order in range(1, 6):
        image_split = split_in_tuples(image, order)
        if DEBUG:
            print(image_split[:10])
        frequencies_split = Counter(image_split)
        entropy_split = calculate_entropy(frequencies_split)
        # Normalise by the tuple length: entropy per byte, not per tuple.
        entropies[order] = entropy_split / order

    for key, value in entropies.items():
        print(f"{key} order entropy({label}): {value}")


In [None]:
# Plot entropy against tuple length for the three image formats.
plt.figure()

image_series = (
    (entropies_image_bmp, '.bmp'),
    (entropies_image_jpg, '.jpg'),
    (entropies_image_png, '.png'),
)
for series_data, series_label in image_series:
    plt.plot(
        series_data.keys(),
        series_data.values(),
        marker='o',
        linestyle='-',
        label=series_label,
    )

# Axis labels, title, grid and legend
plt.xlabel('Order (n)', fontsize=12)
plt.ylabel('Entropy (H)', fontsize=12)
plt.title('Entropy vs. Order', fontsize=14)
plt.grid(True)
plt.legend()
plt.show()