## Compression algorithm

In [None]:
def compress(load_path, save_img_path, image_name, save_data_path):
    """Dither an image onto a K-means palette and compare it with a JPEG via SSIM.

    Steps:
      1. Read the image at ``load_path`` and rescale it by
         ``400 / original_height`` (aspect ratio preserved).
      2. Run K-means (K = 32) on the pixels in Lab space; the resulting centers,
         converted back to RGB, form the color palette.
      3. Replace every pixel with its nearest palette color while diffusing the
         quantization error onto not-yet-visited neighbors (Floyd-Steinberg
         weights 7/16, 3/16, 5/16, 1/16). Pixels are visited column by column.
      4. Save the rescaled original and the dithered image as BMP, plus a
         quality-1 JPEG of the rescaled original, inside ``save_img_path``.
      5. Compute SSIM of the dithered image and of the JPEG against the rescaled
         original (all converted to grayscale first), print both values and
         append ``image_name;ssim_dithered;ssim_jpeg`` to ``save_data_path``.

    Parameters
    ----------
    load_path : str
        Path of the image file to read.
    save_img_path : str
        Existing directory that receives ``original_img<name>.bmp``,
        ``dithered_img<name>.bmp`` and ``jpg_img<name>.jpg``.
    image_name : str
        Identifier used in the output file names and in the data line.
    save_data_path : str
        Semicolon-separated text file; one line is appended per call.

    Returns
    -------
    None

    Notes
    -----
    The K-means initialization uses ``np.random.randn`` without a fixed seed, so
    the palette (and therefore the SSIM values) can differ between runs.
    """
    # Imports stay local so the function brings every name it uses into scope itself.
    from imageio import imread
    import numpy as np
    from PIL import Image
    from scipy.spatial.distance import cdist
    from skimage.color import rgb2lab, lab2rgb
    from skimage.metrics import structural_similarity as ssim
    from skimage.transform import rescale
    from sklearn.metrics.pairwise import euclidean_distances

    ######################################
    #########        BEGIN       #########
    ######### K-means clustering #########
    ######################################

    ### !!!
    ### The following K-means clustering code is a modified version of "07-ImageSegmentation.ipynb"
    ### !!!

    image_raw = imread(load_path)

    # Scale factor chosen so that the rescaled image is image_height pixels tall.
    image_height = 400
    image = rescale(image_raw, image_height/image_raw.shape[0], mode='reflect', channel_axis=2, anti_aliasing=True)

    # One row per pixel, expressed in Lab coordinates.
    X = rgb2lab(image).reshape(-1, 3)

    def cluster_assignments(X, Y):
        """Return, for every row of X, the index of the closest row of Y."""
        return np.argmin(euclidean_distances(X,Y), axis=1)

    K = 32
    # Every center starts at the mean color plus a small random offset (unseeded).
    centers = np.array([X.mean(0) + (np.random.randn(3)/10) for _ in range(K)])

    maxreps = 100
    threshold = 0.1

    for b in range(maxreps):
        prev_centers = np.copy(centers)

        # Assign each point to the closest center.
        y_kmeans = cluster_assignments(X, centers)

        # Move the centers to the mean of their assigned points (if any).
        for i in range(K):
            points = X[y_kmeans == i]
            if len(points):
                centers[i] = points.mean(0)

        # Largest distance any single center moved during this iteration.
        max_distance = np.max(np.linalg.norm(centers - prev_centers, axis=1))

        if max_distance < threshold:
            print(f"\n\nCenter movement: {round(max_distance,3)}. Convergence achieved within", b+1, "iterations.\n\n")
            break
        elif b+1 == maxreps:
            print(f"\n\nCenter movement: {round(max_distance,3)}. Maximum repetitions reached within", b+1, "iterations.\n\n")
            break

    ######################################
    #########         END        #########
    ######### K-means clustering #########
    ######################################

    def find_closest_centroid_color2(oldpixel, palette):
        """Return the index of the palette row closest (Euclidean) to oldpixel."""
        distances = cdist([oldpixel], palette).flatten()
        closest_index = np.argmin(distances)
        return closest_index

    # Palette: the cluster centers converted from Lab back to RGB.
    # NOTE(review): presumably lab2rgb returns floats in [0, 1], hence the
    # factor 255 before the uint8 cast - confirm.
    palette = np.uint8(lab2rgb(centers)*255)

    ######################################
    #########        BEGIN       #########
    ######### Grayscale function #########
    ######################################

    def grayscale(image):
        """Convert an image array to grayscale, replicated into three equal channels."""
        pil_img = Image.fromarray(np.uint8(image))
        gray_img = pil_img.convert('L')
        gray_np = np.array(gray_img)
        grayscale_img = np.stack((gray_np, gray_np, gray_np), axis=-1)
        return grayscale_img

    ######################################
    #########         END        #########
    ######### Grayscale function #########
    ######################################

    ######################################
    #########        BEGIN       #########
    #########      Dithering     #########
    ######################################

    # Working copy of the rescaled image on a 0-255 float scale; it is modified
    # in place by the dithering loop, so the uint8 snapshots are taken first.
    img = image.astype(np.float64)*255

    original_image = Image.fromarray(np.uint8(img))
    original_image_array = np.uint8(img)

    height, width = img.shape[:2]

    # x is the row index and y the column index; columns are the outer loop, so
    # "ahead" means the next row in this column or any row in the next column.
    for y in range(0, width):
        for x in range(0, height):
            oldpixel = np.copy(img[x, y])
            newpixel = palette[find_closest_centroid_color2(oldpixel, palette)]
            img[x, y] = newpixel
            quant_err = oldpixel - newpixel

            has_prev_row = x > 0
            has_next_row = x < height - 1
            has_next_col = y < width - 1

            if has_next_row:
                img[x + 1, y    ] += quant_err * 7/16
            # The x > 0 guard matters: at the top row x - 1 would be -1, which
            # indexes the LAST row and leaks error into the opposite image edge.
            if has_prev_row and has_next_col:
                img[x - 1, y + 1] += quant_err * 3/16
            if has_next_col:
                img[x    , y + 1] += quant_err * 5/16
            if has_next_col and has_next_row:
                img[x + 1, y + 1] += quant_err * 1/16

    # Save the rescaled original and the dithered image.
    dithered_img = Image.fromarray(np.uint8(img))
    dithered_img_array = np.uint8(img)
    original_image.save(f'{save_img_path}/original_img{image_name}.bmp', format='BMP')
    dithered_img.save(f'{save_img_path}/dithered_img{image_name}.bmp', format='BMP')

    ######################################
    #########         END        #########
    #########      Dithering     #########
    ######################################

    ######################################
    #########        BEGIN       #########
    #########     Statistics     #########
    ######################################

    #####            #####
    ##### -- SSIM -- #####

    # Grayscale reference shared by both comparisons.
    original_array = np.uint8(grayscale(original_image_array))

    ### Dithering compression ###

    dithered_array = np.uint8(grayscale(dithered_img_array))

    # full=True makes ssim return (mean SSIM, SSIM map); only the mean is used.
    ssim1, _ = ssim(original_array, dithered_array, full=True, channel_axis=2)

    print(f"SSIM Index: {round(ssim1, 3)}")

    ### JPEG compression ###

    # Write the rescaled original as a lowest-quality JPEG, then read it back.
    uncompressed_img = Image.fromarray(np.uint8(image*255))
    uncompressed_img.save(f'{save_img_path}/jpg_img{image_name}.jpg', "JPEG", optimize = True, quality = 1)

    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(f'{save_img_path}/jpg_img{image_name}.jpg') as compressed_img:
        compressed_array = np.uint8(grayscale(np.array(compressed_img)))

    ssim2, _ = ssim(original_array, compressed_array, full=True, channel_axis=2)

    print(f"SSIM Index: {round(ssim2, 3)}")

    ######################################
    #########         END        #########
    #########     Statistics     #########
    ######################################

    ### Save data in a semicolon separated .txt file
    roundby = 3
    ssim1, ssim2 = (
        round(ssim1, roundby), round(ssim2, roundby)
    )
    with open(save_data_path, "a") as file:
        file.write(f"{image_name};{ssim1};{ssim2}\n")




Function for counting files:

In [None]:
import os

def count_files(load_path):
    """Return the number of regular files located directly in ``load_path``.

    Sub-directories are not counted and are not descended into.
    """
    return sum(
        1
        for entry in os.listdir(load_path)
        if os.path.isfile(os.path.join(load_path, entry))
    )

Data gathering for multiple files. (Note: run the code cells above first.)

In [None]:
# Folder holding the source images; every regular file in it is processed.
uncompressed_path = 'C:/Users/Lenovo/Desktop/DTU/1. semester/Introduktion til Intelligente Systemer/Eksamen/3-ugers projekt/data/uncompressed/'

# Text file to which compress() appends one result line per image.
save_data_path = r'C:\Users\Lenovo\Desktop\DTU\1. semester\Introduktion til Intelligente Systemer\Eksamen\3-ugers projekt\data\data.txt'

# List the folder once, before the loop, so the set of files stays fixed for
# the whole run instead of being re-read on every iteration.
uncompressed_images = [f for f in os.listdir(uncompressed_path) if os.path.isfile(os.path.join(uncompressed_path, f))]

for file_name in uncompressed_images:
    # splitext copes with extensions of any length (".jpeg", ".tiff", ...),
    # unlike slicing off a fixed four characters.
    image_name = os.path.splitext(file_name)[0]
    load_path = f'{uncompressed_path}{file_name}'
    save_img_path = f'C:/Users/Lenovo/Desktop/DTU/1. semester/Introduktion til Intelligente Systemer/Eksamen/3-ugers projekt/data/compressed/{image_name}'
    # Each image gets its own output folder; an already existing folder is fine.
    os.makedirs(save_img_path, exist_ok=True)
    compress(load_path, save_img_path, image_name, save_data_path)
