# **PROJECT 1: COLOR COMPRESSION** <br>
**Subject:** Applied Mathematics and Statistics <br>

## **Table of contents**
* [Import libraries and designate a path](#c1)
* [Image preparation and processing](#c2)
* [KMeans function](#c3)
* [Test program for k = {3, 5, 7}](#c4)
* [Main function](#c5)

### **Import libraries and designate a path**<a class="anchor" id="c1"></a>

In [1]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
# Names of the supported centroid initialization modes. main() indexes this
# list with the user's numeric choice (0 -> 'random', 1 -> 'in_pixels').
init_centroidsT = ['random', 'in_pixels']

### **Image preparation and processing**<a class="anchor" id="c2"></a>

In [2]:
# Flatten an image into a 2-D array with one row per pixel
def flatten_img(img):
  """Return the pixels of `img` as an integer array of shape (n_pixels, 3).

  `img` must expose a two-element `size` attribute and be convertible to a
  NumPy array holding exactly three values per pixel (e.g. a PIL image in
  'RGB' mode).
  """
  # PIL reports `size` as (width, height); only the product matters here.
  width, height = img.size
  n_pixels = width * height
  pixel_rows = np.asarray(img).reshape((n_pixels, 3))
  return pixel_rows.astype(int)

# Rebuild an image array from the k-means centroids and per-pixel labels
def compress_img(centroids, labels, img):
  """Return an integer array in which every pixel is replaced by its centroid.

  `labels` holds one centroid index per pixel (any numeric dtype; it is cast
  to int before indexing). `img` is only used for its `size` attribute, which
  fixes the shape of the result: (size[1], size[0], 3).
  """
  # PIL reports `size` as (width, height); image arrays are (height, width, 3).
  width, height = img.size
  centroid_index = labels.astype(int)
  per_pixel_colors = centroids[centroid_index]
  return per_pixel_colors.reshape((height, width, 3)).astype(int)

# Display an image with a descriptive title
def show_img(img, img_name, img_centroids, num_cluster):
  """Show `img` in a matplotlib figure without axes.

  The title is built from the image name, the centroid initialization mode
  and the number of clusters.
  """
  plt.title(f'{img_name} {img_centroids} with k cluster = {num_cluster}')
  plt.imshow(img)
  plt.axis('off') # Remove the axis
  # Call show(); the bare attribute `plt.show` is a no-op expression.
  plt.show()

### **KMeans function**<a class="anchor" id="c3"></a>

In [3]:
def kmeans(img_1d, k_clusters, max_iter, init_centroids='random'):
  """Cluster the rows of `img_1d` into `k_clusters` groups with k-means.

  Parameters
  ----------
  img_1d : 2-D array of shape (n_pixels, n_channels)
      One row per pixel.
  k_clusters : int
      Number of clusters; must be at least 1.
  max_iter : int
      Maximum number of assignment/update rounds. Zero or a negative value
      performs no rounds (the initial centroids and all-zero labels are
      returned). Iteration stops early once the labels no longer change,
      because further rounds could not alter the result.
  init_centroids : {'random', 'in_pixels'}
      'random' draws every centroid component uniformly from 0..255;
      'in_pixels' picks `k_clusters` distinct rows of `img_1d`.

  Returns
  -------
  centroids : array of shape (k_clusters, n_channels)
      Integer dtype for 'random', the dtype of `img_1d` for 'in_pixels'.
      Cluster means are cast to that dtype when stored.
  labels : float array of shape (n_pixels,)
      Index of the cluster each pixel was assigned to.

  Raises
  ------
  ValueError
      If `k_clusters` is smaller than 1 or `init_centroids` is not one of the
      supported modes.
  """
  length, dim = img_1d.shape
  if k_clusters < 1:
    raise ValueError(f"k_clusters must be at least 1, got {k_clusters}")

  if init_centroids == 'random':
    # randint samples with replacement, so any k works (sampling 0..255
    # without replacement fails as soon as k_clusters * dim > 256).
    centroids = np.random.randint(0, 256, size=(k_clusters, dim))
  elif init_centroids == 'in_pixels':
    # Fancy indexing returns a copy, so updating centroids never edits img_1d.
    centroids = img_1d[np.random.choice(length, size=k_clusters, replace=False)]
  else:
    raise ValueError(
      f"Unknown init_centroids {init_centroids!r}; expected 'random' or 'in_pixels'"
    )

  # Distances are computed in float so unsigned inputs cannot wrap around.
  pixels = img_1d.astype(float)
  labels = np.zeros(shape=(length))
  previous = None

  for _ in range(int(max_iter)):
    # Squared distance from every pixel to every centroid. The square root is
    # skipped because it does not change which centroid is nearest.
    sq_dist = np.empty((k_clusters, length))
    for j in range(k_clusters):
      diff = pixels - centroids[j]
      sq_dist[j] = np.sum(diff * diff, axis=1)

    # argmin keeps the lowest index on ties, like a strict `<` comparison.
    nearest = np.argmin(sq_dist, axis=0)
    if previous is not None and np.array_equal(nearest, previous):
      break  # converged: same labels would reproduce the same centroids
    previous = nearest
    labels = nearest.astype(float)

    # Update centroids; an empty cluster keeps its previous centroid.
    for j in range(k_clusters):
      members = img_1d[nearest == j]
      if len(members):
        centroids[j] = np.mean(members, axis=0)

  return centroids, labels

### **Test program for k = {3, 5, 7}**<a class="anchor" id="c4"></a>
Test functions that run k-means with each of the two centroid initialization modes. <br>
The maximum number of iterations is fixed at 10.

#### **centroids =  random**

In [4]:
def random_test(img):
  """Plot `img` compressed with k = 3, 5 and 7 using 'random' initialization.

  Each compression runs k-means for at most 10 iterations. The three results
  are drawn side by side in a single matplotlib figure.
  """
  k_values = [3, 5, 7]

  # The flattened pixels do not depend on k, so compute them once.
  flat_img = flatten_img(img)

  output_img = []
  for k_cluster in k_values:
    centroids, labels = kmeans(flat_img, k_cluster, 10, 'random')
    # Use the `img` argument for the output shape; `init_img` is not defined
    # inside this function.
    final_img = compress_img(centroids, labels, img)
    output_img.append(Image.fromarray(final_img.astype('uint8'), 'RGB'))

  # One subplot per value of k
  _, axis = plt.subplots(1, len(k_values), figsize=(12, 8))
  for ax, k, result in zip(axis, k_values, output_img):
    ax.set_title(f'Random with k cluster = {k}')
    ax.imshow(result)
    ax.axis('off')
  plt.tight_layout()

#### **centroids = in_pixels**

In [5]:
def in_pixels_test(img):
  """Plot `img` compressed with k = 3, 5 and 7 using 'in_pixels' initialization.

  Each compression runs k-means for at most 10 iterations. The three results
  are drawn side by side in a single matplotlib figure.
  """
  k_values = [3, 5, 7]

  # The flattened pixels do not depend on k, so compute them once.
  flat_img = flatten_img(img)

  output_img = []
  for k_cluster in k_values:
    centroids, labels = kmeans(flat_img, k_cluster, 10, 'in_pixels')
    # Use the `img` argument for the output shape; `init_img` is not defined
    # inside this function.
    final_img = compress_img(centroids, labels, img)
    output_img.append(Image.fromarray(final_img.astype('uint8'), 'RGB'))

  # One subplot per value of k
  _, axis = plt.subplots(1, len(k_values), figsize=(12, 8))
  for ax, k, result in zip(axis, k_values, output_img):
    ax.set_title(f'In_pixels with k cluster = {k}')
    ax.imshow(result)
    ax.axis('off')
  plt.tight_layout()

### **Main function** <a class="anchor" id="c5"></a>

In [6]:
def main():
  """Interactively compress one image with k-means, save it and display it.

  Prompts for the image path, the maximum number of iterations, the number of
  clusters and the centroid initialization mode, runs the compression, then
  prompts for the output format (PNG, JPG or PDF). The result is written next
  to the source image as
  `<name>_<centroid mode>_<k>_compressed.<extension>` and finally shown.

  Returns early, after printing a message, when the centroid mode or the
  output format choice is not one of the listed options.
  """
  # Local import: path handling is only needed here.
  from pathlib import Path

  # Input parameters
  img_path = input("Enter image's path (use C:/ instead of C:\\): ")
  max_iter = int(input("Enter max iteration: "))
  k_cluster = int(input("Number of k cluster: "))
  centroidsT = int(input("Initial centroid: \n0) random \n1) in_pixels \nYour choice: "))

  # Explicit membership test so negative indices are rejected too
  if centroidsT not in (0, 1):
    print('Invalid centroid initialization method')
    return
  centroids_type = init_centroidsT[centroidsT]

  # Open the image; the context manager releases the source file once the
  # RGB copy has been made.
  with Image.open(img_path) as source_img:
    init_img = source_img.convert('RGB')

  # Flatten image for processing
  flat_img = flatten_img(init_img)

  # Kmeans processing
  centroids, labels = kmeans(flat_img, k_cluster, max_iter, centroids_type)
  final_img = compress_img(centroids, labels, init_img)
  final_img = Image.fromarray(final_img.astype('uint8'), 'RGB')

  # Split the path into directory and file name without extension.
  # `stem` removes only the final extension, so "my.photo.png" keeps
  # "my.photo", and a bare file name resolves to the current directory.
  source_path = Path(img_path)
  output_img = source_path.stem
  directory_path = source_path.parent

  # Output file type choice
  save_choice = int(input('Enter output file type: \n1) PNG \n2) JPG \n3) PDF \nYour choice: '))

  # Menu number -> (file extension, Pillow format name)
  save_formats = {1: ('png', 'PNG'), 2: ('jpg', 'JPEG'), 3: ('pdf', 'PDF')}
  if save_choice not in save_formats:
    print("Invalid file save type!")
    return
  extension, pil_format = save_formats[save_choice]

  # as_posix() keeps forward slashes in the reported path on every platform
  file_name = f"{output_img}_{centroids_type}_{k_cluster}_compressed.{extension}"
  target = (directory_path / file_name).as_posix()
  final_img.save(target, pil_format)
  print(f"Image saved at {target}")

  # Show image
  show_img(final_img, output_img, centroids_type, k_cluster)

In [None]:
# Start the interactive compression workflow when this cell is executed
main()