# K-Means using images
In this part, we'll implement K-Means to compress an image. The image we'll be working with is an H × W × 3 array (height by width by three colour channels), so each pixel location holds three 8-bit integers that specify the red, green, and blue intensity values. Our goal is to reduce the number of colors to K and represent (compress) the photo using only those K colors.

Source: https://towardsdatascience.com/k-means-clustering-algorithm-applications-evaluation-methods-and-drawbacks-aa03e644b48a

In [None]:
# Modules
# Run this cell first every time you reset the kernel.
import matplotlib.pyplot as plt
import numpy
from matplotlib.image import imread
import pandas as pd

from sklearn.datasets.samples_generator import (make_blobs,
                                                make_circles,
                                                make_moons)
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_samples, silhouette_score

import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from pylab import *
import seaborn as sns

from PIL import Image
import requests
from io import BytesIO

In [None]:
# Download an image from the web and load it into `img` as a NumPy array.
# Find a URL on the internet you want to import and test on.
# ONLY use this with smaller images! Try to keep them under 400x400 pixels,
# otherwise it will take a while to run the algorithm.
# If you want to use the default images, then use the cell below ONLY.
url="https://t1.ea.ltmcdn.com/en/razas/0/5/5/img_550_pembroke-welsh-corgi_0_600.jpg"
# A timeout keeps the kernel from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of handing an error page to PIL.
response.raise_for_status()
# Normalise to 3-channel RGB so palette, grayscale, CMYK or RGBA sources all
# give the (height, width, 3) layout the introduction describes.
img = Image.open(BytesIO(response.content)).convert('RGB')
img = numpy.array(img)
imshow(img)
print(type(img))

In [None]:
# Load one of the sample images from disk into `img`.
# The sample files are named by number, e.g.:
#   1.jpg
#   2.jpg
#   3.jpg
#   4.jpg
# NOTE: Use either this cell or the URL cell before it, not both --
# whichever runs last overwrites `img`.
image_path = 'data/images/1.jpg'
img = imread(image_path)
imshow(img)
print(type(img))

In [None]:

# Compress `img` by replacing every pixel with the centre of its K-Means cluster,
# then show the original and the compressed image side by side.
# Expects `img` to be set by one of the image-loading cells; the uint8 cast below
# assumes pixel values in the 0-255 range.
img_size = img.shape
# Reshape it to be 2-dimension: one row per pixel, one column per channel.
# A 2-D (grayscale) image has no channel axis, so treat it as a single channel.
n_channels = img_size[2] if img.ndim == 3 else 1
X = img.reshape(-1, n_channels)

# Run the Kmeans algorithm
k = 10
# note: set verbose=1 to see the iterations of K-Means.
km = KMeans(n_clusters=k, verbose=0)
km.fit(X)

# Use the centroids to compress the image
X_compressed = km.cluster_centers_[km.labels_]
# Centroids are floats. Round to the nearest integer and clip BEFORE casting:
# casting first truncates the fraction and makes the clip a no-op.
X_compressed = np.clip(np.rint(X_compressed), 0, 255).astype('uint8')

# Reshape X_compressed to have the same dimensions as the original image
X_compressed = X_compressed.reshape(img_size)

# Plot the original and the compressed image next to each other
fig, ax = plt.subplots(1, 2, figsize = (12, 8))
ax[0].imshow(img)
ax[0].set_title('Original Image')
ax[1].imshow(X_compressed)
ax[1].set_title('Compressed Image ' + str(k) + ' colors')
# Use a separate loop name so `ax` keeps referring to the array of both axes.
for axes in fig.axes:
    axes.axis('off')
plt.tight_layout()