<a href="https://colab.research.google.com/github/cagBRT/Clustering-Intro/blob/master/C3B_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the entire repo.
# NOTE(review): `-l`/`-s` are git's --local/--shared options, which target clones of
# repositories on the local machine — confirm they are wanted for a remote URL.
!git clone -l -s https://github.com/cagBRT/Clustering-Intro.git cloned-repo
# Work from inside the clone: a later cell opens an image via the relative path 'images/...'.
%cd cloned-repo

In this notebook you use clustering to reduce the number of colors in images.

Import the libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()  # for plot styling
import numpy as np
from PIL import Image

Load the picture from scikit-learn's bundled sample images.

In [None]:
from sklearn.datasets import load_sample_image

# Grab the bundled "china.jpg" sample photo (this needs the ``pillow`` package).
china = load_sample_image("china.jpg")

# Draw it on axes stripped of tick marks so only the picture shows.
ax = plt.axes()
ax.set_xticks([])
ax.set_yticks([])
ax.imshow(china);

The image is 427 × 640 pixels with 3 color channels (RGB).

In [None]:
# Show the dimensions of the loaded image array.
china.shape

Normalize the data to the 0–1 range and reshape it into a 273,280 × 3 array (one row per pixel, one column per color channel).

In [None]:
# Scale the channel values into the 0...1 range and flatten the image so that
# each row holds one pixel and the three columns hold its color values.
data = np.reshape(china / 255.0, (china.shape[0] * china.shape[1], 3))
data.shape

Plot the colors used in the image

In [None]:
def plot_pixels(data, title, colors=None, N=10000):
    """Scatter a random sample of pixels in red-green and red-blue space.

    Parameters
    ----------
    data : array of shape (n_pixels, 3)
        One row per pixel; the columns are unpacked as R, G, B and drawn on
        axes limited to 0...1.
    title : str
        Figure-level title.
    colors : array with one row per row of ``data``, optional
        Color used to draw each point. Defaults to ``data`` itself, so every
        pixel is drawn in its own color.
    N : int, default 10000
        Maximum number of pixels to plot.

    Returns
    -------
    None
        The figure is created through pyplot and left for the notebook to show.
    """
    if colors is None:
        colors = data

    # Fixed seed: every call samples the same pixel indices, so plots of the
    # same data drawn with different ``colors`` show the same points.
    rng = np.random.RandomState(0)
    sample = rng.permutation(data.shape[0])[:N]
    point_colors = colors[sample]
    R, G, B = data[sample].T

    fig, ax = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: red vs. green; right panel: red vs. blue.
    for panel, channel, label in zip(ax, (G, B), ('Green', 'Blue')):
        panel.scatter(R, channel, color=point_colors, marker='.')
        panel.set(xlabel='Red', ylabel=label, xlim=(0, 1), ylim=(0, 1))

    # No colorbar is drawn: the points are given explicit colors rather than
    # values mapped through a colormap, so a colorbar would display a scale
    # unrelated to the plot (and take width away from the left panel).
    fig.suptitle(title, size=20)


In [None]:
# No `colors` override: each sampled pixel is drawn in its own color.
plot_pixels(
    data,
    title='Input color space: 16 million possible colors',
)

Use clustering to reduce the possible number of colors in the image.

In [None]:
# Number of clusters handed to MiniBatchKMeans, i.e. how many colors the image
# is reduced to (despite the name, this is not a batch count).
NO_BATCHES=16

# NOTE(review): this silences *every* warning for the rest of the session, not
# only the NumPy ones it was added for — consider narrowing the filter.
import warnings; warnings.simplefilter('ignore')  # Fix NumPy issues.

from sklearn.cluster import MiniBatchKMeans

# A fixed random_state makes the resulting palette the same on every run.
kmeans = MiniBatchKMeans(n_clusters=NO_BATCHES, random_state=0)
kmeans.fit(data)
# Replace each pixel by the center of the cluster it is assigned to.
new_colors = kmeans.cluster_centers_[kmeans.predict(data)]
# The title must be a single string: a tuple would be rendered as its repr.
plot_pixels(data, colors=new_colors,
            title=f"Reduced color space: {NO_BATCHES} colors")

In [None]:
# Fold the flat list of replacement colors back into the image's shape.
china_recolored = np.reshape(new_colors, china.shape)

# Two tick-free panels side by side with only a thin gap between them.
fig, ax = plt.subplots(
    nrows=1, ncols=2, figsize=(16, 6),
    subplot_kw={'xticks': [], 'yticks': []},
)
fig.subplots_adjust(wspace=0.05)

# Original on the left, color-reduced version on the right.
for panel, picture, caption in zip(ax,
                                   (china, china_recolored),
                                   ('Original Image', 'Reduced color Image')):
    panel.imshow(picture)
    panel.set_title(caption, size=16)

# **Assignment**
Use k-means clustering to color compress the flower image to 32 colors.

In [None]:
from sklearn.datasets import load_sample_image

# Starting point for the assignment: the bundled "flower.jpg" sample photo.
flower = load_sample_image("flower.jpg")

# Draw it on axes stripped of tick marks so only the picture shows.
ax = plt.axes()
ax.set_xticks([])
ax.set_yticks([])
ax.imshow(flower);

**Assignment**<br>
Repeat the color compression with your own .jpg image.



In [None]:
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np

# Open the file in a `with` block so its handle is closed as soon as the pixels
# have been copied into an array. Converting to RGB guarantees three channels,
# which the later reshape to (n_pixels, 3) relies on (a .jpg may also be
# grayscale or CMYK).
with Image.open('images/florence-2044046_1280 (1).jpg') as picture:
    img = np.asarray(picture.convert('RGB'))
print(repr(img))

In [None]:
# Display the loaded image; the object returned by imshow is kept in `imgplot`.
imgplot = plt.imshow(img)

In [None]:
# Show the dimensions of the loaded image array.
img.shape

In [None]:
# Scale the channel values into the 0...1 range and flatten the image so that
# each row holds one pixel and the three columns hold its color values.
# Note: this rebinds `data`, which earlier held the pixels of the china image.
data = np.reshape(img / 255.0, (img.shape[0] * img.shape[1], 3))
data.shape

In [None]:
# No `colors` override: each sampled pixel is drawn in its own color.
plot_pixels(
    data,
    title='Input color space: 16 million possible colors',
)

In [None]:
# Cluster the pixel colors into 16 groups. A fixed random_state makes the
# resulting palette the same on every run.
kmeans = MiniBatchKMeans(n_clusters=16, random_state=0)
kmeans.fit(data)
# Replace each pixel by the center of the cluster it is assigned to.
new_colors = kmeans.cluster_centers_[kmeans.predict(data)]
plot_pixels(data, colors=new_colors,
            title="Reduced color space: 16 colors")

In [None]:
# Fold the flat list of replacement colors back into the image's shape.
img_recolored = np.reshape(new_colors, img.shape)

# Two tick-free panels side by side with only a thin gap between them.
fig, ax = plt.subplots(
    nrows=1, ncols=2, figsize=(16, 6),
    subplot_kw={'xticks': [], 'yticks': []},
)
fig.subplots_adjust(wspace=0.05)

# Original on the left, color-reduced version on the right.
for panel, picture, caption in zip(ax,
                                   (img, img_recolored),
                                   ('Original Image', '16-color Image')):
    panel.imshow(picture)
    panel.set_title(caption, size=16)