In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import gc
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.base import TransformerMixin, BaseEstimator
from os.path import splitext
import os
import time
%matplotlib inline

class ImageIntToFloat(BaseEstimator,TransformerMixin):
    """Stateless transformer that rescales ``uint8`` arrays to floats.

    A ``uint8`` input is cast to ``float32`` and divided by the largest
    ``uint8`` value (255). Input of any other dtype is returned as-is.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return the rescaled copy of ``X`` if it is ``uint8``, otherwise ``X`` itself."""
        # Guard clause: anything that is not uint8 is handed back untouched.
        if X.dtype != np.uint8:
            return X
        uint8_max = np.iinfo(np.uint8).max
        return X.astype('float32') / uint8_max

class ImageReshaper(BaseEstimator,TransformerMixin):
    """Stateless transformer that reshapes an array to ``(-1, colour_channels)``.

    Parameters
    ----------
    colour_channels : int, default=3
        Length of the last axis of the reshaped output.
    """

    def __init__(self, colour_channels=3):
        self.colour_channels = colour_channels

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` reshaped into rows of ``colour_channels`` values."""
        # -1 lets the row count be inferred from the total number of elements.
        target_shape = (-1, self.colour_channels)
        return X.reshape(target_shape)

In [2]:
# Read the source image and report how much memory a float32 copy of it needs.
filename = './New folder/abc.jpg'
# Title and extension are kept apart so later cells can build names from them.
ftitle, fext = splitext(filename)
image = mpimg.imread(filename)
print(image.shape)
# One float32 value per array element, expressed in units of 1024**2 bytes.
float32_size_mb = image.size * np.dtype(np.float32).itemsize / 1024**2
print(f'Uncompressed image will use {float32_size_mb:.2f} mb in memory.')

(2048, 1448, 3)
Uncompressed image will use 33.94 mb in memory.


In [3]:
# Preprocessing: scale uint8 pixels to floats, then reshape the image so that
# each row holds the colour values of one pixel (one clustering sample per row).
pipe = Pipeline([
    ('int2float',ImageIntToFloat()),
    ('reshape',ImageReshaper(colour_channels=image[0,0].size)),
])

# Palette sizes to try; one quantised image is written per entry.
n_colors = (2,3,4,6,7,8,10,15,20,35)
reshaped_image = pipe.fit_transform(image)
# True once the on-disk pixel copy ('temp') exists and must be cleaned up.
flag = False

try:
    for n_clusters in n_colors:
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments during a long clustering run.
        tick = time.perf_counter()
        out_path = f'{ftitle}({n_clusters}){fext}'
        print(f'Clustering the image into {n_clusters} clusters')
        try:
            kmeans = KMeans(n_clusters=n_clusters,random_state=42).fit(reshaped_image)
        except MemoryError:
            print("Memory Error while using K-Means. Using Mini-Batch K-Means instead.")
            if not flag:
                # 'w+' creates (or overwrites) the backing file; it is created
                # once and reused by every later fallback iteration.
                memmapped_image = np.memmap('temp', dtype='float32', mode='w+', shape=reshaped_image.shape)
                # Mark for cleanup immediately so a failure while copying the
                # pixels below still gets the file removed.
                flag = True
                memmapped_image[:] = reshaped_image
                print('Generated Memory Map.')
            kmeans = MiniBatchKMeans(n_clusters=n_clusters,random_state=42).fit(memmapped_image)

        # Keep the palette inside [0, 1] before it is used as float pixel data.
        np.clip(kmeans.cluster_centers_, 0.0, 1.0, out=kmeans.cluster_centers_)
        # Replace every pixel by its cluster centre and restore the image shape.
        segmented_img = kmeans.cluster_centers_[kmeans.labels_].reshape(image.shape)

        try:
            mpimg.imsave(out_path, segmented_img)
        except MemoryError:
            gc.collect()
            print("Memory Error while saving Image. Using Memory Map.")
            mm_segmented_img = np.memmap('seg', dtype='float32', mode='w+', shape=segmented_img.shape)
            try:
                mm_segmented_img[:] = segmented_img
                print('Generated Memory Map.')
                mpimg.imsave(out_path, mm_segmented_img)
            finally:
                # Drop the mapping before deleting its file, and do so even if
                # the retry failed, so 'seg' is never left behind.
                del mm_segmented_img
                gc.collect()
                os.remove('seg')
                print('Deleted Memory Map.')
        print(f'{out_path} saved. Time taken - {time.perf_counter() - tick:.2f} seconds.')
finally:
    # Runs on normal completion and on any error or interrupt, so the 'temp'
    # backing file does not outlive the cell.
    if flag:
        del memmapped_image
        gc.collect()
        os.remove('temp')
        print('Deleted Memory Map.')

print('Task Completed.')

Clustering the image into 2 clusters
./New folder/hako(2).jpg saved. Time taken - 27.62 seconds.
Clustering the image into 3 clusters
./New folder/hako(3).jpg saved. Time taken - 48.86 seconds.
Clustering the image into 4 clusters
./New folder/hako(4).jpg saved. Time taken - 74.03 seconds.
Clustering the image into 6 clusters
./New folder/hako(6).jpg saved. Time taken - 137.71 seconds.
Clustering the image into 7 clusters
./New folder/hako(7).jpg saved. Time taken - 126.69 seconds.
Clustering the image into 8 clusters
./New folder/hako(8).jpg saved. Time taken - 166.19 seconds.
Clustering the image into 10 clusters
./New folder/hako(10).jpg saved. Time taken - 218.93 seconds.
Clustering the image into 15 clusters
./New folder/hako(15).jpg saved. Time taken - 395.14 seconds.
Clustering the image into 20 clusters
Memory Error while using K-Means. Using Mini-Batch K-Means instead.
Generated Memory Map.
./New folder/hako(20).jpg saved. Time taken - 45.40 seconds.
Clustering the image into 