In [1]:
from keras.models import Model
from keras.layers import Input, Dense
from matplotlib import pyplot as plt
import numpy as np
import cv2
import os

def create_autoencoder(input_dim, output_func, latent_dim, latent_func, hidden_dim, hidden_func):
    input_layer = Input(shape=(input_dim,))
    prev_layer = input_layer
    for dim, func in zip(hidden_dim, hidden_func):
        prev_layer = Dense(dim, activation=func)(prev_layer)
    encoder = Dense(latent_dim, activation=latent_func)(prev_layer)
    for dim, func in zip(reversed(hidden_dim), reversed(hidden_func)):
        prev_layer = Dense(dim, activation=func)(prev_layer)
    autoencoder = Dense(input_dim, activation=output_func)(prev_layer)
    return Model(input_layer, autoencoder)


class KDAE:

    def __init__(self, k, ae_config, optimizer='adam', loss='mse') -> None:
        self.k = k
        self.autoencoders = []
        self.clusters = [[] for _ in range(k)]
        print("everything ok")
        for _ in range(k):
            ae = create_autoencoder(**ae_config)
            ae.compile(optimizer=optimizer, loss=loss)
            self.autoencoders.append(ae)
            print("everything ok")
    
    def cluster_images(self, images):
        self.clusters = [[] for _ in range(self.k)]
        for image in images:
            image_array = image.array
            min_mse = np.infty
            min_k = 0
            for ae, k in enumerate(self.autoencoders):
                ae_array = ae.predict(image_array)
                mse = np.sum((ae_array - image_array)**2)
                if min_mse > mse:
                    min_mse = mse
                    min_k = k
            self.clusters[min_k].append(image)
            image.prev_cluster = image.cluster
            image.cluster = min_k

    def fit_autoencoders(self, epochs=50):
        for ae, cluster in zip(self.autoencoders, self.clusters):
            cl = np.array(cluster)
            ae.fit(cl, cl, epochs=epochs)
            
    def all_clusters_remain_same(self, images):
        for image in images:
            if not image.is_same_cluster():
                return False
        return True

    def cluster_images(self, images, epochs_per_iter = 50, stop=50):
        counter = 0
        while self.all_clusters_remain_same(images) and counter < stop:
            print(f"Iteration: {counter}")
            self.cluster_images(images)
            print(f"Images clustered")
            self.fit_autoencoders(epochs_per_iter)
            print(f"Autoencoders fitted")
            counter += 1

    def plot_cluster(self, k, figsize=(16,16)):
        fig = plt.figure(figsize=figsize)
        images = [image.image for image in self.clusters[k]]
        columns = 4
        rows = 1+ len(images)//columns
        for i in range(columns*rows):
            img = images[i].image
            fig.add_subplot(rows, columns, i+1)
            plt.imshow(img)
        plt.show()
        

class Image():
    
    def __init__(self, path, shape=(128, 128)) -> None:
        self.path = path
        self.shape = shape
        self.array = cv2.imread(path, cv2.IMREAD_COLOR)
        self.array = cv2.resize(self.array, shape)
        self.array = self.array[:,:,::-1]
        self.array = self.array.flatten()
        self.prev_cluster = -1
        self.cluster = -2
    
    def is_same_cluster(self):
        if self.prev_cluster == self.cluster:
            return True
        return False

    @property
    def image(self):
        return self.array.reshape((self.shape + (3,)))

2021-11-04 15:19:22.228972: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-04 15:19:22.229008: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [None]:
IMAGE_WIDTH = 64
ae_config = {
    "input_dim":IMAGE_WIDTH*IMAGE_WIDTH*3,
    "output_func": "sigmoid",
    "latent_dim": 16,
    "latent_func": "sigmoid",
    "hidden_dim": [3072, 1536, 768],
    "hidden_func": ["sigmoid"]*3
}
k_dae = KDAE(8, ae_config)
images = [Image("./simpson/"+image,shape=(IMAGE_WIDTH, IMAGE_WIDTH)) for image in os.listdir("./simpson")]
k_dae.cluster_images(images,epochs_per_iter=20, stop=4)

for k in range(8):
    k_dae.plot_cluster(k)

everything ok


2021-11-04 15:19:35.767559: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-11-04 15:19:35.767604: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-04 15:19:35.767637: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (hasser): /proc/driver/nvidia/version does not exist
2021-11-04 15:19:35.767970: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


everything ok
everything ok
everything ok
everything ok
everything ok
everything ok
everything ok
