In [None]:
#importing Libraries

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import load_img, img_to_array
import os

In [None]:
#Loading dataset

def loadDataset(root='/home/rkarim/Training_data/', target_size=(32, 32)):
    """Load every ``.jpg`` image found in the sub-directories of ``root``.

    The name of the directory that directly contains an image is used as that
    image's label. Files located directly in ``root`` are skipped.

    Directories and files are visited in sorted order so repeated runs return
    the samples in the same order; ``os.walk`` on its own yields entries in an
    arbitrary, filesystem-dependent order.

    Args:
        root: Directory to scan. Defaults to the path this notebook was
            originally written against.
        target_size: Size passed to ``load_img`` so that all images end up
            with the same shape.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays: ``data`` holds one image
        array per sample and ``labels`` the matching directory names. Both
        are empty when no ``.jpg`` file is found.
    """
    root = os.fspath(root)  # os.walk yields str paths; keep the comparison below type-consistent
    data = []
    labels = []

    for rootName, dirNames, fileNames in os.walk(root):
        # Sorting in place controls the order in which os.walk descends.
        dirNames.sort()
        if rootName == root:
            continue  # images must live in a labelled sub-directory
        # normpath guards against a trailing separator yielding an empty basename.
        label = os.path.basename(os.path.normpath(rootName))
        for fileName in sorted(fileNames):
            if fileName.endswith('.jpg'):
                img = load_img(os.path.join(rootName, fileName), target_size=target_size)  # Ensure uniform size
                data.append(img_to_array(img))
                labels.append(label)

    return np.array(data), np.array(labels)

# x_data: stacked image arrays; y_data: the directory name of each image (its label).
x_data, y_data = loadDataset()

In [None]:
#Normalizing the data

# Cast to float32 and divide by 255 (maps 8-bit pixel values into [0, 1]).
# NOTE: this rebinds x_data, so running the cell a second time divides again.
x_data = np.true_divide(x_data.astype('float32'), 255.)

In [None]:
#Converting labels into Numbers

# np.unique returns the distinct labels in sorted order, so every label is
# mapped to its position in that sorted list.
unique_labels = np.unique(y_data)
label_to_index = dict(zip(unique_labels, range(len(unique_labels))))
y_data = np.array([label_to_index[name] for name in y_data])

In [None]:
#defining input shape

# Shape of a single sample: everything after the leading sample axis of x_data.
input_shape = x_data.shape[1:]

In [None]:
#defining autoencoder model

# Symbolic input tensor; both the full autoencoder and the stand-alone encoder
# model are built on top of it.
input_img = Input(shape=input_shape)

In [None]:
#Encoder

# Twelve identical Conv -> BatchNorm -> MaxPool stages stacked on the input
# tensor. Every pooling halves the spatial size (padding='same' rounds up), so
# for the 32x32 images loaded above the feature map is already 1x1 after the
# fifth stage and stays 1x1x32 through the remaining ones.
x = input_img
for _ in range(12):
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
encoded = x

In [None]:
#Decoder

# The number of upsampling stages is derived from the tensor shapes instead of
# being hard-coded. With the 32x32 inputs used here the encoder output is 1x1,
# and a fixed 11 doublings would produce a 2048x2048 reconstruction that can
# never match the 32x32 target passed to fit().
# Assumes channels_last data (height, width, channels) -- TODO confirm if the
# Keras image_data_format was changed from its default.
target_height, target_width = int(input_shape[0]), int(input_shape[1])

x = encoded
while int(x.shape[1]) < target_height or int(x.shape[2]) < target_width:
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)

# Doubling can only hit the target exactly when each input side equals the
# encoded side times a power of two; fail early with a clear message otherwise.
decoded_size = (int(x.shape[1]), int(x.shape[2]))
if decoded_size != (target_height, target_width):
    raise ValueError(
        'Decoder output size {} does not match input size {}'.format(
            decoded_size, (target_height, target_width)))

# Sigmoid keeps the reconstruction in [0, 1], the same range as the inputs
# after they were divided by 255.
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

In [None]:
#Creating and compiling the model

# End-to-end model: image in, reconstruction out. The loss is binary
# cross-entropy between the sigmoid output and the input image.
autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

In [None]:
#Splitting the data

# loadDataset appends samples directory by directory, so the arrays are grouped
# by class. Cutting them at a fixed index would leave the last class(es) only
# in the held-out part, so split along a seeded random permutation instead.
# The fixed seed keeps the split identical across Restart & Run All.
shuffled_idx = np.random.RandomState(42).permutation(len(x_data))
split_idx = int(0.8 * len(x_data))
train_idx, test_idx = shuffled_idx[:split_idx], shuffled_idx[split_idx:]

# Index with the same permutation so images and labels stay aligned.
x_train, x_test = x_data[train_idx], x_data[test_idx]
y_train, y_test = y_data[train_idx], y_data[test_idx]

In [None]:
#Training the Autoencoder

# Both callbacks watch val_loss: training stops after 10 epochs without
# improvement, and model.h5 is only overwritten when val_loss improves.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
best_checkpoint = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True, verbose=1)
callbacks = [early_stopping, best_checkpoint]

# The images are both input and target; the held-out images drive val_loss.
autoencoder.fit(
    x_train,
    x_train,
    batch_size=32,
    epochs=100,
    shuffle=True,
    validation_data=(x_test, x_test),
    callbacks=callbacks,
)

In [None]:
# Restore the weights from the checkpoint file written by ModelCheckpoint
# (save_best_only=True) instead of keeping the weights of the last epoch.
autoencoder.load_weights('model.h5')

In [None]:
#defining the encoder model

# Shares its layers with the autoencoder, so it uses the weights loaded above
# and stops at the bottleneck tensor.
encoder = Model(inputs=input_img, outputs=encoded)

In [None]:
#Getting encoded data

# Only the training images are encoded; the clustering result is later scored
# against y_train, which has the same sample order.
x_encoded = encoder.predict(x_train)

In [None]:
#Flattening the data into 2D for K-means

# One row per sample; all remaining axes are collapsed into a single feature axis.
n_samples = len(x_encoded)
x_encoded_reshaped = x_encoded.reshape((n_samples, -1))

In [None]:
#Applying kmeans

# One cluster per ground-truth class. K-means starts from random centroids, so
# random_state is fixed to make the ARI below reproducible, and n_init is
# pinned because its library default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=len(unique_labels), n_init=10, random_state=42)
clusters = kmeans.fit_predict(x_encoded_reshaped)

In [None]:
#Measuring ARI

# Compare the K-means cluster assignment with the true class indices of the
# same training samples.
ari = adjusted_rand_score(labels_true=y_train, labels_pred=clusters)
print(f'Adjusted Rand Index (ARI): {ari}')