# Using an encoder model to cluster images

#### Importing libraries

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, UpSampling2D, Activation
from keras import backend as K
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Loading saved image data
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# Reading the array inside a `with` block closes the archive as soon as the
# array has been pulled into memory.
with np.load('/content/drive/Shared drives/Coastal_Image_Analysis/Data/224resizedimagearray.npz') as npz_archive:
    data = npz_archive['arr_0']

In [None]:
#Splittin into train and test 
train, test = train_test_split(data, test_size=0.09, random_state=42, shuffle=True)

In [None]:
# Splitting train into train and validate
train, validate = train_test_split(train, test_size=0.1, random_state=42, shuffle=True)

In [None]:
# Build the autoencoder
# First half: two conv + 2x2 max-pool stages (224 -> 112 -> 56), ending in 7 channels.
# Second half: two conv + 2x upsampling stages (56 -> 112 -> 224), then a
# 3-channel conv so the output has the same shape as the input image.
model = Sequential([
    Conv2D(14, kernel_size=3, padding='same', activation='relu', input_shape=(224, 224, 3)),
    MaxPool2D((2, 2), padding='same'),
    Dropout(0.2),
    Conv2D(7, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D((2, 2), padding='same'),
    Dropout(0.2),
    Conv2D(7, kernel_size=3, padding='same', activation='relu'),
    UpSampling2D((2, 2)),
    Dropout(0.2),
    Conv2D(14, kernel_size=3, padding='same', activation='relu'),
    UpSampling2D((2, 2)),
    Dropout(0.2),
    Conv2D(3, kernel_size=3, padding='same', activation='relu'),
])

# Reconstruction objective: mean squared error between input and output
model.compile(optimizer='Adagrad', loss="mse")
model.summary()

In [None]:
# Reshape every split to (N, 224, 224, 3) and divide by 255
# (presumably maps 8-bit pixel values into [0, 1] -- confirm the stored dtype)
train, test, validate = (
    split.reshape(-1, 224, 224, 3) / 255
    for split in (train, test, validate)
)

In [None]:
#Early stopping
# Stop once val_loss has gone `patience` epochs without improving, and roll the
# model back to the weights of the best epoch. Without restore_best_weights the
# model that gets saved afterwards carries the weights of the final
# (non-improving) epoch instead of the best one.
cb = keras.callbacks.EarlyStopping(monitor='val_loss',
                              patience=2,
                              restore_best_weights=True,
                              )

In [None]:
# Train the model as an autoencoder: each image is both the input and the target
model.fit(
    x=train,
    y=train,
    epochs=600,
    batch_size=64,
    validation_data=(validate, validate),
    callbacks=[cb],
)

In [None]:
#Saving the model
model.save("/content/drive/Shared drives/Coastal_Image_Analysis/Data/autoencoder_model1.h5")

In [None]:
#Loading the model
model = tf.keras.models.load_model('/content/drive/Shared drives/Coastal_Image_Analysis/Data/autoencoder_model1.h5')


In [None]:
restored_testing_dataset = model.predict(test)

In [None]:
plt.imshow(test[5])

In [None]:
# Creating an encoder
# Backend function mapping the model input to the output of layer index 4
# (the second max-pooling layer in the architecture built above).
encoder_input = model.layers[0].input
bottleneck_output = model.layers[4].output
encoder = K.function([encoder_input], [bottleneck_output])

In [None]:
# Encode the training set
encoded_images = encoder([data[:1500]])[0].reshape(-1,56*56*7)


In [None]:
encoded_images1 = encoder([data[1501:2500]])[0].reshape(-1,56*56*7)


In [None]:
encoded_images2 = encoder([data[2501:]])[0].reshape(-1,56*56*7)

In [None]:
encoded=np.append(encoded_images,encoded_images1,axis=0)

In [None]:
encoded_imgs=np.append(encoded,encoded_images2,axis=0)

In [None]:
#Applying kmeans
kmeans = KMeans(n_clusters=10)
kmeans.fit_predict(encoded_imgs)

In [None]:
 k_means_labels = kmeans.labels_

In [None]:
G = len(np.unique(k_means_labels))  # number of distinct labels

# cluster_index[n] holds, in ascending order, the positions in k_means_labels
# whose label equals n, for n in 0..G-1
cluster_index = [
    [position for position, assigned in enumerate(k_means_labels) if assigned == n]
    for n in range(G)
]

In [None]:
#Visualisation for clusters = clust
clust = 10
num = 9 #num of data to visualize from the cluster
# Never walk past the clusters that were actually built
for j in range(min(clust, len(cluster_index))):
    plt.figure(figsize=(20,20))
    print('Cluster '+str(j))
    # Show the first `num` members of the cluster. Slicing starts at member 0
    # and simply yields fewer items for small clusters; indexing with
    # range(1, num+1) skipped member 0 and raised IndexError whenever a cluster
    # had fewer than num+1 members.
    for i, image_index in enumerate(cluster_index[j][:num], start=1):
        plt.subplot(7, 7, i) #(Number of rows, Number of column per row, item number)
        plt.imshow(data[image_index].reshape(224,224,3))
    plt.show()

#### t-SNE visualisation using plotly

In [None]:
from sklearn.manifold import TSNE

In [None]:
# Cast the encoded features to float32, keep one flat row per image, and
# divide by 255 before handing them to t-SNE
flat_features = np.float32(encoded_imgs).reshape(len(encoded_imgs), -1)
data2 = np.array(flat_features / 255)

In [None]:
# Embed the encoded features in 2-D. n_components is spelled out because the
# DataFrame built from this result expects exactly two columns, and
# random_state makes the otherwise stochastic embedding reproducible,
# matching the seeded train/test splits.
tsne = TSNE(n_components=2, random_state=42).fit_transform(data2)


In [None]:
tsne_components = pd.DataFrame(tsne,columns=['comp1','comp2'])

In [None]:
tsne_components['cluster']=k_means_labels

In [None]:
import plotly.express as px

In [None]:

# Cast the labels to strings (presumably so plotly colours the clusters as
# discrete categories rather than on a continuous scale -- confirm)
tsne_components["cluster"] = tsne_components["cluster"].astype(str)

# Scatter the two t-SNE components, coloured by cluster label
fig = px.scatter(
    data_frame=tsne_components,
    x="comp1",
    y="comp2",
    color="cluster",
)

fig.show()