## **Clustering images using a pretrained MobileNetV2 model**



#### Importing libraries

In [None]:
import glob
import math
import os
import random
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow as tf
import tensorflow.keras.backend as K
from google.colab.patches import cv2_imshow
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import models
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

In [None]:
# Google Drive folder holding the resized source images, plus the glob
# pattern that matches every JPEG directly inside it.
input_dir = '/content/drive/My Drive/Colab Notebooks/Florence/20180917a_jpgs/resized'
glob_dir = f'{input_dir}/*.jpg'

#### Loading images and converting them to numpy array

In [None]:
# Glob once and sort, so `paths` and `images_resized` are built from the same
# listing and share one deterministic order.
paths = []
loaded = []
for file in sorted(glob.glob(glob_dir)):
    bgr = cv2.imread(file)
    if bgr is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        print("Skipping unreadable image: " + file)
        continue
    # OpenCV decodes to BGR; matplotlib and the ImageNet-trained network both
    # expect RGB channel order.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    loaded.append(cv2.resize(rgb, (224, 224)))
    paths.append(file)

if not loaded:
    raise FileNotFoundError("No readable images matched " + glob_dir)

# One flattened float32 row per image, with pixel values scaled to [0, 1].
images_resized = np.float32(loaded).reshape(len(loaded), -1) / 255

#### Loading the model

In [None]:
# MobileNetV2 with ImageNet weights and without its classification head
# (include_top=False), used here purely as a feature extractor.
model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    weights='imagenet',
    include_top=False,
)

#### Predicting our images through the model

In [None]:
  # images_resized holds pixels in [0, 1], but MobileNetV2's ImageNet weights
  # expect inputs in [-1, 1] (the range mobilenet_v2.preprocess_input produces),
  # so rescale before the forward pass. The arithmetic builds a new array and
  # leaves images_resized untouched for later display.
  predictions = model.predict(images_resized.reshape(-1, 224, 224, 3) * 2.0 - 1.0)


In [None]:
# Flatten each image's feature map into a single row vector for clustering.
pred_images = predictions.reshape(len(images_resized), -1)

#### Silhouette method to find the optimal number of clusters

In [None]:
# Silhouette score for every candidate cluster count from 2 to kmax.
kmax = 10
kl = list(range(2, kmax + 1))  # candidate cluster counts (x axis of the plot)
sil = []                       # silhouette score for each entry of kl
for k in kl:
    # Fixed seed so the curve is reproducible from one run to the next.
    kmeans2 = KMeans(n_clusters=k, random_state=728).fit(pred_images)
    labels = kmeans2.labels_
    sil.append(silhouette_score(pred_images, labels, metric='euclidean'))

In [None]:
# Silhouette score against the number of clusters. The x axis needs its own
# label; calling plt.ylabel twice would only overwrite the y label.
plt.plot(kl, sil)
plt.xlabel('K')
plt.ylabel('Silhouette Score')
plt.show()

#### Elbow method to find the optimal number of clusters

In [None]:
# Log of the KMeans inertia (within-cluster sum of squared distances) for each
# candidate cluster count.
# NOTE: `K` rebinds the name that the import cell assigns to
# tensorflow.keras.backend. The name is kept because the elbow-plot cells read
# `K`; any later code must not rely on `K` being the Keras backend.
Sum_of_squared_distances = []
K = range(13, 20)
for k in K:
    # Fixed seed so the curve is reproducible from one run to the next.
    km = KMeans(n_clusters=k, random_state=728).fit(pred_images)
    Sum_of_squared_distances.append(math.log(km.inertia_))

In [None]:
# Elbow curve for the candidate cluster counts held in K (13-19).
fig, ax = plt.subplots()
ax.plot(K, Sum_of_squared_distances, 'bx-')
ax.set_xlabel('k')
ax.set_ylabel('log of Sum_of_squared_distances')
ax.set_title('Elbow Method For Optimal k')
plt.show()

#### Clustering the images using kmeans

In [None]:
# Final clustering of the flattened MobileNetV2 features into k groups.
k = 9
# `n_jobs` is not passed: KMeans no longer accepts it in current scikit-learn
# (deprecated in 0.23, removed in 1.0) and raises TypeError if it is given.
kmodel = KMeans(n_clusters=k, random_state=728)
kmodel.fit(pred_images)
# One cluster id per row of pred_images.
kpredictions = kmodel.predict(pred_images)

In [None]:
# Copy every source image into a folder named after its cluster id.
cluster_root = "/content/drive/My Drive/Colab Notebooks/Florence/20180917a_jpgs/"
for i in range(k):
    # exist_ok=True so re-running the cell does not fail on folders left by a
    # previous run.
    os.makedirs(cluster_root + str(i), exist_ok=True)
for src, cluster_id in zip(paths, kpredictions):
    shutil.copy2(src, cluster_root + str(cluster_id))

In [None]:
# Cluster label assigned to each sample by the fitted KMeans model.
k_means_labels = kmodel.labels_
distinct_labels = np.unique(k_means_labels)
print("The list of labels of the clusters are " + str(distinct_labels))

In [None]:
# Group sample positions by cluster label.
G = len(np.unique(k_means_labels))  # number of distinct labels

# cluster_index[n] lists, in ascending order, the positions of the samples
# whose label equals n.
cluster_index = [
    [pos for pos, label in enumerate(k_means_labels) if label == n]
    for n in range(G)
]

In [None]:
# Number of samples assigned to cluster 2
len(cluster_index[2])

In [None]:
# Show up to `num` sample images from each of the first `clust` clusters.
clust = 9
num = 10  # maximum number of images shown per cluster
for j in range(min(clust, len(cluster_index))):
    plt.figure(figsize=(20, 20))
    print('Cluster ' + str(j))
    # Slice from the first member. Indexing positions 1..num skipped member 0
    # and raised IndexError on any cluster holding num images or fewer.
    for slot, img_idx in enumerate(cluster_index[j][:num], start=1):
        plt.subplot(7, 7, slot)  # (rows, columns, position in the grid)
        plt.imshow(images_resized[img_idx].reshape(224, 224, 3))
    plt.show()

In [None]:
# Elbow curve for whichever cluster counts K currently holds. This cell was
# originally labelled "13-25 clusters"; K as defined in this notebook is
# range(13, 20).
elbow_fig, elbow_ax = plt.subplots()
elbow_ax.plot(K, Sum_of_squared_distances, 'bx-')
elbow_ax.set(
    xlabel='k',
    ylabel='log of Sum_of_squared_distances',
    title='Elbow Method For Optimal k',
)
plt.show()

In [None]:
# Elbow curve for whichever cluster counts K currently holds. This cell was
# originally labelled "1-20 clusters"; K as defined in this notebook is
# range(13, 20).
plt.figure()
plt.title('Elbow Method For Optimal k')
plt.xlabel('k')
plt.ylabel('log of Sum_of_squared_distances')
plt.plot(K, Sum_of_squared_distances, 'bx-')
plt.show()

In [None]:
import math


In [None]:
# Print the layer-by-layer summary of the loaded MobileNetV2 model
model.summary()

#### Gradient heatmap (gradmap) on one of the images

In [None]:
# Sample image taken from the cluster-0 output folder.
p = '/content/drive/My Drive/Colab Notebooks/Florence/20180917a_jpgs/0/26055858_resized.jpg'

# Show the file as stored on disk.
cv2_imshow(cv2.imread(p))

# Keras-loaded copy of the same file, resized to 224x224 for the model.
img = image.load_img(p, target_size=(224, 224))

In [None]:

# Batch of one image for the network. img_to_array yields raw 0-255 pixel
# values, while MobileNetV2's ImageNet weights expect inputs in [-1, 1], so the
# batch is rescaled. The arithmetic creates a new array; img_tensor keeps the
# raw pixels.
img_tensor = image.img_to_array(img)
img_tensor_x = np.expand_dims(img_tensor, axis=0) / 127.5 - 1.0
img_tensor_x.shape

In [None]:
# Collect every Conv2D layer of the network, in model order.
conv_layers = [layer for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D)]
model_layer_outputs = [layer.output for layer in conv_layers]

# Model that maps the network input to all of those layer outputs at once.
activation_model = models.Model(inputs=model.input, outputs=model_layer_outputs)
activations = activation_model.predict(img_tensor_x)

# Activation of the first Conv2D layer for the sample image.
first_activation_layer = activations[0]
print(first_activation_layer.shape)

In [None]:
# Number of Conv2D layers in the model
sum(1 for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D))

In [None]:
# First 16 channels of the first Conv2D layer's activation, drawn as a 4x4 grid.
plt.figure(num=None, figsize=(10, 10), dpi=80, facecolor='w', edgecolor='k')
for channel in range(16):
    ax = plt.subplot(4, 4, channel + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.imshow(first_activation_layer[0, :, :, channel], cmap='viridis')
# first_activation_layer is activations[0], i.e. the first conv layer's output,
# so the title names that layer rather than the last one.
plt.suptitle('First conv layer feature maps of model')
plt.show()

In [None]:
# Gradient-weighted heatmap of the last Conv2D layer for the sample image.
layers = [layer.name for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D)]  # names of all Conv2D layers
last_conv_layer = model.get_layer(layers[-1])
# Model returning both the final output and the last conv feature map.
iterate = tf.keras.models.Model(model.inputs, [model.output, last_conv_layer.output])

# Only the forward pass needs to be recorded on the tape.
with tf.GradientTape() as tape:
    model_out, conv_out = iterate(img_tensor_x)
# Gradient of the model output with respect to the last conv feature map.
grads = tape.gradient(model_out, conv_out)
# tf.reduce_mean is used instead of K.mean: the elbow cell rebinds `K` to a
# range object, so K.mean is not available at this point in the notebook.
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

# Weight each channel by its pooled gradient, average over channels, keep the
# positive part and normalise to [0, 1].
heatmap = tf.reduce_mean(tf.multiply(pooled_grads, conv_out), axis=-1)
heatmap = np.maximum(heatmap, 0)
peak = np.max(heatmap)
if peak > 0:  # an all-zero map would otherwise turn into NaNs
    heatmap /= peak
heatmap = heatmap.reshape((7, 7))
plt.matshow(heatmap)
plt.show()

In [None]:
# Overlay the heatmap on the original image.
img_org = cv2.imread(p)
INTENSITY = 0.5  # weight of the coloured heatmap in the blend
# Intermediate results get their own names so `heatmap` keeps holding the 7x7
# map and `img` keeps the loaded sample image; this makes the cell safe to
# re-run.
heatmap_resized = cv2.resize(heatmap, (img_org.shape[1], img_org.shape[0]))
heatmap_color = cv2.applyColorMap(np.uint8(255 * heatmap_resized), cv2.COLORMAP_JET)
overlay = heatmap_color * INTENSITY + img_org
cv2_imshow(overlay)


Displaying 3 heatmaps, each with its corresponding image, for each of the 9 clusters using `gradmap()`.

In [None]:
from google.colab.patches import cv2_imshow
# Root folder that contains one sub-folder per cluster.
path = "/content/drive/My Drive/Colab Notebooks/Florence/20180917a_jpgs/"
# clust is not read by gradmap() below; n is the number of images that
# gradmap() samples from each cluster folder.
clust, n = 8, 3
def _gradcam_heatmap(grad_model, img_batch):
    """Return a non-negative 2-D heatmap, normalised to [0, 1], for a batch of one image.

    grad_model must return ``(model_output, conv_feature_map)`` for its input.
    """
    # Only the forward pass needs to be recorded on the tape.
    with tf.GradientTape() as tape:
        model_out, conv_out = grad_model(img_batch)
    grads = tape.gradient(model_out, conv_out)
    # tf.reduce_mean instead of K.mean: the elbow cell rebinds `K` to a range
    # object, so the Keras backend alias is unavailable here.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    heatmap = tf.reduce_mean(tf.multiply(pooled_grads, conv_out), axis=-1)
    heatmap = np.maximum(heatmap, 0)[0]  # drop the batch axis
    peak = np.max(heatmap)
    if peak > 0:  # an all-zero map would otherwise turn into NaNs
        heatmap = heatmap / peak
    return heatmap


def gradmap():
    """Show heatmaps and overlays for `n` random images from every cluster folder.

    Reads the module-level `model`, `path` and `n`. For each cluster folder it
    picks `n` files at random, plots the gradient heatmap of the last Conv2D
    layer, and displays the heatmap blended over the original image, scaled to
    40% of its size.
    """
    # Build the two-output model once instead of once per image.
    conv_names = [layer.name for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D)]
    last_conv = model.get_layer(conv_names[-1])
    grad_model = tf.keras.models.Model(model.inputs, [model.output, last_conv.output])

    cluster_names = ['Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight']
    for idx, name in enumerate(cluster_names):
        cluster_dir = path + name
        if not os.path.isdir(cluster_dir):
            # The copy step of this notebook names the cluster folders by
            # digit ("0", "1", ...); fall back to that when no word-named
            # folder exists.
            cluster_dir = path + str(idx)
        files = os.listdir(cluster_dir)
        print("Cluster " + name)
        if not files:
            print("No images found in " + cluster_dir)
            continue
        for _ in range(n):
            chosen = random.choice(files)
            img_path = cluster_dir + "/" + chosen
            new_img = image.load_img(img_path, target_size=(224, 224))
            # img_to_array yields 0-255 pixels; MobileNetV2 expects [-1, 1].
            batch = np.expand_dims(image.img_to_array(new_img), axis=0) / 127.5 - 1.0

            heatmap = _gradcam_heatmap(grad_model, batch)
            plt.matshow(heatmap)
            plt.show()

            # Blend the colour-mapped heatmap over the original file.
            new_img_org = cv2.imread(img_path)
            INTENSITY = 0.5
            heatmap_resized = cv2.resize(heatmap, (new_img_org.shape[1], new_img_org.shape[0]))
            heatmap_color = cv2.applyColorMap(np.uint8(255 * heatmap_resized), cv2.COLORMAP_JET)
            overlay = heatmap_color * INTENSITY + new_img_org

            scale_percent = 40  # percent of original size
            width = int(overlay.shape[1] * scale_percent / 100)
            height = int(overlay.shape[0] * scale_percent / 100)
            resized = cv2.resize(overlay, (width, height), interpolation=cv2.INTER_AREA)

            cv2_imshow(resized)
            cv2.waitKey()
    
 
    

In [None]:
# Display heatmaps and overlays for randomly chosen images of every cluster
gradmap()

#### TSNE Plot

In [None]:
from sklearn.manifold import TSNE
# t-SNE embedding of the image features. t-SNE is stochastic, so the seed is
# fixed to keep the layout reproducible between runs.
tsne = TSNE(random_state=728).fit_transform(pred_images)


In [None]:
# Wrap the embedding in a DataFrame with named axes for plotting.
tsne_components = pd.DataFrame(data=tsne, columns=['comp1', 'comp2'])

In [None]:
# Attach each sample's KMeans label as a `cluster` column.
tsne_components = tsne_components.assign(cluster=k_means_labels)

In [None]:
import plotly.express as px
# Convert the labels to strings before plotting (presumably so plotly colours
# the clusters as discrete categories - confirm against the plotly docs).
tsne_components["cluster"] = tsne_components["cluster"].astype(str)

# Scatter of the two embedding components, coloured by cluster.
fig = px.scatter(data_frame=tsne_components, x="comp1", y="comp2", color="cluster")
fig.show()