In [1]:
!unzip "/content/clustering.zip" -d "/content/"

Archive:  /content/clustering.zip
   creating: /content/clustering/
  inflating: /content/clustering/carimg181.jpeg  
  inflating: /content/clustering/carimg222.jpeg  
  inflating: /content/clustering/carimg271.jpeg  
  inflating: /content/clustering/carimg331.jpeg  
  inflating: /content/clustering/carimg453.jpeg  
  inflating: /content/clustering/carimg602.jpeg  
  inflating: /content/clustering/carimg941.jpeg  
  inflating: /content/clustering/planeimg001.jpeg  
  inflating: /content/clustering/planeimg056.jpeg  
  inflating: /content/clustering/planeimg111.jpeg  
  inflating: /content/clustering/planeimg201.jpeg  
  inflating: /content/clustering/planeimg371.jpeg  
  inflating: /content/clustering/planeimg506.jpeg  
  inflating: /content/clustering/planeimg901.jpeg  
  inflating: /content/clustering/shipimg002.jpeg  
  inflating: /content/clustering/shipimg041.jpeg  
  inflating: /content/clustering/shipimg211.jpeg  
  inflating: /content/clustering/shipimg661.jpeg  
  inflating: /

In [1]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import shutil

In [3]:
# Sanity check: print the name of every entry found in the extracted folder.
extracted_files = os.listdir('/content/clustering')
for file_name in tqdm(extracted_files):
    print(file_name)

100%|██████████| 19/19 [00:00<00:00, 3846.69it/s]

shipimg002.jpeg
carimg331.jpeg
planeimg056.jpeg
shipimg211.jpeg
carimg453.jpeg
carimg271.jpeg
carimg602.jpeg
planeimg201.jpeg
carimg941.jpeg
planeimg371.jpeg
carimg222.jpeg
shipimg041.jpeg
shipimg661.jpeg
planeimg506.jpeg
planeimg001.jpeg
carimg181.jpeg
planeimg901.jpeg
planeimg111.jpeg
shipimg91.jpeg





In [2]:
def image_feature(direc, base_dir='clustering', target_size=(224, 224), model=None):
    """Extract one flattened InceptionV3 feature vector per image.

    Parameters
    ----------
    direc : iterable of str
        Image file names, relative to ``base_dir``.
    base_dir : str, optional
        Folder that each name in ``direc`` is joined onto. Defaults to
        ``'clustering'``, resolved against the current working directory.
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to when it is loaded.
    model : optional
        Keras model used to compute the features. When omitted, an
        ImageNet-weighted InceptionV3 without its classification head is
        built once for this call.

    Returns
    -------
    features : list of numpy.ndarray
        One 1-D feature vector per image, in the order of ``direc``.
    img_name : list of str
        The file names, in the same order as ``features``.
    """
    if model is None:
        model = InceptionV3(weights='imagenet', include_top=False)
    features = []
    img_name = []
    for i in tqdm(direc):
        fname = os.path.join(base_dir, i)
        img = image.load_img(fname, target_size=target_size)     # load the image and resize it to target_size
        x = img_to_array(img)                                     # (height, width, 3) float array for RGB
        x = np.expand_dims(x, axis=0)                             # add the batch axis: (1, height, width, 3)
        x = preprocess_input(x)                                   # Inception preprocessing: scales pixel values to [-1, 1]
        feat = model.predict(x, verbose=0)                        # verbose=0 keeps Keras from printing a bar per image
        feat = feat.flatten()                                     # collapse the feature map into a single vector
        features.append(feat)
        img_name.append(i)
    return features, img_name

In [3]:
# Names (not full paths) of every entry in the extracted folder.
# NOTE(review): os.listdir returns entries in arbitrary order; the row order of the tables below follows it.
img_path = os.listdir('/content/clustering')

In [4]:
# Run every listed image through image_feature: returns the feature vectors and the matching file names.
img_features, img_name = image_feature(img_path)

100%|██████████| 19/19 [00:31<00:00,  1.66s/it]


In [5]:
# One row per image; the cluster id column is attached after K-Means has been fitted.
image_cluster = pd.DataFrame({'image': img_name})

In [6]:
# Group the feature vectors into k clusters.
k = 3
# n_init is pinned to the value this run used, because its default differs between
# scikit-learn releases; random_state fixes the centroid seeding.
clusters = KMeans(n_clusters=k, n_init=10, random_state=20)
clusters.fit(img_features)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=20, tol=0.0001, verbose=0)

In [7]:
# Attach the fitted K-Means label of each image as the 'clusterid' column.
image_cluster = image_cluster.assign(clusterid=clusters.labels_)

In [8]:
# Display the image -> cluster id table.
image_cluster

Unnamed: 0,image,clusterid
0,shipimg002.jpeg,0
1,carimg331.jpeg,1
2,planeimg056.jpeg,2
3,shipimg211.jpeg,0
4,carimg453.jpeg,1
5,carimg271.jpeg,1
6,carimg602.jpeg,1
7,planeimg201.jpeg,2
8,carimg941.jpeg,1
9,planeimg371.jpeg,2


In [9]:
# Rows assigned to cluster 0 (only ship images in the output recorded below).
image_cluster.loc[image_cluster['clusterid'].eq(0)]

Unnamed: 0,image,clusterid
0,shipimg002.jpeg,0
3,shipimg211.jpeg,0
11,shipimg041.jpeg,0
12,shipimg661.jpeg,0
18,shipimg91.jpeg,0


In [10]:
# Rows assigned to cluster 1 (only car images in the output recorded below).
image_cluster.loc[image_cluster['clusterid'].eq(1)]

Unnamed: 0,image,clusterid
1,carimg331.jpeg,1
4,carimg453.jpeg,1
5,carimg271.jpeg,1
6,carimg602.jpeg,1
8,carimg941.jpeg,1
10,carimg222.jpeg,1
15,carimg181.jpeg,1


In [11]:
# Rows assigned to cluster 2 (only plane images in the output recorded below).
image_cluster.loc[image_cluster['clusterid'].eq(2)]

Unnamed: 0,image,clusterid
2,planeimg056.jpeg,2
7,planeimg201.jpeg,2
9,planeimg371.jpeg,2
13,planeimg506.jpeg,2
14,planeimg001.jpeg,2
16,planeimg901.jpeg,2
17,planeimg111.jpeg,2


In [12]:
# Create one output folder per category.
# exist_ok=True keeps the cell re-runnable; os.mkdir raises FileExistsError the second time.
for folder in ('ship', 'car', 'plane'):
    os.makedirs(folder, exist_ok=True)

In [13]:
# Number of rows in the table, i.e. how many images were clustered.
len(image_cluster)

19

In [14]:
# Cluster id of the row labelled 0, looked up with a single .loc call.
image_cluster.loc[0, 'clusterid']

0

In [15]:
# Move all images in the clustering folder to the folders ship, car and plane.
# NOTE: the id -> folder mapping was read off the per-cluster tables of this run
# (0 = ship, 1 = car, anything else = plane); re-check it whenever the clustering is refitted.
cluster_folders = {0: 'ship', 1: 'car'}
default_folder = 'plane'

# Make sure the targets are directories: if one is missing, shutil.move would
# rename the image to that name and later moves would overwrite it.
for folder in ('ship', 'car', 'plane'):
    os.makedirs(folder, exist_ok=True)

for file_name, cluster_id in zip(image_cluster['image'], image_cluster['clusterid']):
    target_dir = cluster_folders.get(int(cluster_id), default_folder)
    # An explicit destination file path lets a re-run replace a previously moved copy
    # instead of failing because the destination already exists.
    shutil.move(os.path.join('clustering', file_name), os.path.join(target_dir, file_name))