In [1]:
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.datasets import cifar10
import numpy as np
import keras
from keras.utils import to_categorical


from __future__ import print_function
import os

import matplotlib.pyplot as plt
%matplotlib inline

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

resnet = ResNet50(include_top=False, weights='imagenet', input_tensor=None, input_shape=(224, 224, 3))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
resnet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [3]:
# For getting dataset of images of 2 classes
def get_new_data(X, Y, first_label=0, second_label=2):
    x = []
    y = []
    for i in range(Y.shape[0]):
        if Y[i] == first_label or Y[i] == second_label:
            x.append(X[i])
            y.append(Y[i])
    for _ in range(len(y)):
        if y[_] == first_label:
            y[_] = 0
        else:
            y[_] = 1
    
    return np.asarray(x), np.asarray(y)

In [4]:
X_train, Y_train = get_new_data(x_train, y_train)
print(X_train.shape, Y_train)

(10000, 32, 32, 3) [1 1 1 ... 0 0 1]


In [5]:
X_test, Y_test = get_new_data(x_test, y_test)
print(X_test.shape, Y_test)

(2000, 32, 32, 3) [0 0 0 ... 0 1 0]


In [6]:
train_dir = 'C:/Users/ML/Downloads/Clustering/clean-dataset/train'
validation_dir = 'C:/Users/ML/Downloads/Clustering/clean-dataset/validation'

nTrain = 160
nVal = 40

In [7]:
from  PIL import Image

def save_single_image(single_img_reshaped, subset, class_label, l):
    i = Image.fromarray(single_img_reshaped)
    res = i.resize((224, 224), Image.LANCZOS)
    
    res.save('C:/Users/ML/Downloads/Clustering/clean-dataset/%s/%s/%s.png' %(subset, class_label, l), 'PNG')

In [None]:
#here can be visualized data before and after resizing

In [None]:
#This is for saving images of train dataset. It's enough to realize once
l = 0
while l < X_train.shape[0]:
    if Y_train[l]==0:
        save_single_image(X_train[l], 'train', 'airplane', l)
    elif  Y_train[l]==1:
        save_single_image(X_train[l], 'train', 'bird', l)
    l += 1

In [None]:
#This is for saving images of validation dataset. It's enough to realize once.
l = 0
while l < X_test.shape[0]:
    if Y_test[l]==0:
        save_single_image(X_test[l], 'validation', 'airplane', l)
    elif  Y_test[l]==1:
        save_single_image(X_test[l], 'validation', 'bird', l)
    l += 1

In [8]:
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20
 
train_features = np.zeros(shape=(nTrain, 7, 7, 2048))
train_labels = np.zeros(shape=(nTrain,2))
 
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True)


Found 10000 images belonging to 2 classes.


In [9]:
i = 0
for inputs_batch, labels_batch in train_generator:
    features_batch = resnet.predict(inputs_batch)
    train_features[i * batch_size : (i + 1) * batch_size] = features_batch
    train_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    print("Training batch %s" %(i+1))
    i += 1
    if i * batch_size >= nTrain:
        break

Training batch 1
Training batch 2
Training batch 3
Training batch 4
Training batch 5
Training batch 6
Training batch 7
Training batch 8


In [10]:
train_features = np.reshape(train_features, (nTrain, 7 * 7 * 2048))

In [11]:
validation_features = np.zeros(shape=(nVal, 7, 7, 2048))
validation_labels = np.zeros(shape=(nVal, 2))

validation_generator = datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

i = 0
for inputs_batch, labels_batch in validation_generator:
    features_batch = resnet.predict(inputs_batch)
    validation_features[i * batch_size : (i + 1) * batch_size] = features_batch
    validation_labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    print("Validation batch %s" %(i+1))
    i += 1
    if i * batch_size >= nVal:
        break


Found 2000 images belonging to 2 classes.
Validation batch 1
Validation batch 2


In [12]:
validation_features = np.reshape(validation_features, (nVal, 7 * 7 * 2048))

In [31]:
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
import numpy as np

for i in range(1, 20):
    kmeans = KMeans(n_clusters=2, n_init=i)
    labels_pred_kmeans = kmeans.fit_predict(train_features)
    labels = np.where(train_labels==1)[1]
    C = confusion_matrix(labels, labels_pred_kmeans)
    print("Epoch:", i, '\n', C, '\n', "Accuracy:", (C[0,0] + C[1,1])/np.sum(C))

Epoch: 1 
 [[18 60]
 [15 67]] 
 Accuracy: 0.53125
Epoch: 2 
 [[60 18]
 [66 16]] 
 Accuracy: 0.475
Epoch: 3 
 [[53 25]
 [53 29]] 
 Accuracy: 0.5125
Epoch: 4 
 [[59 19]
 [65 17]] 
 Accuracy: 0.475
Epoch: 5 
 [[23 55]
 [20 62]] 
 Accuracy: 0.53125
Epoch: 6 
 [[20 58]
 [17 65]] 
 Accuracy: 0.53125
Epoch: 7 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 8 
 [[21 57]
 [17 65]] 
 Accuracy: 0.5375
Epoch: 9 
 [[58 20]
 [66 16]] 
 Accuracy: 0.4625
Epoch: 10 
 [[20 58]
 [16 66]] 
 Accuracy: 0.5375
Epoch: 11 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 12 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 13 
 [[25 53]
 [27 55]] 
 Accuracy: 0.5
Epoch: 14 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 15 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 16 
 [[53 25]
 [55 27]] 
 Accuracy: 0.5
Epoch: 17 
 [[21 57]
 [17 65]] 
 Accuracy: 0.5375
Epoch: 18 
 [[21 57]
 [17 65]] 
 Accuracy: 0.5375
Epoch: 19 
 [[25 53]
 [27 55]] 
 Accuracy: 0.5


!!! Bad results, so need to train another clusering model

In [None]:
# kmeans.labels_
# kmeans.predict(validation_features)
# kmeans.cluster_centers_

In [None]:
# import pickle

# with open('my_dataset.pickle', 'wb') as output:
#     pickle.dump(X_train, output)

In [None]:
# import pickle

# with open('my_dataset.pickle', 'rb') as data:
#     X_train = pickle.load(data)

In [None]:
# def unpickle(file):
#     import pickle
#     with open(file, 'rb') as fo:
#         dict = pickle.load(fo, encoding='bytes')
#     return dict