In [7]:
import os
%matplotlib inline

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import scipy
import scipy.misc
from scipy import ndimage

from datasets import dataset_utils

from datasets import imagenet
from nets import inception
from preprocessing import inception_preprocessing

from tensorflow.contrib import slim
from scipy.misc import imresize
import matplotlib.image as mpimg

In [2]:
def preprocess_images(data,img_size):
    res_img = []
    for i in range(len(data)):
        res_img.append(imresize(data[i],[img_size,img_size]))
    return res_img

In [3]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = np.array(mpimg.imread(os.path.join(folder,filename)))
        images.append(img)

    images = preprocess_images(images,image_size)
    return np.array(images)

In [4]:
from datasets import dataset_utils
import tensorflow as tf

url = "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz"
checkpoints_dir = '/tmp/checkpoints'
if not tf.gfile.Exists(checkpoints_dir):
    tf.gfile.MakeDirs(checkpoints_dir)

dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)

>> Downloading inception_v3_2016_08_28.tar.gz 100.0%
Successfully downloaded inception_v3_2016_08_28.tar.gz 100885009 bytes.


In [5]:
tf.reset_default_graph()

image_size = inception.inception_v3.default_image_size
image=tf.placeholder(dtype='float32',shape=[None,image_size,image_size,3])

with slim.arg_scope(inception.inception_v3_arg_scope()):
    logits, layers_list = inception.inception_v3(image, num_classes=1001, is_training=False)

vector = layers_list.get('PreLogits',None)

init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables())


In [8]:
X1 = load_images_from_folder('/Users/Vard/Downloads/flowers/rose')
y1 = np.array([0]*X1.shape[0])
X2 = load_images_from_folder('/Users/Vard/Downloads/flowers/tulip')
y2 = np.array([0]*X2.shape[0])
X = np.vstack([X1,X2])
y = np.hstack([y1,y2]) 

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  after removing the cwd from sys.path.


In [9]:
n = X.shape[0]
n_batch = 4
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)
    vectors = np.array([])

    rng_minibatches = range(0, n - n_batch + 1, n_batch)
    for start_idx in rng_minibatches:
        X_batch = X[start_idx:(start_idx+n_batch),:]
        [vector_val] = sess.run([vector],feed_dict={image:X_batch})
        if start_idx == 0:
            vectors = np.squeeze(vector_val)
        else:
            vectors = np.vstack([vectors, np.squeeze(vector_val)])

INFO:tensorflow:Restoring parameters from /tmp/checkpoints/inception_v3.ckpt


In [10]:
from sklearn import cluster
from sklearn import mixture
from sklearn.model_selection import GridSearchCV

two_means = cluster.KMeans(n_clusters= 2)
parameters = {'n_init': range(2,15)}
clf = GridSearchCV(two_means,parameters)
clf.fit(vectors)

n_init = clf.best_params_['n_init']
two_means = cluster.KMeans(n_clusters= 2,n_init= n_init).fit(vectors)
acc1 = np.mean(np.equal(two_means.labels_, y))

print('KMeans best score is',acc1)

KMeans best score is 0.5124434389140271


In [11]:
spectral = cluster.SpectralClustering(n_clusters=2).fit(vectors)
acc2 = np.mean(np.equal(spectral.labels_, y))
print('Spectral score is',acc2)



Spectral score is 0.998868778280543


In [14]:
dbscan = cluster.DBSCAN(eps=20,min_samples = 1).fit(vectors)

acc3 = np.mean(np.equal(dbscan.labels_, y))
print('DBSCAN score is',acc3)

DBSCAN score is 0.0005656108597285068


In [13]:
affinity_propagation = cluster.AffinityPropagation().fit(vectors)
acc4 = np.mean(np.equal(affinity_propagation.labels_, y))
print('Affinity score is', acc4)

Affinity score is 0.007918552036199095


In [15]:
average_linkage = cluster.AgglomerativeClustering(n_clusters=2,linkage='average').fit(vectors)
acc5 = np.mean(np.equal(average_linkage.labels_, y))
print('Agglomerative score is',acc5)

Agglomerative score is 0.9994343891402715


In [16]:
birch = cluster.Birch(n_clusters=2).fit(vectors)
acc6 = np.mean(np.equal(birch.labels_, y))
print('Birch score is',acc6)

Birch score is 0.7618778280542986


In [17]:
gmm = mixture.GaussianMixture(n_components=2)

parameters = {'covariance_type': ['full','diag','spherical'],
             
             }
clf = GridSearchCV(gmm, parameters)
clf.fit(vectors)

cov = clf.best_params_['covariance_type']

gmm = mixture.GaussianMixture(covariance_type=cov).fit(vectors)
acc8 = np.mean(np.equal(gmm.predict(vectors), y))
print('GaussianMixture best score is', acc8)

GaussianMixture best score is 1.0
