In [1]:
#https://github.com/XifengGuo/DCEC/blob/master/DCEC.py
from time import time
import numpy as np
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras.models import Model
from tensorflow.python.keras.utils.vis_utils import plot_model
from sklearn.cluster import KMeans
from datasets import load_mnist, load_usps, load_mrec


In [2]:
import metrics
from ConvAEmnist import CAE
yHid=None


In [3]:
def printMetrics(aName,yy,_loss):
        if yHid is not None:
                    acc = np.round(metrics.acc(yHid, yy), 5)
                    nmi = np.round(metrics.nmi(yHid, yy), 5)
                    ari = np.round(metrics.ari(yHid, yy), 5)
                    loss = np.round(_loss, 7)
                    #logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=_loss[0], Lc=_loss[1], Lr=_loss[2])
                    #logwriter.writerow(logdict)
                    # print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss,'  delta=',delta_label)

                    print(aName,'acc = %.4f, nmi = %.4f, ari = %.4f' % (acc,nmi,ari),';  loss=',_loss)


In [4]:
class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.

    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

 

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight('clusters',(self.n_clusters, input_dim), initializer='glorot_uniform')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    

In [47]:
class DCEC(object):
    def __init__(self,
                 input_shape,
                 filters=[32, 64, 128, 10],
                 n_clusters=10,
                 alpha=1.0):

        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []
        self.delata_label = 0 
        self.save_dir='temp'
        self.yHid=None

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name='embedding').output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        print('nn',self.n_clusters)
        clustering_layer = ClusteringLayer(n_clusters=self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    def pretrain(self, x, batch_size=256, epochs=50, optimizer='adam'):
        print('...Pretraining...')
        self.cae.compile(optimizer=optimizer, loss='mse')
        from tensorflow.keras.callbacks import CSVLogger
        csv_logger = CSVLogger(self.save_dir + '/pretrain_log.csv')

        # begin training
        self.cae.fit(x, x, batch_size=batch_size, epochs=2, callbacks=[csv_logger])
        nq, _ = self.model.predict(x, verbose=0)
        npp= nq.argmax(1)
        printMetrics('Iter a',npp,0)
             
        t0 = time()
        self.cae.fit(x, x, batch_size=batch_size, epochs=epochs, callbacks=[csv_logger])
        print('Pretraining time: ', time() - t0)
        nq, _ = self.model.predict(x, verbose=0)
        npp= nq.argmax(1)
        printMetrics('Iter a',npp,0)
        self.cae.save(self.save_dir + '/pretrain_cae_model.h5')
        print('Pretrained weights are saved to %s/pretrain_cae_model.h5' % self.save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)
    

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def compile(self, loss=['kld', 'mse'], loss_weights=[1, 1], optimizer='adam'):
        self.model.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)

    def dopretrain(self,x, cae_weights=None,batch_size=256,epochs=50):
        # Step 1: pretrain if necessary
        if not self.pretrained or (cae_weights is None):
            #Nepoch=50
            print('...pretraining CAE using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=',epochs)
            self.pretrain(x, batch_size, epochs=epochs)
            self.cae.save_weights( 'pretrain_cae.h5')
      
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights('pretrain_cae.h5')
            print('cae_weights is loaded successfully.')
  
    def fit(self, x, batch_size=256, maxiter=2e3, tol=1e-2,
            update_interval=140, save_dir='temp'):

        print('Update interval', update_interval)
        save_interval=update_interval*2 
        #save_interval = int(x.shape[0] / batch_size * 10)
        print('Save interval', save_interval)

   
        # Step 2: initialize cluster centers using k-means
        #t1 = time()
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
        loss = [0, 0, 0]
 
        printMetrics("kMeans",self.y_pred,loss)    
        # Step 3: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        logfile = open(self.save_dir + '/dcec_log.csv', 'w')
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()

        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p
                ii=index * batch_size
                #print('x',x[ii])
                print('p',p[ii])
                print('Q',q[ii])
                self.q_pred=q;
                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                #self.zcluster=
                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                printMetrics('Iter '+ str(ite),self.y_pred,loss)
             
                # check stop criterion
                #delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    #logfile.close()
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(x=x[index * batch_size::],
                                                 y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size],
                                                 y=[p[index * batch_size:(index + 1) * batch_size],
                                                    x[index * batch_size:(index + 1) * batch_size]])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')

            ite += 1

        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/dcec_model_final.h5')
        self.model.save_weights(save_dir + '/dcec_model_final.h5')
        #t3 = time()
 

In [48]:
# load dataset
Ndataset='mnist-test'
Nclusters=10
Nsave_dir='temp'
Ngamma=0.1
Ntol=0.001

In [37]:
ddd=[1,2,4]
np.savetxt(fname="saved.csv", delimiter=",", X=ddd)

#ddd.savetxt('mnisttxt')

In [38]:
#clusteringll = ClusteringLayer(10, name='clustering')(hidden)
#from tensorflow.keras.datasets import mnist
#(x_t, y_t), (x_s, y_s) = mnist.load_data()
#xtt = x_t.reshape(-1, 784).astype('int32')

#np.savetxt(fname="saved-rain-fall-row-col-names.csv", delimiter=",", X=xtt)

#x_t.savetxt('mnisttxt')

In [49]:
from datasets import load_mnist, load_usps, load_mrec
if Ndataset == 'mnist':
    x, yHid = load_mnist()
elif Ndataset == 'usps':
    x, yHid = load_usps('data/usps')
elif Ndataset == 'mrec':
    x, YY = load_mrec()
elif Ndataset == 'mnist-test':
    x, yHid = load_mnist()
    x, yHid = x[60000:], yHid[60000:]

MNIST: (70000, 28, 28, 1)


In [50]:
# prepare the DCEC model
dcec = DCEC(input_shape=x.shape[1:], filters=[32, 64, 128, 10], n_clusters=Nclusters)
#plot_model(dcec.model, to_file=Nsave_dir + '/dcec_model.png', show_shapes=True)
dcec.model.summary()
dcec.save_dir=Nsave_dir
dcec.yHid=yHid


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 14, 14, 32)        832       
_________________________________________________________________
conv2 (Conv2D)               (None, 7, 7, 64)          51264     
_________________________________________________________________
conv3 (Conv2D)               (None, 3, 3, 128)         73856     
_________________________________________________________________
flatten_4 (Flatten)          (None, 1152)              0         
_________________________________________________________________
embedding (Dense)            (None, 10)                11530     
_________________________________________________________________
dense_4 (Dense)              (None, 1152)              12672     
_________________________________________________________________
reshape_4 (Reshape)          (None, 3, 3, 128)        

In [51]:
optimizer = 'adam'
Ntol=0.0001
Ngamma=0.1
    

In [52]:
qq=dcec.encoder.predict(x)
pp = dcec.target_distribution(qq)  # update the auxiliary target distribution p
print(pp)               

[[ 0.09576062  0.31623173 -0.02340065 ...  0.00540897 -0.06257829
   0.07884236]
 [-0.10482959 -0.03164749  0.01005473 ... -0.4290024   1.2822636
  -0.01309455]
 [ 0.0533722   0.69349027 -0.06957897 ...  0.14484234 -0.27014762
   0.08944123]
 ...
 [ 0.37646866  0.09096929 -1.4917985  ...  0.14286338 -0.03764597
   0.7034949 ]
 [ 0.11029965  0.01935705 -0.0040554  ...  0.05618998 -0.15114951
   0.15795447]
 [-0.18174101 -0.08479969  0.32634103 ... -0.14315802  1.921741
  -0.42669982]]


In [53]:
dcec.compile(loss=['kld', 'mse'], loss_weights=[Ngamma, 1], optimizer=optimizer)
t0 = time() 
dcec.dopretrain(x,cae_weights=None,batch_size=256,epochs=10)
#dcec.dopretrain(x,cae_weights=1,batch_size=256,epochs=30)
t1 = time()


...pretraining CAE using default hyper-parameters:
   optimizer='adam';   epochs= 10
...Pretraining...
Epoch 1/2
Epoch 2/2
Iter a acc = 0.2322, nmi = 0.2265, ari = 0.0316 ;  loss= 0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Pretraining time:  31.26853632926941
Iter a acc = 0.3452, nmi = 0.2727, ari = 0.1290 ;  loss= 0
Pretrained weights are saved to temp/pretrain_cae_model.h5


In [54]:
t10 = time()
dcec.fit(x,  tol=Ntol, maxiter=1e3,
             update_interval=140)
y_pred = dcec.y_pred
q_pred=dcec.q_pred
t11 = time()

printMetrics('end:',y_pred,[0])
#print('acc = %.4f, nmi = %.4f, ari = %.4f' % (metrics.acc(y, y_pred), metrics.nmi(y, y_pred), metrics.ari(y, y_pred)))

Update interval 140
Save interval 280
Initializing cluster centers with k-means.
kMeans acc = 0.6002, nmi = 0.5750, ari = 0.4810 ;  loss= [0, 0, 0]
p [0.00455987 0.01443567 0.00996763 0.01706933 0.00528897 0.9182013
 0.01248278 0.00562948 0.00410865 0.00825639]
Q [0.0367986  0.06953757 0.05390224 0.07309902 0.03736122 0.54095054
 0.06356531 0.0371798  0.03209889 0.05550686]
Iter 0 acc = 0.6002, nmi = 0.5750, ari = 0.4810 ;  loss= [0, 0, 0]
saving model to: temp/dcec_model_0.h5
p [0.03930619 0.00825358 0.01557406 0.03685729 0.82053626 0.00760787
 0.02260883 0.01670157 0.02301905 0.00953533]
Q [0.09276541 0.0479304  0.05984508 0.09271634 0.4102676  0.04376347
 0.07485679 0.05833763 0.06574073 0.05377654]
Iter 140 acc = 0.6164, nmi = 0.6002, ari = 0.5059 ;  loss= [0.024405394, 0.06862287, 0.017543107]
p [1.8210593e-04 8.6325326e-04 2.4908027e-04 8.0788013e-04 1.5980525e-04
 9.9673593e-01 4.1716930e-04 1.3107101e-04 1.4707533e-04 3.0665201e-04]
Q [0.0103443  0.02731973 0.0134798  0.0236242

In [55]:
np.savetxt(fname="ypred.csv",fmt="%d", delimiter=",", X=y_pred)


In [56]:
def loss_mse( inputs, targets):
    error = inputs - targets
    return tf.reduce_mean(tf.square(error))

In [57]:
def loss_kld(inputs,pred):
    _nn=tf.keras.losses.KLD( inputs,pred)
    #return tf.keras.losses.KLDivergence(inputs,pred)
    #loss = k([.4, .9, .2], [.5, .8, .12])
    #return _nn
    return tf.reduce_mean(_nn)

In [58]:
yp=y_pred.astype(K.floatx())
print(q_pred)

[[0.00577047 0.01677052 0.00722646 ... 0.00497918 0.00474525 0.00816023]
 [0.07880014 0.02936345 0.08832586 ... 0.03858086 0.06850583 0.03879023]
 [0.00689968 0.00585611 0.00557377 ... 0.00259356 0.00387477 0.9426914 ]
 ...
 [0.01067292 0.01162967 0.006573   ... 0.00594893 0.00642999 0.01326605]
 [0.02206011 0.01855258 0.01248496 ... 0.01189885 0.01241228 0.02219081]
 [0.83956116 0.01277704 0.01377647 ... 0.01773834 0.04129395 0.01309343]]


In [59]:
k = tf.keras.losses.KLDivergence()
loss = k([.4, .9, .2], [.4, .09, .21])
print('Loss: ', loss.numpy())  # Loss: -0.043



NameError: name 'tf' is not defined

In [None]:
zp=dcec.encoder.predict(x)
print(zp[0])

In [None]:
q, q2 = dcec.model.predict(x, verbose=1)
print(q2[0].shape)
print(loss_mse(x,q2))

In [None]:
print(q.shape)
print(p[0])
print(loss_kld(p,q))

In [None]:
print(loss_kld([[1.,2.,3000],[2,3,14.1]],[[11.,20.,30],[2,3,0.000400]]))

In [None]:
p = dcec.target_distribution(q_pred)  # update the auxiliary target distribution p
print(p.shape)

In [None]:
print(loss_kld(p,q_pred))

In [None]:
import tensorflow as tf

_nn=tf.keras.losses.KLD(zp, p)
print(_nn)
print( tf.reduce_mean(_nn))

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(25,3))

plt.plot(y_pred)
plt.show()

In [None]:
YY['labeld']=y_pred
YY

In [None]:
import numpy as np
import pandas
np.savetxt(fname="mreresult.csv",fmt="%d\t%d\t%f", delimiter="\t",header="time\tnn\tlabeld", X=YY,comments='')

In [None]:
print('Pretrain time:  ', t1 - t0)
print('Clustering time:', t11 - t10)
#print('Total time:     ', t3 - t0)
