In [None]:
"""
Keras implementation for Deep Embedded Clustering (DEC) algorithm:
        Junyuan Xie, Ross Girshick, and Ali Farhadi. Unsupervised deep embedding for clustering analysis. ICML 2016.
Usage:
    Weights of Pretrained autoencoder for mnist are in './ae_weights/mnist_ae_weights.h5':
        python DEC.py mnist --ae_weights ./ae_weights/mnist_ae_weights.h5
    for USPS and REUTERSIDF10K datasets
        python DEC.py usps --update_interval 30 --ae_weights ./ae_weights/usps_ae_weights.h5
        python DEC.py reutersidf10k --n_clusters 4 --update_interval 20 --ae_weights ./ae_weights/reutersidf10k_ae_weights.h5
Author:
    Xifeng Guo. 2017.1.30
"""

from time import time
import numpy as np
import keras.backend as K
from tensorflow.keras.layers import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras.utils.vis_utils import plot_model
import keras

from sklearn.cluster import KMeans
from sklearn import metrics
#from sklearn.utils import linear_assignment
from scipy.optimize import linear_sum_assignment as linear_assignment

# def cluster_acc(y_true, y_pred):
#     """
#     Calculate clustering accuracy. Require scikit-learn installed
#     # Arguments
#         y: true labels, numpy.array with shape `(n_samples,)`
#         y_pred: predicted labels, numpy.array with shape `(n_samples,)`
#     # Return
#         accuracy, in [0,1]
#     """
#     y_true = y_true.astype(np.int64)
#     assert y_pred.size == y_true.size
#     D = max(y_pred.max(), y_true.max()) + 1
#     w = np.zeros((D, D), dtype=np.int64)
#     for i in range(y_pred.size):
#         w[y_pred[i], y_true[i]] += 1
#     ind = linear_assignment(w.max() - w)
#     return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one mapping between
    predicted cluster ids and true labels (Hungarian algorithm). Requires scipy.
    # Arguments
        y_true: true labels, array-like with `n_samples` non-negative integer-valued entries
        y_pred: predicted labels, array-like with `n_samples` non-negative integer-valued entries
    # Return
        accuracy, in [0,1]
    """
    # Local import keeps the function usable on its own, independent of module-level aliases.
    from scipy.optimize import linear_sum_assignment

    # Flatten and cast both inputs so float labels (e.g. read from a .mat file)
    # or column vectors of shape (n, 1) can be used as indices.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size

    # Contingency matrix: w[i, j] = number of samples predicted as i whose true label is j.
    n_labels = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((n_labels, n_labels), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # linear_sum_assignment minimises cost, so turn counts into costs with (max - count).
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size


def autoencoder(dims, act='relu'):
    """
    Build a symmetric, fully connected auto-encoder.
    Arguments:
        dims: list with the number of units per encoder layer. dims[0] is the input size and
            dims[-1] the size of the embedding (hidden) layer. The decoder mirrors the encoder,
            so the auto-encoder has 2*len(dims)-1 layers in total.
        act: activation used by the internal layers; the input, embedding and output layers
            do not use it.
    return:
        Model of autoencoder
    """
    depth = len(dims) - 1

    inputs = Input(shape=(dims[0],), name='input')
    tensor = inputs

    # encoder: all layers before the embedding use `act`
    for idx, units in enumerate(dims[1:-1]):
        tensor = Dense(units, activation=act, name='encoder_%d' % idx)(tensor)

    # embedding layer (no activation argument); features are extracted from here
    tensor = Dense(dims[-1], name='encoder_%d' % (depth - 1))(tensor)

    # decoder: mirror of the internal encoder layers, widest index first
    for idx in reversed(range(1, depth)):
        tensor = Dense(dims[idx], activation=act, name='decoder_%d' % idx)(tensor)

    # reconstruction layer (no activation argument), same width as the input
    outputs = Dense(dims[0], name='decoder_0')(tensor)

    return Model(inputs=inputs, outputs=outputs)


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        # Accept the legacy `input_dim` keyword by translating it to `input_shape`.
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = int(n_clusters)
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        """Create the `(n_clusters, n_features)` cluster-center weight and apply initial weights if given."""
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = (1 + dist(x_i, u_j)^2 / alpha)^(-(alpha + 1) / 2), then normalize it over clusters j.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        # Squared distance between every sample and every cluster center: (n_samples, n_clusters).
        sq_dist = K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2)
        q = 1.0 / (1.0 + sq_dist / self.alpha)
        q = q ** ((self.alpha + 1.0) / 2.0)
        # Normalize each row so the soft assignments of one sample sum to 1.
        q = q / K.sum(q, axis=1, keepdims=True)
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        # `alpha` is included so a layer rebuilt from its config keeps a non-default value.
        config = {'n_clusters': self.n_clusters, 'alpha': self.alpha}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class DEC(object):
    """
    Deep Embedded Clustering: the encoder half of a fully connected autoencoder followed by a
    `ClusteringLayer`, trained with a KL-divergence loss against a self-generated target distribution.
    # Arguments
        dims: list of layer widths passed to `autoencoder`; dims[0] is the input dimension.
        n_clusters: number of clusters.
        alpha: parameter of the Student's t-distribution used by the clustering layer.
        batch_size: number of samples per training batch.
    """

    def __init__(self,
                 dims,
                 n_clusters=10,
                 alpha=1.0,
                 batch_size=256):

        super(DEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.batch_size = batch_size
        self.autoencoder = autoencoder(self.dims)

    def initialize_model(self, optimizer, ae_weights=None):
        """
        Build `self.encoder` and the compiled DEC model `self.model`.
        # Arguments
            optimizer: optimizer passed to `Model.compile`.
            ae_weights: optional path to pretrained autoencoder weights. When None, the autoencoder
                created in `__init__` is used without loading any weights.
        """
        if ae_weights is not None:  # load pretrained weights of autoencoder
            self.autoencoder.load_weights(ae_weights)

        hidden = self.autoencoder.get_layer(name='encoder_%d' % (self.n_stacks - 1)).output
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        # prepare DEC model; forward alpha so the value given to the constructor is actually used
        clustering_layer = ClusteringLayer(self.n_clusters, alpha=self.alpha, name='clustering')(hidden)
        self.model = Model(inputs=self.autoencoder.input, outputs=clustering_layer)
        self.model.compile(loss='kld', optimizer=optimizer)

    def load_weights(self, weights_path):  # load weights of DEC model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        encoder = Model(self.model.input, self.model.get_layer('encoder_%d' % (self.n_stacks - 1)).output)
        return encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        """
        Auxiliary target distribution P computed from the soft assignments Q:
        p_ij is q_ij^2 / sum_i(q_ij), normalized so that every row sums to 1.
        """
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def clustering(self, x, y=None,
                   tol=1e-3,
                   update_interval=140,
                   maxiter=2e4,
                   save_dir='./results/dec'):
        """
        Initialize cluster centers with k-means, then train the DEC model batch by batch.
        # Arguments
            x: input samples, indexed along the first axis.
            y: optional true labels; when given, acc/nmi/ari are logged at every target update.
            tol: training stops when the fraction of samples whose label changed between two
                consecutive target updates drops below this value.
            update_interval: number of iterations between two updates of the target distribution.
            maxiter: maximum number of training iterations (converted with `int`).
            save_dir: directory for `dec_log.csv` and the weight checkpoints; created if missing.
        # Return
            labels predicted at the last target update, shape `(n_samples,)`.
        """
        import csv, os

        n_samples = x.shape[0]
        print('Update interval', update_interval)
        # 5 epochs expressed in iterations. Integer arithmetic is required: a float interval
        # makes `ite % save_interval == 0` fail for practically every iteration but the first.
        batches_per_epoch = (n_samples + self.batch_size - 1) // self.batch_size
        save_interval = max(1, batches_per_epoch * 5)
        print('Save interval', save_interval)

        # initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = y_pred
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        os.makedirs(save_dir, exist_ok=True)

        # The context manager closes the log even if training raises.
        with open(save_dir + '/dec_log.csv', 'w', newline='') as logfile:
            logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L'])
            logwriter.writeheader()

            loss = 0
            index = 0
            for ite in range(int(maxiter)):
                if ite % update_interval == 0:
                    q = self.model.predict(x, verbose=0)
                    p = self.target_distribution(q)  # update the auxiliary target distribution p

                    # evaluate the clustering performance
                    y_pred = q.argmax(1)
                    delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
                    y_pred_last = y_pred
                    if y is not None:
                        acc = np.round(cluster_acc(y, y_pred), 5)
                        nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
                        ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
                        loss = np.round(loss, 5)
                        logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss)
                        logwriter.writerow(logdict)
                        print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)

                    # check stop criterion
                    if ite > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print('Reached tolerance threshold. Stopping training.')
                        break

                # train on batch; slicing past the end simply yields the shorter tail batch
                start = index * self.batch_size
                stop = start + self.batch_size
                loss = self.model.train_on_batch(x=x[start:stop], y=p[start:stop])
                # Wrap around as soon as the data is exhausted so that an empty batch is never
                # submitted when n_samples is an exact multiple of batch_size.
                index = index + 1 if stop < n_samples else 0

                # save intermediate model
                if ite % save_interval == 0:
                    print('saving model to:', save_dir + '/DEC_model_' + str(ite) + '.h5')
                    self.model.save_weights(save_dir + '/DEC_model_' + str(ite) + '.h5')

        # save the trained model
        print('saving model to:', save_dir + '/DEC_model_final.h5')
        self.model.save_weights(save_dir + '/DEC_model_final.h5')

        return y_pred


# New Section

In [None]:
"""
Implementation for Improved Deep Embedded Clustering as described in paper:
        Xifeng Guo, Long Gao, Xinwang Liu, Jianping Yin. Improved Deep Embedded Clustering with Local Structure
        Preservation. IJCAI 2017.
Usage:
    Weights of Pretrained autoencoder for mnist are in './ae_weights/mnist_ae_weights.h5':
        python IDEC.py mnist --ae_weights ./ae_weights/mnist_ae_weights.h5
    for USPS and REUTERSIDF10K datasets
        python IDEC.py usps --update_interval 30 --ae_weights ./ae_weights/usps_ae_weights.h5
        python IDEC.py reutersidf10k --n_clusters 4 --update_interval 3 --ae_weights ./ae_weights/reutersidf10k_ae_weights.h5
Author:
    Xifeng Guo. 2017.4.30
"""

from time import time
import numpy as np
from keras.models import Model
from keras.optimizers import SGD
from keras.utils.vis_utils import plot_model

from sklearn.cluster import KMeans
from sklearn import metrics



class IDEC(object):
    """
    Improved Deep Embedded Clustering: the full autoencoder is kept and trained jointly on a
    clustering loss (KL divergence on the `ClusteringLayer` output) and a reconstruction loss (MSE).
    # Arguments
        dims: list of layer widths passed to `autoencoder`; dims[0] is the input dimension.
        n_clusters: number of clusters.
        alpha: parameter of the Student's t-distribution used by the clustering layer.
        batch_size: number of samples per training batch.
    """

    def __init__(self,
                 dims,
                 n_clusters=10,
                 alpha=1.0,
                 batch_size=256):

        super(IDEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.batch_size = batch_size
        self.autoencoder = autoencoder(self.dims)

    def initialize_model(self, ae_weights=None, gamma=0.1, optimizer='adam'):
        """
        Build `self.encoder` and the compiled two-output IDEC model `self.model`.
        # Arguments
            ae_weights: optional path to pretrained autoencoder weights. When None, the autoencoder
                created in `__init__` is used without loading any weights.
            gamma: weight of the clustering loss; the reconstruction loss has weight 1.
            optimizer: optimizer passed to `Model.compile`.
        """
        if ae_weights is not None:
            self.autoencoder.load_weights(ae_weights)
            print('Pretrained AE weights are loaded successfully.')

        hidden = self.autoencoder.get_layer(name='encoder_%d' % (self.n_stacks - 1)).output
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        # prepare IDEC model; forward alpha so the value given to the constructor is actually used
        clustering_layer = ClusteringLayer(self.n_clusters, alpha=self.alpha, name='clustering')(hidden)
        self.model = Model(inputs=self.autoencoder.input,
                           outputs=[clustering_layer, self.autoencoder.output])
        # Honor the `optimizer` argument instead of a hard-coded Adam instance.
        self.model.compile(loss={'clustering': 'kld', 'decoder_0': 'mse'},
                           loss_weights=[gamma, 1],
                           optimizer=optimizer)

    def load_weights(self, weights_path):  # load weights of IDEC model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        encoder = Model(self.model.input, self.model.get_layer('encoder_%d' % (self.n_stacks - 1)).output)
        return encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):  # target distribution P which enhances the discrimination of soft label Q
        """
        p_ij is q_ij^2 / sum_i(q_ij), normalized so that every row sums to 1.
        """
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def clustering(self, x, y=None,
                   tol=1e-3,
                   update_interval=140,
                   maxiter=2e4,
                   save_dir='./results/idec'):
        """
        Initialize cluster centers with k-means, then train the IDEC model batch by batch.
        # Arguments
            x: input samples, indexed along the first axis; also used as the reconstruction target.
            y: optional true labels; when given, acc/nmi/ari and the losses are logged at every
                target update.
            tol: training stops when the fraction of samples whose label changed between two
                consecutive target updates drops below this value.
            update_interval: number of iterations between two updates of the target distribution.
            maxiter: maximum number of training iterations (converted with `int`).
            save_dir: directory for `idec_log.csv` and the weight checkpoints; created if missing.
        # Return
            labels predicted at the last target update, shape `(n_samples,)`.
        """
        import csv, os

        n_samples = x.shape[0]
        print('Update interval', update_interval)
        # 5 epochs expressed in iterations. Integer arithmetic is required: a float interval
        # makes `ite % save_interval == 0` fail for practically every iteration but the first.
        batches_per_epoch = (n_samples + self.batch_size - 1) // self.batch_size
        save_interval = max(1, batches_per_epoch * 5)
        print('Save interval', save_interval)

        # initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = y_pred
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        os.makedirs(save_dir, exist_ok=True)

        # The context manager closes the log even if training raises.
        with open(save_dir + '/idec_log.csv', 'w', newline='') as logfile:
            logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
            logwriter.writeheader()

            loss = [0, 0, 0]  # total, clustering and reconstruction loss of the last batch
            index = 0
            for ite in range(int(maxiter)):
                if ite % update_interval == 0:
                    q, _ = self.model.predict(x, verbose=0)
                    p = self.target_distribution(q)  # update the auxiliary target distribution p

                    # evaluate the clustering performance
                    y_pred = q.argmax(1)
                    delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
                    y_pred_last = y_pred
                    if y is not None:
                        acc = np.round(cluster_acc(y, y_pred), 5)
                        nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
                        ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
                        loss = np.round(loss, 5)
                        logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2])
                        logwriter.writerow(logdict)
                        print('Iter', ite, ': Acc', acc, '; loss=', loss)

                    # check stop criterion
                    if ite > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print('Reached tolerance threshold. Stopping training.')
                        break

                # train on batch; slicing past the end simply yields the shorter tail batch
                start = index * self.batch_size
                stop = start + self.batch_size
                loss = self.model.train_on_batch(x=x[start:stop],
                                                 y=[p[start:stop], x[start:stop]])
                # Wrap around as soon as the data is exhausted so that an empty batch is never
                # submitted when n_samples is an exact multiple of batch_size.
                index = index + 1 if stop < n_samples else 0

                # save intermediate checkpoints
                # NOTE(review): only the encoder weights are written here, while `load_weights`
                # loads into the full two-output model -- confirm these files are meant to be
                # restored through `self.encoder` rather than `load_weights`.
                if ite % save_interval == 0:
                    print('saving model to:', save_dir + '/IDEC_model_' + str(ite) + '.h5')
                    self.encoder.save_weights(save_dir + '/IDEC_model_' + str(ite) + '.h5')

        # save the trained model (encoder weights only, see note above)
        print('saving model to:', save_dir + '/IDEC_model_final.h5')
        self.encoder.save_weights(save_dir + '/IDEC_model_final.h5')

        return y_pred

|H|B|C|
|---|---|---|
|g|h|h|
|g|h|h|
|g|h|h|



In [None]:
# import argparse

# parser = argparse.ArgumentParser(description='train',
#                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# parser.add_argument('dataset', default='mnist', choices=['mnist', 'usps', 'reutersidf10k'])
# parser.add_argument('--n_clusters', default=10, type=int)
# parser.add_argument('--batch_size', default=256, type=int)
# parser.add_argument('--maxiter', default=2e4, type=int)
# parser.add_argument('--gamma', default=0.1, type=float,
#                     help='coefficient of clustering loss')
# parser.add_argument('--update_interval', default=140, type=int)
# parser.add_argument('--tol', default=0.001, type=float)
# parser.add_argument('--ae_weights', default=None, help='This argument must be given')
# parser.add_argument('--save_dir', default='results/idec')
# args = parser.parse_args()
# print(args)


# Run configuration (replaces the commented-out argparse options above).
# NOTE(review): `dataset` is only read by commented-out cells in the visible notebook -- confirm it is still needed.
dataset ="mnist"
n_clusters = 2  # number of clusters passed to IDEC
batch_size = 32  # samples per training batch
maxiter = 20000  # upper bound on training iterations
gamma = 0.1  # coefficient of clustering loss
update_interval = 140  # iterations between target-distribution updates
tol = 0.001  # stop threshold on the fraction of changed labels
# ae_weights = "/content/drive/MyDrive/Colab Notebooks/btp/saved_weights/Copy of IDEC_model_final.h5"
# save_dir = 'results/idec'
# Results directory (path on the mounted Colab Drive).
save_dir="/content/drive/MyDrive/Colab Notebooks/btp/results"


In [None]:
# def load_mnist():
#     # the data, shuffled and split between train and test sets
#     from keras.datasets import mnist
#     (x_train, y_train), (x_test, y_test) = mnist.load_data()
#     x = np.concatenate((x_train, x_test))
#     y = np.concatenate((y_train, y_test))
#     x = x.reshape((x.shape[0], -1))
#     x = np.divide(x, 50.)  # normalize as it does in DEC paper
#     print ('MNIST samples', x.shape)
#     return x,y

In [None]:
# # load dataset
# # optimizer = SGD(lr=0.1, momentum=0.99)
# # from datasets import load_mnist, load_reuters, load_usps

# if dataset == "mnist":  # recommends: n_clusters=10, update_interval=140
#     x, y = load_mnist()
#     optimizer = 'adam'
# # elif dataset == 'usps':  # recommends: n_clusters=10, update_interval=30
# #     x, y = load_usps('data/usps')
# # elif dataset == 'reutersidf10k':  # recommends: n_clusters=4, update_interval=3
# #     x, y = load_reuters('data/reuters')

In [None]:
# x, y = load_mnist()

In [None]:
# Load the 'ITA_train' matrix from the .mat file and split it into features and labels.
import numpy as np
import scipy.io

mat = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/btp/data-and-models-master/postprocessing/ITA_train.mat')
# Transpose the stored matrix so that, presumably, each row is one sample -- TODO confirm layout.
data = np.array(mat['ITA_train']).T
# Column 0 is used as the label and columns 2.. as features.
# NOTE(review): column 1 is not used -- confirm this is intentional.
y, x = data[:, :1], data[:, 2:]


In [None]:
# import scipy.io
# mat = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/btp/data-and-models-master/postprocessing/GER_train.mat')
# mat['GER_train']
# import numpy as np
# data = np.array(mat['GER_train'])
# data = np.transpose(data)
# x = data[:,2:]
# y = data[:, :1]

In [None]:
# Turn the label column into a 1-D vector with one entry per row, then show its shape.
y = np.reshape(y, (y.shape[0],))
y.shape

(6981,)

In [None]:
# y
# freq = [0]*10
# count=0
# for i in y:
#   freq[i]+=1
#   count+=1
# freq

In [None]:
# Optimizer identifier forwarded to `idec.initialize_model(...)` in the model-preparation cell.
optimizer = 'adam'
# ae_weights[0]

In [None]:
# Build the IDEC model: input width taken from the data, hidden widths 200-500-2000,
# and a 10-dimensional embedding. No pretrained autoencoder weights are passed.
idec = IDEC(
    dims=[x.shape[-1], 200, 500, 2000, 10],
    n_clusters=n_clusters,
    batch_size=batch_size,
)
idec.initialize_model(gamma=gamma, optimizer=optimizer)

# Write an architecture diagram to disk and print the layer summary.
plot_model(idec.model, to_file='idec_model.png', show_shapes=True)
idec.model.summary()

Model: "model_14"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 19)]         0           []                               
                                                                                                  
 encoder_0 (Dense)              (None, 200)          4000        ['input[0][0]']                  
                                                                                                  
 encoder_1 (Dense)              (None, 500)          100500      ['encoder_0[0][0]']              
                                                                                                  
 encoder_2 (Dense)              (None, 2000)         1002000     ['encoder_1[0][0]']              
                                                                                           

In [None]:
# Begin clustering; the measured time does not include any pretraining.
t0 = time()
# Use the `save_dir` set in the configuration cell instead of repeating the path literal,
# so the output location is defined in exactly one place.
y_pred = idec.clustering(x, y=y, tol=tol, maxiter=maxiter,
                         update_interval=update_interval, save_dir=save_dir)
print('acc:', cluster_acc(y, y_pred))
print('clustering time: ', (time() - t0))

Update interval 140
Save interval 1603.4375
Initializing cluster centers with k-means.
Iter 0 : Acc 0.60437 ; loss= [0 0 0]
saving model to: /content/drive/MyDrive/Colab Notebooks/btp/results/IDEC_model_0.h5
Iter 140 : Acc 0.61937 ; loss= [0.02994 0.00053 0.02989]
Iter 280 : Acc 0.62795 ; loss= [0.01958 0.00011 0.01956]
Iter 420 : Acc 0.64188 ; loss= [0.01096 0.00017 0.01094]
Iter 560 : Acc 0.64081 ; loss= [0.00637 0.00012 0.00636]
Iter 700 : Acc 0.63984 ; loss= [0.00476 0.0006  0.0047 ]
Iter 840 : Acc 0.63964 ; loss= [0.00417 0.0004  0.00413]
Iter 980 : Acc 0.63837 ; loss= [0.00618 0.00325 0.00585]
Iter 1120 : Acc 0.63721 ; loss= [0.00975 0.00987 0.00877]
Iter 1260 : Acc 0.63818 ; loss= [0.00879 0.02138 0.00665]
Iter 1400 : Acc 0.63837 ; loss= [0.00805 0.02424 0.00563]
Iter 1540 : Acc 0.63925 ; loss= [0.02623 0.04115 0.02212]
Iter 1680 : Acc 0.63857 ; loss= [0.00573 0.0279  0.00294]
Iter 1820 : Acc 0.63818 ; loss= [0.00683 0.02629 0.0042 ]
Iter 1960 : Acc 0.63876 ; loss= [0.00728 0.02

In [None]:
# unique_classes = np.unique(y_pred)
# class_counts = {c: np.count_nonzero(y_pred == c) for c in unique_classes}

# for c in class_counts:
#     print(f"Class {c}: {class_counts[c]}")

# New Section

In [None]:
# freq = [0]*10
# count=0
# for i in y:
#   freq[i]+=1
#   count+=1
# freq
# # count

In [None]:
# Display the feature matrix as a quick visual check.
x

array([[0.17288867, 0.16044151, 0.10880093, ..., 0.39100349, 0.42424242,
        0.46551724],
       [0.06539222, 0.07999692, 0.06475227, ..., 0.17344816, 0.57575758,
        0.53448276],
       [0.15515482, 0.3123767 , 0.18473457, ..., 0.63972079, 0.73333333,
        0.77142857],
       ...,
       [0.23718126, 0.22727425, 0.18765866, ..., 0.43851575, 0.44444444,
        0.43478261],
       [0.10487369, 0.11139848, 0.10635577, ..., 0.11059878, 0.14814815,
        0.26086957],
       [0.12130756, 0.122106  , 0.1203688 , ..., 0.23272099, 0.40740741,
        0.30434783]])

In [None]:
# modl = IDEC(dims=[x.shape[-1], 500, 500, 2000, 10], n_clusters=n_clusters, batch_size=batch_size)
# modl.initialize_model(ae_weights=ae_weights, gamma=gamma, optimizer=optimizer)

In [None]:
# from tensorflow.keras.models import load_model
# modl.load_weights('/content/drive/MyDrive/Colab Notebooks/btp/results/IDEC_model_final.h5')

In [None]:
# modl.model.summary()

In [None]:
# ny =predict_with_idc(model, x)

In [None]:
# import tensorflow as tf

In [None]:
# ny = model.predict_on_batch(x)

In [None]:
# import scipy.io
# mat = scipy.io.loadmat('/content/GER_train.mat')
# mat['GER_train']

# import numpy as np
# data = np.array(mat['GER_train'])
# data = np.transpose(data)
# X = data[:,2:]
# Y = data[:, :1]