# Run scDeepCluster on the simulated data

In [1]:
"""
This part implements the scDeepCluster algoritm
"""

from time import time
import numpy as np
from keras.models import Model
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input, GaussianNoise, Layer, Activation
from keras.models import Model
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics

import h5py
import scanpy.api as sc
from scDeepCluster_layers import ConstantDispersionLayer, SliceLayer, ColWiseMultLayer
from scDeepCluster_loss import poisson_loss, NB, ZINB
from scDeepCluster_preprocess import read_dataset, normalize
import tensorflow as tf

from numpy.random import seed
seed(2211)
from tensorflow import set_random_seed
set_random_seed(2211)

MeanAct = lambda x: tf.clip_by_value(K.exp(x), 1e-5, 1e6)
DispAct = lambda x: tf.clip_by_value(tf.nn.softplus(x), 1e-4, 1e4)

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size


def autoencoder(dims, noise_sd=0, init='glorot_uniform', act='relu'):
    """
    Fully connected auto-encoder model, symmetric.
    Arguments:
        dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer.
            The decoder is symmetric with encoder. So number of layers of the auto-encoder is 2*len(dims)-1
        act: activation, not applied to Input, Hidden and Output layers
    return:
        Model of autoencoder
    """
    n_stacks = len(dims) - 1
    # input
    sf_layer = Input(shape=(1,), name='size_factors')
    x = Input(shape=(dims[0],), name='counts')
    h = x
    h = GaussianNoise(noise_sd, name='input_noise')(h)
 
    # internal layers in encoder
    for i in range(n_stacks-1):
        h = Dense(dims[i + 1], kernel_initializer=init, name='encoder_%d' % i)(h)
        h = GaussianNoise(noise_sd, name='noise_%d' % i)(h)    # add Gaussian noise
        h = Activation(act)(h)
    # hidden layer
    h = Dense(dims[-1], kernel_initializer=init, name='encoder_hidden')(h)  # hidden layer, features are extracted from here

    # internal layers in decoder
    for i in range(n_stacks-1, 0, -1):
        h = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(h)

    # output
 
    pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(h)

    disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(h)

    mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(h)

    output = ColWiseMultLayer(name='output')([mean, sf_layer])
    output = SliceLayer(0, name='slice')([output, disp, pi])

    return Model(inputs=[x, sf_layer], outputs=output)


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape = (self.n_clusters, input_dim), 
                                        initializer='glorot_uniform',
                                        name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



class SCDeepCluster(object):
    def __init__(self,
                 dims,
                 n_clusters=10,
                 noise_sd=0,
                 alpha=1.0,
                 ridge=0,
                 debug=False):

        super(SCDeepCluster, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.noise_sd = noise_sd
        self.alpha = alpha
        self.act = 'relu'
        self.ridge = ridge
        self.debug = debug
        self.autoencoder = autoencoder(self.dims, noise_sd=self.noise_sd, act = self.act)
        
        # prepare clean encode model without Gaussian noise
        ae_layers = [l for l in self.autoencoder.layers]
        hidden = self.autoencoder.input[0]
        for i in range(1, len(ae_layers)):
            if "noise" in ae_layers[i].name:
                next
            elif "dropout" in ae_layers[i].name:
                next
            else:
                hidden = ae_layers[i](hidden)
            if "encoder_hidden" in ae_layers[i].name:  # only get encoder layers
                break
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        pi = self.autoencoder.get_layer(name='pi').output
        disp = self.autoencoder.get_layer(name='dispersion').output
        mean = self.autoencoder.get_layer(name='mean').output
        zinb = ZINB(pi, theta=disp, ridge_lambda=self.ridge, debug=self.debug)
        self.loss = zinb.loss

        clustering_layer = ClusteringLayer(self.n_clusters, alpha=self.alpha, name='clustering')(hidden)
        self.model = Model(inputs=[self.autoencoder.input[0], self.autoencoder.input[1]],
                           outputs=[clustering_layer, self.autoencoder.output])

        self.pretrained = False
        self.centers = []
        self.y_pred = []

    def pretrain(self, x, y, batch_size=256, epochs=200, optimizer='adam', ae_file='ae_weights.h5'):
        print('...Pretraining autoencoder...')
        self.autoencoder.compile(loss=self.loss, optimizer=optimizer)
        es = EarlyStopping(monitor="loss", patience=50, verbose=0)
        self.autoencoder.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, callbacks=[es], verbose = 0)
        self.autoencoder.save_weights(ae_file)
        print('Pretrained weights are saved to ./' + str(ae_file))
        self.pretrained = True

    def load_weights(self, weights_path):  # load weights of scDeepCluster model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):  # target distribution P which enhances the discrimination of soft label Q
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def fit(self, x_counts, sf, y, raw_counts, batch_size=256, maxiter=2e4, tol=1e-3, update_interval=140,
            ae_weights=None, save_dir='./output/pickle_results/scDeepCluster', loss_weights=[1,1], optimizer='adadelta'):

        self.model.compile(loss=['kld', self.loss], loss_weights=loss_weights, optimizer=optimizer)

        print('Update interval', update_interval)
        save_interval = int(x_counts.shape[0] / batch_size) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: pretrain
        if not self.pretrained and ae_weights is None:
            print('...pretraining autoencoders using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            self.pretrain(x, batch_size)
            self.pretrained = True
        elif ae_weights is not None:
            self.autoencoder.load_weights(ae_weights)
            print('ae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict([x_counts, sf]))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/scDeepCluster_log.csv', 'w')
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()

        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict([x_counts, sf], verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(cluster_acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.normalized_mutual_info_score(y, self.y_pred), 5)
                    ari = np.round(metrics.adjusted_rand_score(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logwriter.writerow(dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2]))
                    print('Iter-%d: ACC= %.4f, NMI= %.4f, ARI= %.4f;  L= %.5f, Lc= %.5f,  Lr= %.5f'
                          % (ite, acc, nmi, ari, loss[0], loss[1], loss[2]))

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break

            # train on batch
            if (index + 1) * batch_size > x_counts.shape[0]:
                loss = self.model.train_on_batch(x=[x_counts[index * batch_size::], sf[index * batch_size:]],
                                                 y=[p[index * batch_size::], raw_counts[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(x=[x_counts[index * batch_size:(index + 1) * batch_size], 
                                                    sf[index * batch_size:(index + 1) * batch_size]],
                                                 y=[p[index * batch_size:(index + 1) * batch_size],
                                                    raw_counts[index * batch_size:(index + 1) * batch_size]])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save scDeepCluster model checkpoints
                print('saving model to: ' + save_dir + '/scDeepCluster_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/scDeepCluster_model_' + str(ite) + '.h5')

            ite += 1

        # save the trained model
        logfile.close()
        print('saving model to: ' + save_dir + '/scDeepCluster_model_final.h5')
        self.model.save_weights(save_dir + '/scDeepCluster_model_final.h5')
        
        return self.y_pred

Using TensorFlow backend.

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [2]:
import glob2

In [3]:
category = "real_data"# "balanced"#"balanced_0.25" #
import glob2
files = glob2.glob('../real_data/*.h5')
files = [f[len("../real_data/"):-3] for f in files]
files

['10X_PBMC_select_2100',
 'mouse_ES_cell',
 'worm_neuron_cell_select_2100',
 'worm_neuron_cell',
 'mouse_bladder_cell',
 'mouse_ES_cell_select_2100',
 'mouse_bladder_cell_select_2100',
 '10X_PBMC']

In [7]:
df = pd.DataFrame(columns = ["dataset", "scDeepCluster", "run"])
for dataset in files:
    print(f">>> Dataset {dataset} ")
    data_mat = h5py.File(f"{path}{category}/{dataset}.h5", "r")
    x = np.array(data_mat['X'])
    y = np.array(data_mat['Y'])
    #### Run scDeepCluster on the simulated data

    optimizer1 = Adam(amsgrad=True)
    optimizer2 = 'adadelta'


    # preprocessing scRNA-seq read counts matrix
    adata = sc.AnnData(x)
    adata.obs['Group'] = y

    adata = read_dataset(adata, transpose=False, test_split=False, copy=True)

    adata = normalize(adata,
                      size_factors=True,
                      normalize_input=True,
                      logtrans_input=True)

    input_size = adata.n_vars

    print('Sample size')
    print(adata.X.shape)
    print(y.shape)

    x_sd = adata.X.std(0)
    x_sd_median = np.median(x_sd)

    update_interval = int(adata.X.shape[0] / 256)
    for run in range(1):
        seed = run
        np.random.seed(seed)
        # Define scDeepCluster model
        scDeepCluster = SCDeepCluster(dims=[input_size, 256, 64, 32],
                                      n_clusters=3,
                                      noise_sd=2.5)

        t0 = time()

        # Pretrain autoencoders before clustering
        scDeepCluster.pretrain(x=[adata.X, adata.obs.size_factors],
                               y=adata.raw.X,
                               batch_size=256,
                               epochs=600,
                               optimizer=optimizer1,
                               ae_file='ae_weights.h5')

        # begin clustering, time not include pretraining part.

        gamma = 1.  # set hyperparameter gamma
        scDeepCluster.fit(x_counts=adata.X,
                          sf=adata.obs.size_factors,
                          y=y,
                          raw_counts=adata.raw.X,
                          batch_size=256,
                          tol=0.001,
                          maxiter=20000,
                          update_interval=update_interval,
                          ae_weights=None,
                          save_dir='scDeepCluster',
                          loss_weights=[gamma, 1],
                          optimizer=optimizer2)

        # Show the final results
        y_pred = scDeepCluster.y_pred
        acc = np.round(cluster_acc(y, scDeepCluster.y_pred), 5)
        nmi = np.round(metrics.normalized_mutual_info_score(y, scDeepCluster.y_pred),
                       5)
        ari = np.round(metrics.adjusted_rand_score(y, scDeepCluster.y_pred), 5)
        print('Final: ACC= %.4f, NMI= %.4f, ARI= %.4f' % (acc, nmi, ari))
        print('Clustering time: %d seconds.' % int(time() - t0))
        df.loc[df.shape[0]] = [dataset, ari, run]

        df.to_pickle(f"{path}output/pickle_results/{category}_scDeepCluster.pkl")

>>> Dataset mouse_ES_cell 
### Autoencoder: Successfully preprocessed 24175 genes and 2717 cells.
Sample size
(2717, 24047)
(2717,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 10
Save interval 50
Initializing cluster centers with k-means.




Iter-0: ACC= 0.8763, NMI= 0.8512, ARI= 0.8325;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-10: ACC= 0.8771, NMI= 0.8561, ARI= 0.8349;  L= 1.00411, Lc= 0.06471,  Lr= 0.93941




Iter-20: ACC= 0.8778, NMI= 0.8595, ARI= 0.8370;  L= 1.02383, Lc= 0.07987,  Lr= 0.94396




Iter-30: ACC= 0.8778, NMI= 0.8592, ARI= 0.8370;  L= 0.71298, Lc= 0.10029,  Lr= 0.61270
delta_label  0.000736105999263894 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5




Final: ACC= 0.8778, NMI= 0.8592, ARI= 0.8370
Clustering time: 4392 seconds.
>>> Dataset worm_neuron_cell_select_2100 
### Autoencoder: Successfully preprocessed 13488 genes and 2100 cells.
Sample size
(2100, 11955)
(2100,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 8
Save interval 40
Initializing cluster centers with k-means.




Iter-0: ACC= 0.3910, NMI= 0.3365, ARI= 0.1855;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-8: ACC= 0.3914, NMI= 0.3346, ARI= 0.1859;  L= 0.06308, Lc= 0.02477,  Lr= 0.03831




Iter-16: ACC= 0.3924, NMI= 0.3376, ARI= 0.1887;  L= 0.07189, Lc= 0.03347,  Lr= 0.03842




Iter-24: ACC= 0.3943, NMI= 0.3375, ARI= 0.1908;  L= 0.07815, Lc= 0.04192,  Lr= 0.03623




Iter-32: ACC= 0.3943, NMI= 0.3338, ARI= 0.1904;  L= 0.08884, Lc= 0.04930,  Lr= 0.03954




Iter-40: ACC= 0.3948, NMI= 0.3331, ARI= 0.1915;  L= 0.08545, Lc= 0.04663,  Lr= 0.03882
saving model to: scDeepCluster/scDeepCluster_model_40.h5




Iter-48: ACC= 0.3971, NMI= 0.3353, ARI= 0.1942;  L= 0.09174, Lc= 0.05003,  Lr= 0.04171




Iter-56: ACC= 0.3971, NMI= 0.3347, ARI= 0.1940;  L= 0.09754, Lc= 0.05709,  Lr= 0.04045




Iter-64: ACC= 0.3948, NMI= 0.3294, ARI= 0.1926;  L= 0.10109, Lc= 0.05861,  Lr= 0.04248




Iter-72: ACC= 0.3924, NMI= 0.3241, ARI= 0.1928;  L= 0.11596, Lc= 0.07082,  Lr= 0.04513




Iter-80: ACC= 0.3919, NMI= 0.3228, ARI= 0.1930;  L= 0.10675, Lc= 0.06075,  Lr= 0.04599
saving model to: scDeepCluster/scDeepCluster_model_80.h5




Iter-88: ACC= 0.3933, NMI= 0.3241, ARI= 0.1941;  L= 0.11709, Lc= 0.06983,  Lr= 0.04726




Iter-96: ACC= 0.3943, NMI= 0.3234, ARI= 0.1944;  L= 0.11915, Lc= 0.07395,  Lr= 0.04520




Iter-104: ACC= 0.3938, NMI= 0.3219, ARI= 0.1939;  L= 0.13336, Lc= 0.08144,  Lr= 0.05192




Iter-112: ACC= 0.3952, NMI= 0.3230, ARI= 0.1963;  L= 0.12859, Lc= 0.07750,  Lr= 0.05109




Iter-120: ACC= 0.3957, NMI= 0.3250, ARI= 0.1976;  L= 0.13970, Lc= 0.08279,  Lr= 0.05691
saving model to: scDeepCluster/scDeepCluster_model_120.h5




Iter-128: ACC= 0.3967, NMI= 0.3282, ARI= 0.1988;  L= 0.13356, Lc= 0.07992,  Lr= 0.05364




Iter-136: ACC= 0.3957, NMI= 0.3268, ARI= 0.1981;  L= 0.14410, Lc= 0.08509,  Lr= 0.05901




Iter-144: ACC= 0.3924, NMI= 0.3224, ARI= 0.1967;  L= 0.15325, Lc= 0.09291,  Lr= 0.06034




Iter-152: ACC= 0.3929, NMI= 0.3208, ARI= 0.1970;  L= 0.14473, Lc= 0.07979,  Lr= 0.06494




Iter-160: ACC= 0.3929, NMI= 0.3231, ARI= 0.1977;  L= 0.14851, Lc= 0.08297,  Lr= 0.06554
saving model to: scDeepCluster/scDeepCluster_model_160.h5




Iter-168: ACC= 0.3924, NMI= 0.3186, ARI= 0.1968;  L= 0.14070, Lc= 0.08001,  Lr= 0.06070




Iter-176: ACC= 0.3924, NMI= 0.3184, ARI= 0.1966;  L= 0.14715, Lc= 0.07963,  Lr= 0.06752




Iter-184: ACC= 0.3914, NMI= 0.3181, ARI= 0.1960;  L= 0.14546, Lc= 0.07833,  Lr= 0.06713




Iter-192: ACC= 0.3929, NMI= 0.3168, ARI= 0.1965;  L= 0.15571, Lc= 0.08180,  Lr= 0.07391




Iter-200: ACC= 0.3929, NMI= 0.3185, ARI= 0.1974;  L= 0.13780, Lc= 0.07102,  Lr= 0.06678
saving model to: scDeepCluster/scDeepCluster_model_200.h5




Iter-208: ACC= 0.3914, NMI= 0.3173, ARI= 0.1967;  L= 0.14898, Lc= 0.07725,  Lr= 0.07173




Iter-216: ACC= 0.3914, NMI= 0.3130, ARI= 0.1958;  L= 0.11872, Lc= 0.04613,  Lr= 0.07259




Iter-224: ACC= 0.3914, NMI= 0.3139, ARI= 0.1962;  L= 0.13847, Lc= 0.06405,  Lr= 0.07442




Iter-232: ACC= 0.3914, NMI= 0.3168, ARI= 0.1972;  L= 0.14076, Lc= 0.06673,  Lr= 0.07403




Iter-240: ACC= 0.3905, NMI= 0.3139, ARI= 0.1958;  L= 0.12941, Lc= 0.06334,  Lr= 0.06607
saving model to: scDeepCluster/scDeepCluster_model_240.h5




Iter-248: ACC= 0.3905, NMI= 0.3133, ARI= 0.1958;  L= 0.13421, Lc= 0.06139,  Lr= 0.07282
delta_label  0.0009523809523809524 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: ACC= 0.3905, NMI= 0.3133, ARI= 0.1958
Clustering time: 2087 seconds.
>>> Dataset worm_neuron_cell 
### Autoencoder: Successfully preprocessed 13488 genes and 4186 cells.
Sample size
(4186, 13488)
(4186,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 16
Save interval 80
Initializing cluster centers with k-means.




Iter-0: ACC= 0.4417, NMI= 0.3463, ARI= 0.2392;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-16: ACC= 0.4491, NMI= 0.3594, ARI= 0.2536;  L= 0.06220, Lc= 0.01934,  Lr= 0.04287




Iter-32: ACC= 0.4556, NMI= 0.3717, ARI= 0.2660;  L= 0.07172, Lc= 0.02766,  Lr= 0.04406




Iter-48: ACC= 0.4565, NMI= 0.3714, ARI= 0.2686;  L= 0.08010, Lc= 0.03371,  Lr= 0.04639




Iter-64: ACC= 0.4580, NMI= 0.3751, ARI= 0.2714;  L= 0.09433, Lc= 0.04731,  Lr= 0.04702




Iter-80: ACC= 0.4577, NMI= 0.3760, ARI= 0.2712;  L= 0.09740, Lc= 0.04893,  Lr= 0.04847
saving model to: scDeepCluster/scDeepCluster_model_80.h5




Iter-96: ACC= 0.4587, NMI= 0.3804, ARI= 0.2731;  L= 0.11088, Lc= 0.05581,  Lr= 0.05507




Iter-112: ACC= 0.4601, NMI= 0.3841, ARI= 0.2754;  L= 0.10841, Lc= 0.05636,  Lr= 0.05205




Iter-128: ACC= 0.4603, NMI= 0.3858, ARI= 0.2760;  L= 0.12485, Lc= 0.06254,  Lr= 0.06231




Iter-144: ACC= 0.4560, NMI= 0.3841, ARI= 0.2723;  L= 0.12184, Lc= 0.06068,  Lr= 0.06116




Iter-160: ACC= 0.4575, NMI= 0.3900, ARI= 0.2745;  L= 0.12668, Lc= 0.06335,  Lr= 0.06333
saving model to: scDeepCluster/scDeepCluster_model_160.h5




Iter-176: ACC= 0.4529, NMI= 0.3851, ARI= 0.2709;  L= 0.13555, Lc= 0.07050,  Lr= 0.06505




Iter-192: ACC= 0.4462, NMI= 0.3795, ARI= 0.2639;  L= 0.12834, Lc= 0.06454,  Lr= 0.06379




Iter-208: ACC= 0.4436, NMI= 0.3770, ARI= 0.2627;  L= 0.12152, Lc= 0.06884,  Lr= 0.05268




Iter-224: ACC= 0.4420, NMI= 0.3759, ARI= 0.2618;  L= 0.12069, Lc= 0.06855,  Lr= 0.05214




Iter-240: ACC= 0.4424, NMI= 0.3795, ARI= 0.2637;  L= 0.12245, Lc= 0.06817,  Lr= 0.05428
saving model to: scDeepCluster/scDeepCluster_model_240.h5




Iter-256: ACC= 0.4398, NMI= 0.3786, ARI= 0.2609;  L= 0.12312, Lc= 0.06568,  Lr= 0.05744




Iter-272: ACC= 0.4427, NMI= 0.3873, ARI= 0.2660;  L= 0.13272, Lc= 0.05567,  Lr= 0.07705




Iter-288: ACC= 0.4424, NMI= 0.3863, ARI= 0.2656;  L= 0.12111, Lc= 0.05242,  Lr= 0.06869




Iter-304: ACC= 0.4415, NMI= 0.3888, ARI= 0.2655;  L= 0.12839, Lc= 0.05859,  Lr= 0.06980




Iter-320: ACC= 0.4417, NMI= 0.3893, ARI= 0.2659;  L= 0.12343, Lc= 0.05213,  Lr= 0.07131
saving model to: scDeepCluster/scDeepCluster_model_320.h5




Iter-336: ACC= 0.4391, NMI= 0.3867, ARI= 0.2631;  L= 0.12813, Lc= 0.05777,  Lr= 0.07036




Iter-352: ACC= 0.4400, NMI= 0.3877, ARI= 0.2642;  L= 0.11605, Lc= 0.04818,  Lr= 0.06787




Iter-368: ACC= 0.4393, NMI= 0.3871, ARI= 0.2633;  L= 0.11685, Lc= 0.04610,  Lr= 0.07075




Iter-384: ACC= 0.4391, NMI= 0.3887, ARI= 0.2634;  L= 0.11418, Lc= 0.04907,  Lr= 0.06511




Iter-400: ACC= 0.4396, NMI= 0.3887, ARI= 0.2640;  L= 0.12301, Lc= 0.05057,  Lr= 0.07245
saving model to: scDeepCluster/scDeepCluster_model_400.h5




Iter-416: ACC= 0.4386, NMI= 0.3886, ARI= 0.2629;  L= 0.11415, Lc= 0.04709,  Lr= 0.06707




Iter-432: ACC= 0.4386, NMI= 0.3881, ARI= 0.2628;  L= 0.11140, Lc= 0.04280,  Lr= 0.06860
delta_label  0.000716674629718108 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: ACC= 0.4386, NMI= 0.3881, ARI= 0.2628
Clustering time: 4479 seconds.
>>> Dataset mouse_bladder_cell 
### Autoencoder: Successfully preprocessed 20670 genes and 2746 cells.
Sample size
(2746, 19771)
(2746,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 10
Save interval 50
Initializing cluster centers with k-means.




Iter-0: ACC= 0.4232, NMI= 0.5046, ARI= 0.3697;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-10: ACC= 0.4235, NMI= 0.5091, ARI= 0.3705;  L= 0.16110, Lc= 0.04234,  Lr= 0.11876




Iter-20: ACC= 0.4235, NMI= 0.5123, ARI= 0.3717;  L= 0.18853, Lc= 0.05958,  Lr= 0.12895




Iter-30: ACC= 0.4246, NMI= 0.5161, ARI= 0.3733;  L= 0.18714, Lc= 0.06851,  Lr= 0.11863




Iter-40: ACC= 0.4239, NMI= 0.5153, ARI= 0.3733;  L= 0.19807, Lc= 0.06866,  Lr= 0.12940




Iter-50: ACC= 0.4243, NMI= 0.5218, ARI= 0.3770;  L= 0.20901, Lc= 0.07515,  Lr= 0.13386
saving model to: scDeepCluster/scDeepCluster_model_50.h5




Iter-60: ACC= 0.4264, NMI= 0.5208, ARI= 0.3777;  L= 0.22226, Lc= 0.07902,  Lr= 0.14324




Iter-70: ACC= 0.4323, NMI= 0.5242, ARI= 0.3812;  L= 0.22870, Lc= 0.08167,  Lr= 0.14703




Iter-80: ACC= 0.4366, NMI= 0.5268, ARI= 0.3854;  L= 0.22431, Lc= 0.07329,  Lr= 0.15102




Iter-90: ACC= 0.4435, NMI= 0.5328, ARI= 0.3901;  L= 0.23351, Lc= 0.07229,  Lr= 0.16121




Iter-100: ACC= 0.4461, NMI= 0.5345, ARI= 0.3937;  L= 0.24038, Lc= 0.07638,  Lr= 0.16401
saving model to: scDeepCluster/scDeepCluster_model_100.h5




Iter-110: ACC= 0.4497, NMI= 0.5386, ARI= 0.3980;  L= 0.22358, Lc= 0.06527,  Lr= 0.15830




Iter-120: ACC= 0.4519, NMI= 0.5432, ARI= 0.4011;  L= 0.21834, Lc= 0.06431,  Lr= 0.15404




Iter-130: ACC= 0.4559, NMI= 0.5508, ARI= 0.4071;  L= 0.22528, Lc= 0.06254,  Lr= 0.16273




Iter-140: ACC= 0.4574, NMI= 0.5569, ARI= 0.4123;  L= 0.19843, Lc= 0.05121,  Lr= 0.14722




Iter-150: ACC= 0.4588, NMI= 0.5587, ARI= 0.4130;  L= 0.22144, Lc= 0.05624,  Lr= 0.16520
saving model to: scDeepCluster/scDeepCluster_model_150.h5




Iter-160: ACC= 0.4603, NMI= 0.5612, ARI= 0.4152;  L= 0.21478, Lc= 0.04997,  Lr= 0.16481




Iter-170: ACC= 0.4607, NMI= 0.5606, ARI= 0.4150;  L= 0.21865, Lc= 0.04525,  Lr= 0.17340




Iter-180: ACC= 0.4614, NMI= 0.5603, ARI= 0.4149;  L= 0.21836, Lc= 0.04438,  Lr= 0.17398




Iter-190: ACC= 0.4618, NMI= 0.5616, ARI= 0.4161;  L= 0.20935, Lc= 0.04061,  Lr= 0.16874




Iter-200: ACC= 0.4618, NMI= 0.5605, ARI= 0.4151;  L= 0.21868, Lc= 0.04006,  Lr= 0.17862
saving model to: scDeepCluster/scDeepCluster_model_200.h5




Iter-210: ACC= 0.4625, NMI= 0.5618, ARI= 0.4160;  L= 0.22392, Lc= 0.04459,  Lr= 0.17933




Iter-220: ACC= 0.4625, NMI= 0.5627, ARI= 0.4166;  L= 0.19937, Lc= 0.03199,  Lr= 0.16737




Iter-230: ACC= 0.4625, NMI= 0.5630, ARI= 0.4165;  L= 0.19987, Lc= 0.03767,  Lr= 0.16220




Iter-240: ACC= 0.4629, NMI= 0.5644, ARI= 0.4179;  L= 0.20222, Lc= 0.03374,  Lr= 0.16848




Iter-250: ACC= 0.4625, NMI= 0.5640, ARI= 0.4175;  L= 0.17897, Lc= 0.02874,  Lr= 0.15023
saving model to: scDeepCluster/scDeepCluster_model_250.h5




Iter-260: ACC= 0.4625, NMI= 0.5630, ARI= 0.4165;  L= 0.20708, Lc= 0.03808,  Lr= 0.16900




Iter-270: ACC= 0.4632, NMI= 0.5660, ARI= 0.4184;  L= 0.19866, Lc= 0.03037,  Lr= 0.16828




Iter-280: ACC= 0.4629, NMI= 0.5625, ARI= 0.4167;  L= 0.20437, Lc= 0.02789,  Lr= 0.17649




Iter-290: ACC= 0.4639, NMI= 0.5649, ARI= 0.4179;  L= 0.20445, Lc= 0.02833,  Lr= 0.17612




Iter-300: ACC= 0.4636, NMI= 0.5635, ARI= 0.4173;  L= 0.20063, Lc= 0.03012,  Lr= 0.17052
delta_label  0.0007283321194464676 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5




Final: ACC= 0.4636, NMI= 0.5635, ARI= 0.4173
Clustering time: 4229 seconds.
>>> Dataset mouse_ES_cell_select_2100 
### Autoencoder: Successfully preprocessed 24175 genes and 2100 cells.
Sample size
(2100, 24046)
(2100,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 8
Save interval 40
Initializing cluster centers with k-means.




Iter-0: ACC= 0.8814, NMI= 0.8476, ARI= 0.8286;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-8: ACC= 0.8857, NMI= 0.8604, ARI= 0.8407;  L= 0.80938, Lc= 0.06196,  Lr= 0.74742




Iter-16: ACC= 0.8886, NMI= 0.8702, ARI= 0.8489;  L= 0.81613, Lc= 0.07372,  Lr= 0.74241




Iter-24: ACC= 0.8886, NMI= 0.8710, ARI= 0.8489;  L= 0.81625, Lc= 0.07053,  Lr= 0.74572
delta_label  0.0004761904761904762 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5




Final: ACC= 0.8886, NMI= 0.8710, ARI= 0.8489
Clustering time: 3236 seconds.
>>> Dataset mouse_bladder_cell_select_2100 
### Autoencoder: Successfully preprocessed 20670 genes and 2100 cells.
Sample size
(2100, 19079)
(2100,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 8
Save interval 40
Initializing cluster centers with k-means.




Iter-0: ACC= 0.4419, NMI= 0.5010, ARI= 0.3797;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-8: ACC= 0.4452, NMI= 0.5151, ARI= 0.3879;  L= 0.16304, Lc= 0.04278,  Lr= 0.12026




Iter-16: ACC= 0.4448, NMI= 0.5273, ARI= 0.3920;  L= 0.17735, Lc= 0.05791,  Lr= 0.11944




Iter-24: ACC= 0.4448, NMI= 0.5291, ARI= 0.3926;  L= 0.18673, Lc= 0.06516,  Lr= 0.12157




Iter-32: ACC= 0.4467, NMI= 0.5353, ARI= 0.3965;  L= 0.19445, Lc= 0.07184,  Lr= 0.12261




Iter-40: ACC= 0.4486, NMI= 0.5395, ARI= 0.3992;  L= 0.19956, Lc= 0.07523,  Lr= 0.12433
saving model to: scDeepCluster/scDeepCluster_model_40.h5




Iter-48: ACC= 0.4505, NMI= 0.5422, ARI= 0.4010;  L= 0.21625, Lc= 0.08000,  Lr= 0.13626




Iter-56: ACC= 0.4514, NMI= 0.5451, ARI= 0.4026;  L= 0.21413, Lc= 0.08324,  Lr= 0.13089




Iter-64: ACC= 0.4533, NMI= 0.5455, ARI= 0.4020;  L= 0.23215, Lc= 0.08788,  Lr= 0.14427




Iter-72: ACC= 0.4576, NMI= 0.5536, ARI= 0.4083;  L= 0.24112, Lc= 0.08807,  Lr= 0.15305




Iter-80: ACC= 0.4591, NMI= 0.5568, ARI= 0.4090;  L= 0.23609, Lc= 0.08587,  Lr= 0.15022
saving model to: scDeepCluster/scDeepCluster_model_80.h5




Iter-88: ACC= 0.4605, NMI= 0.5608, ARI= 0.4099;  L= 0.24050, Lc= 0.08656,  Lr= 0.15394




Iter-96: ACC= 0.4610, NMI= 0.5622, ARI= 0.4108;  L= 0.24202, Lc= 0.08439,  Lr= 0.15764
delta_label  0.0009523809523809524 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5




Final: ACC= 0.4610, NMI= 0.5622, ARI= 0.4108
Clustering time: 2596 seconds.
>>> Dataset 10X_PBMC 
### Autoencoder: Successfully preprocessed 16653 genes and 4271 cells.
Sample size
(4271, 16653)
(4271,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 16
Save interval 80
Initializing cluster centers with k-means.




Iter-0: ACC= 0.5692, NMI= 0.6051, ARI= 0.4823;  L= 0.00000, Lc= 0.00000,  Lr= 0.00000
saving model to: scDeepCluster/scDeepCluster_model_0.h5




Iter-16: ACC= 0.5683, NMI= 0.6179, ARI= 0.4921;  L= 0.25252, Lc= 0.04577,  Lr= 0.20676




Iter-32: ACC= 0.5678, NMI= 0.6280, ARI= 0.5020;  L= 0.27521, Lc= 0.06456,  Lr= 0.21065




Iter-48: ACC= 0.5664, NMI= 0.6311, ARI= 0.5072;  L= 0.27894, Lc= 0.06484,  Lr= 0.21411




Iter-64: ACC= 0.5654, NMI= 0.6351, ARI= 0.5104;  L= 0.27765, Lc= 0.06400,  Lr= 0.21365




Iter-80: ACC= 0.5659, NMI= 0.6374, ARI= 0.5123;  L= 0.29123, Lc= 0.06838,  Lr= 0.22286
saving model to: scDeepCluster/scDeepCluster_model_80.h5




Iter-96: ACC= 0.5612, NMI= 0.6362, ARI= 0.5100;  L= 0.28344, Lc= 0.06586,  Lr= 0.21758




Iter-112: ACC= 0.5643, NMI= 0.6398, ARI= 0.5126;  L= 0.28407, Lc= 0.06148,  Lr= 0.22258




Iter-128: ACC= 0.5610, NMI= 0.6388, ARI= 0.5096;  L= 0.29056, Lc= 0.06036,  Lr= 0.23021




Iter-144: ACC= 0.5591, NMI= 0.6382, ARI= 0.5072;  L= 0.27636, Lc= 0.05225,  Lr= 0.22410




Iter-160: ACC= 0.5596, NMI= 0.6431, ARI= 0.5095;  L= 0.28845, Lc= 0.05537,  Lr= 0.23308
saving model to: scDeepCluster/scDeepCluster_model_160.h5




Iter-176: ACC= 0.5589, NMI= 0.6438, ARI= 0.5090;  L= 0.27605, Lc= 0.04524,  Lr= 0.23081




Iter-192: ACC= 0.5565, NMI= 0.6424, ARI= 0.5054;  L= 0.27805, Lc= 0.04665,  Lr= 0.23140




Iter-208: ACC= 0.5570, NMI= 0.6450, ARI= 0.5070;  L= 0.27245, Lc= 0.04105,  Lr= 0.23139




Iter-224: ACC= 0.5589, NMI= 0.6497, ARI= 0.5101;  L= 0.27943, Lc= 0.04374,  Lr= 0.23568




Iter-240: ACC= 0.5556, NMI= 0.6481, ARI= 0.5061;  L= 0.26967, Lc= 0.03759,  Lr= 0.23208
saving model to: scDeepCluster/scDeepCluster_model_240.h5




Iter-256: ACC= 0.5558, NMI= 0.6514, ARI= 0.5072;  L= 0.27538, Lc= 0.04065,  Lr= 0.23473
delta_label  0.0009365488176071178 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: ACC= 0.5558, NMI= 0.6514, ARI= 0.5072
Clustering time: 4574 seconds.


