In [1]:
# Set recommended library versions
# !pip install tensorflow==1.15.0
# !pip install keras==2.1.4
# !pip install pandas==1.0.4

import tensorflow as tf
tf.__version__

'1.15.0'

# Run scDeepCluster on the simulated data

In [2]:
"""
This part implements the scDeepCluster algorithm
"""

from time import time
import numpy as np
from keras.models import Model
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input, GaussianNoise, Layer, Activation
from keras.models import Model
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics

import h5py
import scanpy.api as sc
from scDeepCluster_layers import ConstantDispersionLayer, SliceLayer, ColWiseMultLayer
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_score, calinski_harabasz_score
from scDeepCluster_loss import poisson_loss, NB, ZINB
from scDeepCluster_preprocess import read_dataset, normalize
import tensorflow as tf

from numpy.random import seed
# Seed NumPy's global random generator so NumPy-driven randomness is repeatable.
seed(2211)
# tf.random.set_seed(2211)
# The notebook pins tensorflow==1.15.0, so the TF 1.x `set_random_seed` entry point is used
# here; the commented-out call above is the spelling used by newer TensorFlow releases.
from tensorflow import set_random_seed
set_random_seed(2211)

# Activation used by the `mean` output head of the autoencoder: exp(x) clipped to [1e-5, 1e6].
MeanAct = lambda x: tf.clip_by_value(K.exp(x), 1e-5, 1e6)
# Activation used by the `dispersion` output head: softplus(x) clipped to [1e-4, 1e4].
DispAct = lambda x: tf.clip_by_value(tf.nn.softplus(x), 1e-4, 1e4)

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one label matching.

    Cluster ids are arbitrary, so the predicted ids are first matched to the
    true ids with the Hungarian algorithm (maximising the number of agreeing
    samples); the accuracy is the fraction of samples that agree under that
    matching. Requires SciPy.

    # Arguments
        y_true: true labels, non-negative integers, array-like with shape `(n_samples,)`
        y_pred: predicted labels, non-negative integers, array-like with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    # Raises
        ValueError: if the inputs are empty.
        AssertionError: if the two inputs do not have the same number of elements.
    """
    # `sklearn.utils.linear_assignment_` was removed from scikit-learn; SciPy ships the
    # same Hungarian solver. Imported locally, like the original helper import was.
    from scipy.optimize import linear_sum_assignment

    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)  # labels are used as array indices below
    assert y_pred.size == y_true.size
    if y_pred.size == 0:
        raise ValueError("cluster_acc requires at least one sample")

    # Contingency table: w[p, t] = number of samples predicted as p whose true label is t.
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises cost, so turn "counts to maximise" into "costs to minimise".
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() * 1.0 / y_pred.size


def autoencoder(dims, noise_sd=0, init='glorot_uniform', act='relu'):
    """
    Build the symmetric, fully connected auto-encoder with three output heads.

    Arguments:
        dims: list with the number of units of every encoder layer. dims[0] is the input
            dimension and dims[-1] the size of the hidden (bottleneck) layer. The decoder
            mirrors the encoder.
        noise_sd: standard deviation passed to every GaussianNoise layer (on the input and
            after each internal encoder Dense layer).
        init: kernel initializer used by all Dense layers.
        act: activation of the internal layers; it is not applied to the input, the hidden
            layer or the output heads.
    return:
        Model taking `[counts, size_factors]` and returning the output of the `slice` layer,
        which is fed the size-factor-scaled mean together with the dispersion and pi heads.
    """
    depth = len(dims) - 1

    # the two model inputs: per-cell size factors and the expression matrix
    size_factors = Input(shape=(1,), name='size_factors')
    counts = Input(shape=(dims[0],), name='counts')

    # corrupt the input before it enters the encoder
    tensor = GaussianNoise(noise_sd, name='input_noise')(counts)

    # encoder body: Dense -> Gaussian noise -> activation, for every layer before the bottleneck
    for idx, width in enumerate(dims[1:-1]):
        tensor = Dense(width, kernel_initializer=init, name='encoder_%d' % idx)(tensor)
        tensor = GaussianNoise(noise_sd, name='noise_%d' % idx)(tensor)
        tensor = Activation(act)(tensor)

    # bottleneck, no activation: the embedding is read from this layer
    tensor = Dense(dims[-1], kernel_initializer=init, name='encoder_hidden')(tensor)

    # decoder body, walking the encoder widths backwards
    for idx in reversed(range(1, depth)):
        tensor = Dense(dims[idx], activation=act, kernel_initializer=init,
                       name='decoder_%d' % idx)(tensor)

    # three parallel heads on top of the last decoder layer
    pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(tensor)
    disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(tensor)
    mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(tensor)

    # scale the mean by the size factors, then combine the heads through the slice layer
    scaled_mean = ColWiseMultLayer(name='output')([mean, size_factors])
    sliced = SliceLayer(0, name='slice')([scaled_mean, disp, pi])

    return Model(inputs=[counts, size_factors], outputs=sliced)


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        # Accept the `input_dim` shorthand by translating it to `input_shape`.
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        """Create the `(n_clusters, n_features)` cluster-center weight and load initial centers if given."""
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape = (self.n_clusters, input_dim), 
                                        initializer='glorot_uniform',
                                        name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            # The initial centers are only needed once; drop the reference afterwards.
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, the same as used in the t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        # Squared distance of every sample to every center, via broadcasting over a new axis.
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        # Normalize each row so the soft assignments of a sample sum to one.
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        """Return the layer config, including every constructor hyper-parameter.

        `alpha` is part of the config so that a layer rebuilt from it does not
        silently fall back to the constructor default.
        """
        config = {'n_clusters': self.n_clusters, 'alpha': self.alpha}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



class SCDeepCluster(object):
    """
    scDeepCluster model: an auto-encoder trained with the ZINB loss plus a clustering layer
    attached to its hidden layer.

    Attributes set by the constructor:
        autoencoder: the noisy auto-encoder returned by `autoencoder(...)`.
        encoder: model from the auto-encoder inputs to the `encoder_hidden` layer that skips
            the noise/dropout layers.
        model: joint model with outputs `[clustering soft labels, auto-encoder output]`.
        loss: the ZINB loss used for the reconstruction output.
        y_pred: last predicted hard labels (filled by `fit`).
        features: last extracted embedding (filled by `fit`, `None` before).
    """

    def __init__(self,
                 dims,
                 n_clusters=10,
                 noise_sd=0,
                 alpha=1.0,
                 ridge=0,
                 debug=False):

        super(SCDeepCluster, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.noise_sd = noise_sd
        self.alpha = alpha
        self.act = 'relu'
        self.ridge = ridge
        self.debug = debug
        self.autoencoder = autoencoder(self.dims, noise_sd=self.noise_sd, act = self.act)

        # prepare clean encode model without Gaussian noise: re-apply the auto-encoder layers
        # to the counts input, skipping noise/dropout layers, and stop at the hidden layer
        ae_layers = [l for l in self.autoencoder.layers]
        hidden = self.autoencoder.input[0]
        for i in range(1, len(ae_layers)):
            layer_name = ae_layers[i].name
            if "noise" in layer_name or "dropout" in layer_name:
                continue
            hidden = ae_layers[i](hidden)
            if "encoder_hidden" in layer_name:  # only get encoder layers
                break
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        pi = self.autoencoder.get_layer(name='pi').output
        disp = self.autoencoder.get_layer(name='dispersion').output
        zinb = ZINB(pi, theta=disp, ridge_lambda=self.ridge, debug=self.debug)
        self.loss = zinb.loss

        clustering_layer = ClusteringLayer(self.n_clusters, alpha=self.alpha, name='clustering')(hidden)
        self.model = Model(inputs=[self.autoencoder.input[0], self.autoencoder.input[1]],
                           outputs=[clustering_layer, self.autoencoder.output])

        self.pretrained = False
        self.centers = []
        self.y_pred = []
        self.features = None  # populated by fit()

    def pretrain(self, x, y, batch_size=256, epochs=200, optimizer='adam', ae_file='ae_weights.h5'):
        """Train the auto-encoder alone with the ZINB loss and save its weights to `ae_file`.

        Arguments:
            x: auto-encoder inputs, `[counts, size_factors]`.
            y: reconstruction target passed to `fit`.
            batch_size, epochs, optimizer: forwarded to Keras compile/fit.
            ae_file: path the trained auto-encoder weights are written to.
        """
        print('...Pretraining autoencoder...')
        self.autoencoder.compile(loss=self.loss, optimizer=optimizer)
        # stop when the training loss has not improved for 50 epochs
        es = EarlyStopping(monitor="loss", patience=50, verbose=1)
        self.autoencoder.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, callbacks=[es], verbose = 0)
        self.autoencoder.save_weights(ae_file)
        print('Pretrained weights are saved to ./' + str(ae_file))
        self.pretrained = True

    def load_weights(self, weights_path):  # load weights of scDeepCluster model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):  # target distribution P which enhances the discrimination of soft label Q
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def fit(self, x_counts, sf, y, raw_counts, batch_size=256, maxiter=2e4, tol=1e-3, update_interval=140,
            ae_weights=None, save_dir='./output/pickle_results/scDeepCluster', loss_weights=[1,1], optimizer='adadelta'):
        """Jointly optimise the clustering (KL) and reconstruction (ZINB) losses.

        Arguments:
            x_counts: model input fed to the `counts` input.
            sf: size factors fed to the `size_factors` input.
            y: optional true labels; when given, NMI/ARI are computed at every update step
                and written to the CSV log.
            raw_counts: reconstruction target for the auto-encoder output.
            batch_size: mini-batch size.
            maxiter: maximum number of mini-batch iterations.
            tol: stop once the fraction of samples whose label changed between two
                consecutive update steps drops below this value.
            update_interval: number of iterations between two updates of the target
                distribution; values below 1 are treated as 1.
            ae_weights: optional path of pretrained auto-encoder weights to load.
            save_dir: directory receiving the CSV log and the final model weights.
            loss_weights: weights of `[clustering loss, reconstruction loss]`.
            optimizer: Keras optimizer for the joint model.
        Return:
            the predicted hard labels from the last update step.
        """
        import csv, os

        self.model.compile(loss=['kld', self.loss], loss_weights=loss_weights, optimizer=optimizer)

        # An interval of 0 (e.g. derived from a dataset smaller than one batch) would make
        # the modulo below divide by zero.
        update_interval = max(1, update_interval)
        n_samples = x_counts.shape[0]

        print('Update interval', update_interval)
        save_interval = int(n_samples / batch_size) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: pretrain
        if not self.pretrained and ae_weights is None:
            print('...pretraining autoencoders using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            # pretrain() expects the auto-encoder inputs and the reconstruction target
            self.pretrain(x=[x_counts, sf], y=raw_counts, batch_size=batch_size)
            self.pretrained = True
        elif ae_weights is not None:
            self.autoencoder.load_weights(ae_weights)
            print('ae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict([x_counts, sf]))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        os.makedirs(save_dir, exist_ok=True)

        loss = [0, 0, 0]
        index = 0
        # The context manager closes the log file on every exit path, including exceptions.
        with open(save_dir + '/scDeepCluster_log.csv', 'w', newline='') as logfile:
            logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
            logwriter.writeheader()

            for ite in range(int(maxiter)):
                if ite % update_interval == 0:
                    q, _ = self.model.predict([x_counts, sf], verbose=0)
                    self.features = self.extract_feature([x_counts, sf])
                    p = self.target_distribution(q)  # update the auxiliary target distribution p

                    # evaluate the clustering performance
                    self.y_pred = q.argmax(1)
                    if y is not None:
                        nmi = np.round(metrics.normalized_mutual_info_score(y, self.y_pred), 5)
                        ari = np.round(metrics.adjusted_rand_score(y, self.y_pred), 5)
                        rounded = np.round(loss, 5)
                        # `acc` is not computed here; DictWriter leaves that column empty.
                        logwriter.writerow(dict(iter=ite, nmi=nmi, ari=ari,
                                                L=rounded[0], Lc=rounded[1], Lr=rounded[2]))

                    # check stop criterion
                    delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                    y_pred_last = np.copy(self.y_pred)
                    if ite > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print('Reached tolerance threshold. Stopping training.')
                        break

                # train on batch
                lo = index * batch_size
                hi = lo + batch_size
                if hi >= n_samples:
                    # Last batch of the epoch. `>=` (rather than `>`) also wraps around when
                    # the sample count is an exact multiple of the batch size, which would
                    # otherwise schedule an empty batch on the next iteration.
                    loss = self.model.train_on_batch(x=[x_counts[lo:], sf[lo:]],
                                                     y=[p[lo:], raw_counts[lo:]])
                    index = 0
                else:
                    loss = self.model.train_on_batch(x=[x_counts[lo:hi], sf[lo:hi]],
                                                     y=[p[lo:hi], raw_counts[lo:hi]])
                    index += 1

        # save the trained model
        print('saving model to: ' + save_dir + '/scDeepCluster_model_final.h5')
        self.model.save_weights(save_dir + '/scDeepCluster_model_final.h5')

        return self.y_pred

Using TensorFlow backend.






In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [3]:
import sys
sys.path.append("..")
import glob2
import os

In [4]:
# Load the previously saved "full" scDeepCluster result table for the real datasets
# and display it as the cell output.
category = "real_data"
full_results_path = f"../output/pickle_results/{category}/{category}_scDeepCluster_full.pkl"
df = pd.read_pickle(full_results_path)
df

Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,worm_neuron_cell,0.45058,0.6688,-0.043893,0,3197.645042,"[3, 3, 6, 3, 8, 7, 9, 5, 1, 7, 9, 1, 9, 6, 1, ..."
1,worm_neuron_cell,0.59111,0.72651,-0.077288,1,3207.626115,"[5, 5, 2, 9, 4, 9, 9, 4, 7, 9, 1, 7, 9, 2, 7, ..."
2,mouse_ES_cell,0.53629,0.6665,0.021175,0,3817.521759,"[0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, ..."
3,mouse_ES_cell,0.87223,0.85359,0.002629,1,3718.663101,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ..."
4,mouse_bladder_cell,0.50613,0.69419,-0.035686,0,3721.444118,"[10, 9, 6, 13, 2, 6, 5, 2, 7, 8, 10, 7, 11, 5,..."
5,mouse_bladder_cell,0.57045,0.72771,-0.025315,1,3549.732326,"[15, 6, 14, 8, 10, 14, 6, 10, 13, 7, 4, 11, 3,..."
6,10X_PBMC,0.81013,0.79381,-0.0156,0,4288.175346,"[5, 5, 5, 0, 2, 2, 3, 3, 4, 1, 4, 2, 2, 2, 3, ..."
7,10X_PBMC,0.71036,0.76424,-0.022109,1,4255.594072,"[3, 3, 3, 5, 7, 5, 4, 4, 6, 4, 6, 7, 2, 0, 4, ..."
8,Quake_10x_Bladder,0.48326,0.63493,-0.022523,0,2565.571199,"[1, 0, 0, 2, 0, 3, 3, 1, 2, 3, 2, 1, 1, 0, 1, ..."
9,Quake_10x_Bladder,0.72893,0.76229,0.042024,1,2399.91595,"[3, 3, 1, 0, 1, 2, 0, 3, 0, 0, 0, 3, 3, 1, 3, ..."


In [None]:
# Benchmark driver: for every dataset of each category, pretrain and fit scDeepCluster,
# score the clustering and append one row to the pickled result table.
for category in ["real_data",
#                  "balanced_data", "imbalanced_data"
                ]:

    path = ".."
    if category in ["balanced_data", "imbalanced_data"]:
        # simulated data: every .h5 file found in the category folder is a dataset
        data_dir = f"{path}/R/simulated_data/{category}"
        files = glob2.glob(f'{data_dir}/*.h5')
        files = [f[len(f"{data_dir}/"):-3] for f in files]
    else:
        # real data: fixed dataset list (fixes the processing order)
        data_dir = f"{path}/real_data"
        files = [
            'worm_neuron_cell',
            'mouse_ES_cell',
            'mouse_bladder_cell',
            '10X_PBMC',
            'Quake_10x_Bladder',
            'Young',
            'Quake_10x_Limb_Muscle',
            'Adam',
            'Quake_Smart-seq2_Trachea',
            'Quake_Smart-seq2_Limb_Muscle',
            'Quake_Smart-seq2_Lung',
            'Romanov',
            'Muraro',
            'Quake_Smart-seq2_Diaphragm',
            'Quake_10x_Spleen',
        ]
    print(files)

#     df = pd.DataFrame(
#         columns=["dataset", "ARI", "NMI", "sil", "run", "time", "pred", "cal", "features"])
    # Resume from the existing result table; new rows are appended to it below.
    results_file = f"../output/pickle_results/{category}/{category}_scDeepCluster.pkl"
    df = pd.read_pickle(results_file)
    print(df.shape)
    for dataset in files:
        # np.array() copies the data into memory, so the HDF5 handle can be closed right away
        with h5py.File(f"{data_dir}/{dataset}.h5", "r") as data_mat:
            y = np.array(data_mat['Y'])
            x = np.array(data_mat['X'])
        print(f">>>>dataset {dataset}")

        # round the matrix up to integer counts (builtin `int` is what `np.int` aliased)
        x = np.ceil(x).astype(int)

        for run in [1]:
            start = time()
            #### Run scDeepCluster on the current dataset
            optimizer1 = Adam(amsgrad=True)
            optimizer2 = 'adadelta'

            # preprocessing scRNA-seq read counts matrix
            adata = sc.AnnData(x)
            adata.obs['Group'] = y

            adata = read_dataset(adata,
                                 transpose=False,
                                 test_split=False,
                                 copy=True)

            adata = normalize(adata,
                              size_factors=True,
                              normalize_input=True,
                              logtrans_input=True)

            input_size = adata.n_vars

            print('Sample size')
            print(adata.X.shape)
            print(y.shape)

            # one target-distribution update per pass over the data; at least 1 so that
            # datasets smaller than one batch do not produce an interval of 0
            update_interval = max(1, int(adata.X.shape[0] / 256))

            # named `run_seed` so the imported `numpy.random.seed` function is not shadowed
            run_seed = run
            np.random.seed(run_seed)
            # Define scDeepCluster model
            scDeepCluster = SCDeepCluster(dims=[input_size, 256, 64, 32],
                                          n_clusters=np.unique(y).shape[0],
                                          noise_sd=2.5)

            # Pretrain autoencoders before clustering
            scDeepCluster.pretrain(x=[adata.X, adata.obs.size_factors],
                                   y=adata.raw.X,
                                   batch_size=256,
                                   epochs=600,
                                   optimizer=optimizer1,
                                   ae_file='ae_weights.h5')

            # begin clustering; note that `elapsed` below is measured from `start`,
            # so the reported time includes preprocessing and pretraining.

            gamma = 1.  # set hyperparameter gamma
            scDeepCluster.fit(
                x_counts=adata.X,
                sf=adata.obs.size_factors,
                y=y,
                raw_counts=adata.raw.X,
                batch_size=256,
                tol=0.001,
                maxiter=20000,
                update_interval=update_interval,
                ae_weights=None,
                save_dir='scDeepCluster',
                loss_weights=[gamma, 1],
                optimizer=optimizer2)

            # Show the final results
            y_pred = scDeepCluster.y_pred
            nmi = np.round(
                metrics.normalized_mutual_info_score(y, scDeepCluster.y_pred),
                5)
            ari = np.round(
                metrics.adjusted_rand_score(y, scDeepCluster.y_pred), 5)
            print('Final: NMI= %.4f, ARI= %.4f' % (nmi, ari))

            elapsed = time() - start
            # internal validation scores computed on the learned embedding
            ss = silhouette_score(scDeepCluster.features, scDeepCluster.y_pred)
            cal = calinski_harabasz_score(scDeepCluster.features, scDeepCluster.y_pred)
            
            print(ss, cal)
            df.loc[df.shape[0]] = [
                dataset, ari, nmi, ss, run, elapsed, scDeepCluster.y_pred, cal, scDeepCluster.features
            ]

            # persist after every run so a crash does not lose finished datasets
            df.to_pickle(results_file)
            display(df)

['worm_neuron_cell', 'mouse_ES_cell', 'mouse_bladder_cell', '10X_PBMC', 'Quake_10x_Bladder', 'Young', 'Quake_10x_Limb_Muscle', 'Adam', 'Quake_Smart-seq2_Trachea', 'Quake_Smart-seq2_Limb_Muscle', 'Quake_Smart-seq2_Lung', 'Romanov', 'Muraro', 'Quake_Smart-seq2_Diaphragm', 'Quake_10x_Spleen']
(14, 9)
>>>>dataset worm_neuron_cell
### Autoencoder: Successfully preprocessed 13488 genes and 4186 cells.
Sample size
(4186, 13488)
(4186,)




...Pretraining autoencoder...




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where








Pretrained weights are saved to ./ae_weights.h5
Update interval 16
Save interval 80
Initializing cluster centers with k-means.
delta_label  0.0009555661729574773 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7067, ARI= 0.5894
0.7427282 7162.415015314816


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset mouse_ES_cell
### Autoencoder: Successfully preprocessed 24175 genes and 2717 cells.
Sample size
(2717, 24047)
(2717,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 10
Save interval 50
Initializing cluster centers with k-means.
delta_label  0.000736105999263894 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.8748, ARI= 0.9064
0.5564711 3372.2483823755606


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset mouse_bladder_cell
### Autoencoder: Successfully preprocessed 20670 genes and 2746 cells.
Sample size
(2746, 19771)
(2746,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 10
Save interval 50
Initializing cluster centers with k-means.
delta_label  0.0007283321194464676 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7168, ARI= 0.4968
0.8216775 12317.079813348875


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset 10X_PBMC
### Autoencoder: Successfully preprocessed 16653 genes and 4271 cells.
Sample size
(4271, 16653)
(4271,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 16
Save interval 80
Initializing cluster centers with k-means.
delta_label  0.0007024116132053383 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7714, ARI= 0.6913
0.778447 29146.292566362255


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_10x_Bladder
### Autoencoder: Successfully preprocessed 23341 genes and 2500 cells.
Sample size
(2500, 16867)
(2500,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 9
Save interval 45
Initializing cluster centers with k-means.
delta_label  0.0008 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.6808, ARI= 0.6072
0.75786555 8662.931397537664


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Young
### Autoencoder: Successfully preprocessed 33658 genes and 5685 cells.
Sample size
(5685, 25215)
(5685,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 22
Save interval 110
Initializing cluster centers with k-means.
delta_label  0.0007036059806508355 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.6464, ARI= 0.4984
0.75630766 16383.616749221064


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_10x_Limb_Muscle
### Autoencoder: Successfully preprocessed 23341 genes and 3909 cells.
Sample size
(3909, 16512)
(3909,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00025581990278843696 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.8532, ARI= 0.7616
0.82216513 24724.91052038154


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Adam
### Autoencoder: Successfully preprocessed 23797 genes and 3660 cells.
Sample size
(3660, 22350)
(3660,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 14
Save interval 70
Initializing cluster centers with k-means.
delta_label  0.000819672131147541 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.6854, ARI= 0.6045
0.6696519 5317.40533634242


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_Smart-seq2_Trachea
### Autoencoder: Successfully preprocessed 23341 genes and 1350 cells.
Sample size
(1350, 19992)
(1350,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 5
Save interval 25
Initializing cluster centers with k-means.
delta_label  0.0007407407407407407 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.5727, ARI= 0.5043
0.34746954 616.9772752450158


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_Smart-seq2_Limb_Muscle
### Autoencoder: Successfully preprocessed 23341 genes and 1090 cells.
Sample size
(1090, 18320)
(1090,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 4
Save interval 20
Initializing cluster centers with k-means.
delta_label  0.0009174311926605505 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7160, ARI= 0.5531
0.5981618 1352.0482789875189


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_Smart-seq2_Lung
### Autoencoder: Successfully preprocessed 23341 genes and 1676 cells.
Sample size
(1676, 19390)
(1676,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 6
Save interval 30
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7293, ARI= 0.4477
0.65059316 1861.6149909084122


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Romanov
### Autoencoder: Successfully preprocessed 21143 genes and 2881 cells.
Sample size
(2881, 21143)
(2881,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006942034015966678 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.5322, ARI= 0.4358
0.6083965 4119.935815504955


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Muraro
### Autoencoder: Successfully preprocessed 19046 genes and 2122 cells.
Sample size
(2122, 18819)
(2122,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 8
Save interval 40
Initializing cluster centers with k-means.
delta_label  0.000942507068803016 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7663, ARI= 0.6634
0.7085792 4342.638967956133


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


>>>>dataset Quake_Smart-seq2_Diaphragm
### Autoencoder: Successfully preprocessed 23341 genes and 870 cells.
Sample size
(870, 17973)
(870,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.7634, ARI= 0.6267
0.50997084 887.2712572380575


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred,cal,features
0,worm_neuron_cell,0.39011,0.63685,0.727489,0,5734.018851,"[9, 0, 2, 0, 6, 8, 9, 7, 4, 8, 1, 4, 9, 2, 4, ...",6994.542959,"[[-21.930113, -18.416706, 16.9666, 8.951159, -..."
1,mouse_ES_cell,0.76441,0.81447,0.439124,0,6524.775062,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...",1679.631676,"[[18.639498, -40.57001, -28.362339, 44.689766,..."
2,mouse_bladder_cell,0.45389,0.69899,0.844594,0,6410.961119,"[0, 15, 2, 12, 4, 2, 6, 1, 14, 5, 14, 3, 1, 6,...",14955.895466,"[[64.663925, 60.944817, 51.675034, 4.0672607, ..."
3,10X_PBMC,0.71981,0.74832,0.708076,0,7351.94478,"[1, 1, 1, 6, 0, 0, 4, 4, 7, 4, 7, 0, 0, 0, 4, ...",19096.54727,"[[1.7373607, 49.57687, -0.4803021, -3.6861176,..."
4,Quake_10x_Bladder,0.49464,0.63225,0.740653,0,4438.317755,"[3, 1, 1, 2, 1, 0, 0, 3, 2, 2, 2, 3, 3, 1, 3, ...",16640.857155,"[[-5.6711335, 73.285645, -5.73513, -42.752766,..."
5,Young,0.55478,0.6764,0.764555,0,15279.941896,"[2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...",13626.540961,"[[-64.16738, -9.417805, -84.29984, 1.4256966, ..."
6,Quake_10x_Limb_Muscle,0.75781,0.8461,0.76726,0,6549.631944,"[1, 0, 4, 4, 5, 5, 2, 5, 1, 5, 5, 5, 0, 2, 4, ...",16967.87183,"[[3.5009642, -102.24696, 17.305931, -34.064503..."
7,Adam,0.68514,0.75755,0.685611,0,8687.843653,"[2, 5, 2, 5, 5, 5, 5, 5, 2, 5, 5, 5, 0, 2, 2, ...",5527.968638,"[[-33.417835, -51.57393, 19.480848, -34.194695..."
8,Quake_Smart-seq2_Trachea,0.4888,0.54016,0.335755,0,2786.670206,"[3, 3, 3, 3, 3, 0, 3, 3, 2, 0, 0, 3, 0, 0, 0, ...",539.010467,"[[24.5902, -23.043766, 2.9257927, 13.214923, -..."
9,Quake_Smart-seq2_Limb_Muscle,0.5563,0.70905,0.442972,0,2102.218355,"[4, 0, 1, 0, 0, 0, 5, 2, 0, 0, 0, 0, 2, 4, 2, ...",624.575341,"[[9.356872, 0.97938836, -11.655985, -40.025845..."


In [None]:
# Per-dataset average of every numeric column in the results table.
# `numeric_only=True` is passed explicitly: older pandas silently dropped
# non-numeric columns here, while pandas >= 2.0 raises TypeError instead.
# NOTE(review): the table printed above suggests `pred` and `features` are
# non-numeric — confirm against the cell that builds `df`.
df.groupby("dataset").mean(numeric_only=True)

In [None]:
# Show the results frame as this cell's output (a bare last expression is
# rendered by the notebook).
# NOTE(review): `df` is defined outside this chunk; presumably it is the
# per-dataset results table printed in the outputs above — confirm.
df