# Run scDeepCluster on the simulated data

In [1]:
"""
This part implements the scDeepCluster algorithm
"""

from time import time
import numpy as np
from keras.models import Model
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input, GaussianNoise, Layer, Activation
from keras.models import Model
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.callbacks import EarlyStopping
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics

import h5py
import scanpy.api as sc
from scDeepCluster_layers import ConstantDispersionLayer, SliceLayer, ColWiseMultLayer
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_score
from scDeepCluster_loss import poisson_loss, NB, ZINB
from scDeepCluster_preprocess import read_dataset, normalize
import tensorflow as tf

from numpy.random import seed
seed(2211)
from tensorflow import set_random_seed
set_random_seed(2211)

# Output activations used by the 'mean' and 'dispersion' Dense heads of the autoencoder.
# MeanAct: exp(x), clipped to [1e-5, 1e6] so the value stays strictly positive and finite.
MeanAct = lambda x: tf.clip_by_value(K.exp(x), 1e-5, 1e6)
# DispAct: softplus(x), clipped to [1e-4, 1e4].
DispAct = lambda x: tf.clip_by_value(tf.nn.softplus(x), 1e-4, 1e4)

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one matching between
    predicted cluster ids and true labels. Requires SciPy.
    # Arguments
        y_true: true labels, array-like of non-negative integers with shape `(n_samples,)`
        y_pred: predicted labels, array-like of non-negative integers with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    # `sklearn.utils.linear_assignment_` is no longer shipped with recent
    # scikit-learn releases; SciPy provides the same Hungarian-algorithm solver.
    from scipy.optimize import linear_sum_assignment

    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    assert y_pred.size == y_true.size

    # Contingency table: w[p, t] counts the samples predicted as p whose true label is t.
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises cost, so turn "maximise matches" into a cost matrix.
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size


def autoencoder(dims, noise_sd=0, init='glorot_uniform', act='relu'):
    """
    Build the symmetric, fully connected autoencoder with Gaussian-noise layers in the encoder.
    Arguments:
        dims: list of layer widths of the encoder. dims[0] is the input width, dims[-1] is the
            width of the bottleneck layer ('encoder_hidden'). The decoder mirrors dims[1:-1].
        noise_sd: stddev handed to every GaussianNoise layer (on the input and after each
            internal encoder Dense layer).
        init: kernel initializer used by all Dense layers.
        act: activation of the internal encoder and decoder layers; the bottleneck has none.
    return:
        Model with inputs [counts, size_factors] whose output is produced by the 'slice' layer
        from the size-factor-scaled mean, the dispersion and the pi heads.
    """
    depth = len(dims) - 1

    # model inputs: one size factor per sample, and the count vector
    size_factor_input = Input(shape=(1,), name='size_factors')
    count_input = Input(shape=(dims[0],), name='counts')

    # corrupt the input before encoding
    tensor = GaussianNoise(noise_sd, name='input_noise')(count_input)

    # encoder: Dense -> noise -> activation for every internal width
    for idx, width in enumerate(dims[1:-1]):
        tensor = Dense(width, kernel_initializer=init, name=f'encoder_{idx}')(tensor)
        tensor = GaussianNoise(noise_sd, name=f'noise_{idx}')(tensor)
        tensor = Activation(act)(tensor)

    # bottleneck: linear layer the features are read from
    tensor = Dense(dims[-1], kernel_initializer=init, name='encoder_hidden')(tensor)

    # decoder: the internal widths again, in reverse order
    for idx in reversed(range(1, depth)):
        tensor = Dense(dims[idx], activation=act, kernel_initializer=init,
                       name=f'decoder_{idx}')(tensor)

    # three output heads, each as wide as the input
    pi_head = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(tensor)
    disp_head = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(tensor)
    mean_head = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(tensor)

    # scale the mean by the size factors, then bundle with dispersion and pi
    scaled_mean = ColWiseMultLayer(name='output')([mean_head, size_factor_input])
    sliced = SliceLayer(0, name='slice')([scaled_mean, disp_head, pi_head])

    return Model(inputs=[count_input, size_factor_input], outputs=sliced)


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        # Accept the legacy `input_dim` keyword by translating it to `input_shape`.
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        """Create the `(n_clusters, n_features)` cluster-center weight and apply initial weights, if any."""
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim),
                                        initializer='glorot_uniform',
                                        name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        # Squared distance of every sample to every cluster center, scaled by alpha.
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        # Normalize each row so the soft assignments of a sample sum to one.
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        # `alpha` must be serialized as well; otherwise a layer rebuilt from its
        # config silently falls back to the constructor default of 1.0.
        config = {'n_clusters': self.n_clusters, 'alpha': self.alpha}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



class SCDeepCluster(object):
    """
    Wraps three Keras models that share layers:
        autoencoder: the noisy autoencoder returned by `autoencoder(dims, ...)`, trained with the ZINB loss.
        encoder: the same encoder layers re-applied without the noise layers, ending at 'encoder_hidden'.
        model: inputs [counts, size_factors] -> [clustering-layer output, autoencoder output].
    # Arguments
        dims: layer widths passed to `autoencoder`; dims[0] is the input width.
        n_clusters: number of clusters of the clustering layer and of the k-means initialization.
        noise_sd: stddev of the Gaussian noise layers of the autoencoder.
        alpha: parameter of the clustering layer.
        ridge: forwarded to ZINB as `ridge_lambda`.
        debug: forwarded to ZINB as `debug`.
    """

    def __init__(self,
                 dims,
                 n_clusters=10,
                 noise_sd=0,
                 alpha=1.0,
                 ridge=0,
                 debug=False):

        super(SCDeepCluster, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.noise_sd = noise_sd
        self.alpha = alpha
        self.act = 'relu'
        self.ridge = ridge
        self.debug = debug
        self.autoencoder = autoencoder(self.dims, noise_sd=self.noise_sd, act=self.act)

        # prepare clean encode model without Gaussian noise: re-apply the
        # autoencoder's layers to the counts input, skipping noise/dropout
        # layers, and stop right after the bottleneck.
        hidden = self.autoencoder.input[0]
        for layer in self.autoencoder.layers[1:]:
            if "noise" in layer.name or "dropout" in layer.name:
                continue
            hidden = layer(hidden)
            if "encoder_hidden" in layer.name:  # only get encoder layers
                break
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)

        pi = self.autoencoder.get_layer(name='pi').output
        disp = self.autoencoder.get_layer(name='dispersion').output
        zinb = ZINB(pi, theta=disp, ridge_lambda=self.ridge, debug=self.debug)
        self.loss = zinb.loss

        clustering_layer = ClusteringLayer(self.n_clusters, alpha=self.alpha, name='clustering')(hidden)
        self.model = Model(inputs=[self.autoencoder.input[0], self.autoencoder.input[1]],
                           outputs=[clustering_layer, self.autoencoder.output])

        self.pretrained = False
        self.centers = []
        self.y_pred = []

    def pretrain(self, x, y, batch_size=256, epochs=200, optimizer='adam', ae_file='ae_weights.h5'):
        """
        Train the autoencoder alone with the ZINB loss and save its weights to `ae_file`.
        Training stops early when the training loss has not improved for 50 epochs.
        # Arguments
            x: model inputs, passed to `autoencoder.fit` (the callers in this file pass [counts, size_factors]).
            y: training target, passed to `autoencoder.fit`.
        """
        print('...Pretraining autoencoder...')
        self.autoencoder.compile(loss=self.loss, optimizer=optimizer)
        es = EarlyStopping(monitor="loss", patience=50, verbose=0)
        self.autoencoder.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, callbacks=[es], verbose=0)
        self.autoencoder.save_weights(ae_file)
        print('Pretrained weights are saved to ./' + str(ae_file))
        self.pretrained = True

    def load_weights(self, weights_path):  # load weights of scDeepCluster model
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict_clusters(self, x):  # predict cluster labels using the output of clustering layer
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):  # target distribution P which enhances the discrimination of soft label Q
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def fit(self, x_counts, sf, y, raw_counts, batch_size=256, maxiter=2e4, tol=1e-3, update_interval=140,
            ae_weights=None, save_dir='./output/pickle_results/scDeepCluster', loss_weights=[1,1], optimizer='adadelta'):
        """
        Jointly train the clustering layer (KL divergence to the target distribution)
        and the autoencoder (ZINB loss), one mini-batch per iteration.
        # Arguments
            x_counts: input matrix fed to the 'counts' input.
            sf: size factors fed to the 'size_factors' input.
            y: true labels used only to report NMI/ARI, or None to skip evaluation.
            raw_counts: target of the autoencoder output.
            batch_size: mini-batch size.
            maxiter: maximum number of mini-batch iterations.
            tol: training stops when the fraction of samples whose label changed
                between two updates drops below this value.
            update_interval: number of iterations between two updates of the target
                distribution; values below 1 are treated as 1.
            ae_weights: path of autoencoder weights to load instead of pretraining.
            save_dir: directory receiving the log file and the final weights.
            loss_weights: weights of [clustering loss, ZINB loss]; passed to Keras unchanged
                (the default list is never mutated here).
            optimizer: optimizer used for the joint model.
        # Return
            the predicted labels from the last update of the target distribution.
        """
        import csv, os

        self.model.compile(loss=['kld', self.loss], loss_weights=loss_weights, optimizer=optimizer)

        # An interval of 0 (e.g. int(n_samples / 256) for a small dataset) would make
        # `ite % update_interval` raise ZeroDivisionError.
        update_interval = max(1, int(update_interval))
        print('Update interval', update_interval)
        save_interval = int(x_counts.shape[0] / batch_size) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: pretrain
        if not self.pretrained and ae_weights is None:
            print('...pretraining autoencoders using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            self.pretrain(x=[x_counts, sf], y=raw_counts, batch_size=batch_size)
            self.pretrained = True
        elif ae_weights is not None:
            self.autoencoder.load_weights(ae_weights)
            print('ae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict([x_counts, sf]))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        os.makedirs(save_dir, exist_ok=True)
        n_samples = x_counts.shape[0]

        loss = [0, 0, 0]
        index = 0
        # logging file; the context manager closes it on every exit path
        with open(save_dir + '/scDeepCluster_log.csv', 'w', newline='') as logfile:
            logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
            logwriter.writeheader()

            for ite in range(int(maxiter)):
                if ite % update_interval == 0:
                    q, _ = self.model.predict([x_counts, sf], verbose=0)
                    p = self.target_distribution(q)  # update the auxiliary target distribution p

                    # evaluate the clustering performance
                    self.y_pred = q.argmax(1)
                    if y is not None:
                        nmi = np.round(metrics.normalized_mutual_info_score(y, self.y_pred), 5)
                        ari = np.round(metrics.adjusted_rand_score(y, self.y_pred), 5)
                        loss = np.round(loss, 5)
                        # Accuracy is not computed here, so the 'acc' column stays empty.
                        logwriter.writerow(dict(iter=ite, nmi=nmi, ari=ari,
                                                L=loss[0], Lc=loss[1], Lr=loss[2]))

                    # check stop criterion
                    delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                    y_pred_last = np.copy(self.y_pred)
                    if ite > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print('Reached tolerance threshold. Stopping training.')
                        break

                # train on batch
                start = index * batch_size
                stop = start + batch_size
                if stop >= n_samples:
                    # Last (possibly shorter) batch of the epoch. `>=` rather than `>` so that
                    # an exact multiple of batch_size does not yield an empty batch next time.
                    loss = self.model.train_on_batch(x=[x_counts[start:], sf[start:]],
                                                     y=[p[start:], raw_counts[start:]])
                    index = 0
                else:
                    loss = self.model.train_on_batch(x=[x_counts[start:stop], sf[start:stop]],
                                                     y=[p[start:stop], raw_counts[start:stop]])
                    index += 1

        # save the trained model
        print('saving model to: ' + save_dir + '/scDeepCluster_model_final.h5')
        self.model.save_weights(save_dir + '/scDeepCluster_model_final.h5')

        return self.y_pred

Using TensorFlow backend.

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [2]:
import sys
sys.path.append("..")
import gnn_utils
import glob2
import os
# import time

Using backend: pytorch


In [3]:
# category = "real_data"
# Benchmark driver: for every dataset of every category, run scDeepCluster twice
# (run index = NumPy seed) and collect ARI / NMI / silhouette / wall time / labels.
for category in ["balanced_data", "imbalanced_data"]:  # "real_data" is currently disabled

    path = ".."
    if category in ["balanced_data", "imbalanced_data"]:
        data_dir = f"{path}/R/simulated_data/{category}"
    else:
        data_dir = f"{path}/real_data"

    # dataset name = file name without directory and '.h5' extension
    files = [os.path.splitext(os.path.basename(f))[0] for f in glob2.glob(f'{data_dir}/*.h5')]
    print(files)

    # make sure the pickle below has somewhere to go before hours of training are spent
    os.makedirs(f"../output/pickle_results/{category}", exist_ok=True)

    df = pd.DataFrame(columns=["dataset", "ARI", "NMI", "sil", "run", "time", "pred"])
    for dataset in files:
        # read both arrays into memory and release the file handle right away
        with h5py.File(f"{data_dir}/{dataset}.h5", "r") as data_mat:
            y = np.array(data_mat['Y'])
            x = np.array(data_mat['X'])
        print(f">>>>dataset {dataset}")
        if x.shape[0] > 10000 or dataset == "Quake_10x_Spleen":
            continue

        # `np.int` was only an alias of the builtin `int` and no longer exists in current NumPy.
        x = np.ceil(x).astype(int)

        for run in range(2):
            start = time()
            #### Run scDeepCluster on the simulated data
            optimizer1 = Adam(amsgrad=True)
            optimizer2 = 'adadelta'

            # preprocessing scRNA-seq read counts matrix
            adata = sc.AnnData(x)
            adata.obs['Group'] = y

            adata = read_dataset(adata, transpose=False, test_split=False, copy=True)

            adata = normalize(adata,
                              size_factors=True,
                              normalize_input=True,
                              logtrans_input=True)

            input_size = adata.n_vars

            print('Sample size')
            print(adata.X.shape)
            print(y.shape)

            # one update of the target distribution per epoch; never 0, which would
            # break the `ite % update_interval` test inside fit()
            update_interval = max(1, int(adata.X.shape[0] / 256))

            # the run index doubles as the NumPy seed (not stored under the name
            # `seed`, which would shadow the imported numpy.random.seed)
            np.random.seed(run)

            # Define scDeepCluster model
            # NOTE(review): n_clusters is hard-coded to 3 for every dataset -- confirm
            # this matches the number of groups in `y`.
            scDeepCluster = SCDeepCluster(dims=[input_size, 256, 64, 32],
                                          n_clusters=3,
                                          noise_sd=2.5)

            # raw counts restricted to the genes kept by preprocessing
            # (var_names are used as integer column indices of x)
            raw_counts = x[:, np.array(adata.var_names).astype(int)]

            # t0 is taken before pretraining, so the printed "Clustering time" includes it
            t0 = time()

            # Pretrain autoencoders before clustering
            scDeepCluster.pretrain(x=[adata.X, adata.obs.size_factors],
                                   y=raw_counts,
                                   batch_size=256,
                                   epochs=600,
                                   optimizer=optimizer1,
                                   ae_file='ae_weights.h5')

            # begin clustering
            gamma = 1.  # set hyperparameter gamma
            scDeepCluster.fit(x_counts=adata.X,
                              sf=adata.obs.size_factors,
                              y=y,
                              raw_counts=raw_counts,
                              batch_size=256,
                              tol=0.001,
                              maxiter=20000,
                              update_interval=update_interval,
                              ae_weights=None,
                              save_dir='scDeepCluster',
                              loss_weights=[gamma, 1],
                              optimizer=optimizer2)

            # Show the final results
            nmi = np.round(metrics.normalized_mutual_info_score(y, scDeepCluster.y_pred),
                           5)
            ari = np.round(metrics.adjusted_rand_score(y, scDeepCluster.y_pred), 5)
            print('Final: NMI= %.4f, ARI= %.4f' % (nmi, ari))
            print('Clustering time: %d seconds.' % int(time() - t0))

            elapsed = time() - start
            ss = silhouette_score(adata.X, scDeepCluster.y_pred)

            df.loc[df.shape[0]] = [dataset, ari, nmi, ss, run, elapsed, scDeepCluster.y_pred]

            # checkpoint the results after every run
            df.to_pickle(f"../output/pickle_results/{category}/{category}_scDeepCluster.pkl")
            display(df)

['data_1c8', 'data_-1c4', 'data_-1c8', 'data_0c4', 'data_0c8', 'data_0c16', 'data_1.5c4', 'data_1c4', 'data_1.5c8', 'data_1.5c16', 'data_-1c16', 'data_1c16']
>>>>dataset data_1c8
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1011)
(2000,)
...Pretraining autoencoder...



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0045, ARI= 0.0012
Clustering time: 124 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1011)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0052, ARI= 0.0022
Clustering time: 130 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."


>>>>dataset data_-1c4
### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0198, ARI= 0.0198
Clustering time: 67 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0538, ARI= 0.0457
Clustering time: 66 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."


>>>>dataset data_-1c8
### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1010)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0492, ARI= 0.0320
Clustering time: 136 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1010)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0364, ARI= 0.0213
Clustering time: 137 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."


>>>>dataset data_0c4
### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 999)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0104, ARI= 0.0076
Clustering time: 77 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 999)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0076, ARI= 0.0051
Clustering time: 79 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."


>>>>dataset data_0c8
### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1009)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0115, ARI= 0.0062
Clustering time: 145 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1009)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0239, ARI= 0.0143
Clustering time: 147 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_0c16
### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1018)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0060, ARI= 0.0019
Clustering time: 273 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1018)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0039, ARI= 0.0009
Clustering time: 278 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_1.5c4
### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0047, ARI= 0.0022
Clustering time: 87 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0068, ARI= 0.0045
Clustering time: 86 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_1c4
### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0022, ARI= -0.0003
Clustering time: 86 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 1000 cells.
Sample size
(1000, 1000)
(1000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 3
Save interval 15
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0144, ARI= 0.0130
Clustering time: 105 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_1.5c8
### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1019)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0079, ARI= 0.0012
Clustering time: 167 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 2000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(2000, 1019)
(2000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 7
Save interval 35
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0060, ARI= 0.0019
Clustering time: 159 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_1.5c16
### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1034)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00025 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0032, ARI= 0.0006
Clustering time: 307 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1034)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0030, ARI= 0.0005
Clustering time: 311 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_-1c16
### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1012)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0262, ARI= 0.0109
Clustering time: 302 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1012)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.0005 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0280, ARI= 0.0122
Clustering time: 307 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


>>>>dataset data_1c16
### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1028)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0025, ARI= 0.0003
Clustering time: 375 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 4000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(4000, 1028)
(4000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 15
Save interval 75
Initializing cluster centers with k-means.
delta_label  0.00075 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0028, ARI= 0.0004
Clustering time: 392 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00123,0.00453,-0.001848,0,125.14909,"[1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 2, 1, 0, 2, ..."
1,data_1c8,0.00219,0.00518,-0.001508,1,130.802194,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 2, 2, ..."
2,data_-1c4,0.01984,0.01981,-0.005856,0,68.408933,"[1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
3,data_-1c4,0.04572,0.05381,-0.005066,1,66.717075,"[2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 0, ..."
4,data_-1c8,0.032,0.04917,-0.003668,0,136.69523,"[0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 2, 0, 2, ..."
5,data_-1c8,0.02129,0.03641,-0.005487,1,138.333669,"[0, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 0, 1, 2, ..."
6,data_0c4,0.00764,0.01036,-0.00555,0,77.880029,"[0, 0, 1, 0, 1, 1, 2, 2, 1, 0, 2, 1, 2, 0, 2, ..."
7,data_0c4,0.0051,0.00756,-0.004005,1,79.669937,"[1, 0, 1, 1, 0, 0, 2, 2, 1, 2, 0, 0, 2, 1, 2, ..."
8,data_0c8,0.0062,0.01145,-0.004063,0,145.849358,"[2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, ..."
9,data_0c8,0.01429,0.02392,-0.00617,1,147.999597,"[1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, ..."


['data_1c8', 'data_-1c4', 'data_-1c8', 'data_0c4', 'data_0c8', 'data_0c16', 'data_1.5c4', 'data_1c4', 'data_1.5c8', 'data_1.5c16', 'data_-1c16', 'data_1c16']
>>>>dataset data_1c8
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1019)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0076, ARI= 0.0034
Clustering time: 288 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1019)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0025, ARI= 0.0007
Clustering time: 316 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."


>>>>dataset data_-1c4
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1007)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.3100, ARI= 0.3558
Clustering time: 295 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1007)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0470, ARI= 0.0269
Clustering time: 335 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."


>>>>dataset data_-1c8
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1005)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0058, ARI= 0.0021
Clustering time: 316 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1005)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0104, ARI= 0.0090
Clustering time: 307 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."


>>>>dataset data_0c4
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0235, ARI= 0.0255
Clustering time: 332 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0116, ARI= 0.0145
Clustering time: 317 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."


>>>>dataset data_0c8
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0072, ARI= 0.0035
Clustering time: 363 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0043, ARI= 0.0014
Clustering time: 348 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_0c16
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1008)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0047, ARI= 0.0012
Clustering time: 346 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1008)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0044, ARI= 0.0003
Clustering time: 345 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_1.5c4
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1022)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0065, ARI= 0.0071
Clustering time: 335 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1022)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0052, ARI= 0.0049
Clustering time: 349 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_1c4
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1021)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0014, ARI= 0.0002
Clustering time: 363 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1021)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0078, ARI= 0.0072
Clustering time: 373 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_1.5c8
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1018)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0034, ARI= 0.0020
Clustering time: 352 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1018)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0039, ARI= 0.0006
Clustering time: 373 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_1.5c16
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1030)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0031, ARI= 0.0003
Clustering time: 396 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1030)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0024, ARI= -0.0001
Clustering time: 376 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_-1c16
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1008)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0003333333333333333 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0079, ARI= 0.0030
Clustering time: 411 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1008)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0083, ARI= 0.0035
Clustering time: 357 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


>>>>dataset data_1c16
### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0046, ARI= 0.0008
Clustering time: 372 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


### Autoencoder: Successfully preprocessed 2500 genes and 3000 cells.


  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  ) / disp_std_bin[df['mean_bin'].values].values


Sample size
(3000, 1014)
(3000,)
...Pretraining autoencoder...
Pretrained weights are saved to ./ae_weights.h5
Update interval 11
Save interval 55
Initializing cluster centers with k-means.
delta_label  0.0006666666666666666 < tol  0.001
Reached tolerance threshold. Stopping training.
saving model to: scDeepCluster/scDeepCluster_model_final.h5
Final: NMI= 0.0064, ARI= 0.0021
Clustering time: 389 seconds.


Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."


In [4]:
df.groupby("dataset").mean()

Unnamed: 0_level_0,ARI,NMI,sil,time
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
data_-1c16,0.003235,0.00807,-0.005798,385.226593
data_-1c4,0.19136,0.17849,-0.004302,316.368312
data_-1c8,0.00553,0.008095,-0.005136,312.727852
data_0c16,0.00076,0.004575,-0.003572,346.865553
data_0c4,0.019995,0.01752,-0.004569,325.692704
data_0c8,0.002415,0.00575,-0.005521,356.733258
data_1.5c16,6.5e-05,0.002735,-0.003888,387.191149
data_1.5c4,0.00601,0.0058,-0.002607,343.365302
data_1.5c8,0.001335,0.003655,-0.004087,363.671355
data_1c16,0.00145,0.005485,-0.002549,381.995809


In [5]:
df

Unnamed: 0,dataset,ARI,NMI,sil,run,time,pred
0,data_1c8,0.00337,0.00762,-0.005513,0,288.890893,"[0, 0, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 2, 0, ..."
1,data_1c8,0.00067,0.00248,-0.002577,1,317.008204,"[2, 1, 2, 1, 2, 2, 1, 0, 1, 1, 1, 0, 2, 1, 0, ..."
2,data_-1c4,0.35583,0.31002,-0.002106,0,296.868644,"[1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 0, 1, 2, 0, ..."
3,data_-1c4,0.02689,0.04696,-0.006497,1,335.86798,"[2, 0, 1, 0, 2, 1, 0, 1, 0, 2, 1, 2, 2, 0, 0, ..."
4,data_-1c8,0.0021,0.00577,-0.004487,0,317.399225,"[0, 1, 2, 1, 0, 1, 2, 1, 1, 1, 0, 2, 0, 1, 2, ..."
5,data_-1c8,0.00896,0.01042,-0.005784,1,308.056479,"[0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, ..."
6,data_0c4,0.02545,0.02346,-0.004868,0,332.784253,"[0, 1, 2, 1, 0, 2, 1, 2, 0, 1, 2, 2, 0, 1, 2, ..."
7,data_0c4,0.01454,0.01158,-0.004269,1,318.601155,"[1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0, 2, 1, 2, 2, ..."
8,data_0c8,0.00347,0.00723,-0.005592,0,364.282243,"[2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 0, 2, 1, 2, ..."
9,data_0c8,0.00136,0.00427,-0.00545,1,349.184273,"[2, 1, 1, 0, 1, 2, 0, 1, 0, 1, 1, 0, 2, 1, 2, ..."
