# CarDEC Liver Runtime

In this notebook, we analyze the scalability of the CarDEC method to larger datasets. We fit CarDEC on random subsets of the Liver dataset (which contains over 100,000 cells), ranging from 10% up to 100% of the full liver data. We profile the runtime and memory usage of CarDEC both with and without the supplementary count space modeling ("CarDEC Count" denotes the run that includes count modeling).

In [1]:
"""Broadly useful python packages"""
import pandas as pd
import os
import numpy as np
import pickle
from copy import deepcopy
from shutil import move, rmtree
import warnings
from memory_profiler import memory_usage
from time import time

"""Machine learning and single cell packages"""
import sklearn.metrics as metrics
from sklearn.metrics import adjusted_rand_score as ari, normalized_mutual_info_score as nmi
import scanpy as sc
from anndata import AnnData
import seaborn as sns

"""CarDEC Package"""
from CarDEC import CarDEC_API

  from pandas.core.index import RangeIndex


In [2]:
"""Miscellaneous useful functions"""

def read_liver_data(path, cache=True):
    """Load the liver count matrix and apply basic cell/gene filters.

    Reads ``matrix.mtx``, ``genes.tsv`` and ``barcodes.tsv`` from ``path``,
    attaches the gene names and per-cell metadata, then filters low-quality
    cells and rarely expressed genes.

    Parameters
    ----------
    path : str
        Directory containing ``matrix.mtx``, ``genes.tsv`` (with a
        ``genename`` column) and ``barcodes.tsv`` (with a ``cellname`` column).
    cache : bool, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    AnnData
        Filtered data: cells with at least 200 detected genes and a
        mitochondrial fraction below 0.2, genes detected in at least 30 cells.
        ``obs`` holds the columns of ``barcodes.tsv`` plus ``percent_mito`` and
        ``n_counts`` (and whatever ``sc.pp.filter_cells`` /
        ``sc.pp.filter_genes`` annotate).
    """
    # The matrix is transposed right after reading.
    # NOTE(review): presumably stored genes x cells on disk -- confirm.
    adata = sc.read_mtx(os.path.join(path, 'matrix.mtx')).T
    genes_file = pd.read_csv(os.path.join(path, 'genes.tsv'), sep='\t')
    barcodes_file = pd.read_csv(os.path.join(path, 'barcodes.tsv'), sep='\t')

    adata.var.index = genes_file["genename"]

    # Attach the metadata table *indexed by cell name*. Assigning the raw
    # barcodes table (which carries a default RangeIndex) after setting the
    # index would silently throw the cell names away again.
    cell_metadata = barcodes_file.copy()
    cell_metadata.index = cell_metadata["cellname"].astype(str).values
    adata.obs = cell_metadata

    sc.pp.filter_cells(adata, min_genes = 200)

    # Genes whose name starts with 'mt-' are treated as mitochondrial.
    mito_genes = adata.var_names.str.startswith('mt-')
    # `.A1` flattens the matrix returned by the row sums; this assumes
    # `adata.X` is a scipy sparse matrix, as in the original code.
    total_counts = adata.X.sum(axis=1).A1
    adata.obs['percent_mito'] = np.sum(
        adata[:, mito_genes].X, axis=1).A1 / total_counts
    adata.obs['n_counts'] = total_counts

    # Materialise the subset so the gene filter below works on real data
    # instead of forcing an implicit copy of a view.
    adata = adata[adata.obs['percent_mito'] < 0.2, :].copy()
    sc.pp.filter_genes(adata, min_cells = 30)

    return adata

def build_dir(dir_path):
    """Create ``dir_path`` together with any missing parent directories.

    Directories that already exist are left untouched, so the call is safe to
    repeat. An empty ``dir_path`` is a no-op.

    Parameters
    ----------
    dir_path : str
        Relative or absolute path of the directory to create. Absolute paths
        are supported: ``os.makedirs`` handles the filesystem root, whereas
        repeatedly calling ``os.path.split`` until an empty string is reached
        never terminates for them.

    Raises
    ------
    OSError
        If a component of the path exists but is not a directory, or a
        directory cannot be created.
    """
    if not dir_path:
        return
    os.makedirs(dir_path, exist_ok=True)
            
def run_cardec(adata, include_counts):
    """Fit CarDEC on ``adata``, optionally followed by count modeling.

    The API object is created with weights stored in the ``temp`` directory,
    ``sampleid`` as the batch key and 2000 highly variable genes; the model is
    built with 11 clusters.

    Parameters
    ----------
    adata : AnnData
        Dataset handed to ``CarDEC_API``.
    include_counts : bool
        If true, run inference with ``denoise_all = False`` and then call
        ``model_counts``; otherwise run inference with its default arguments.

    Returns
    -------
    None
    """
    api = CarDEC_API(adata, weights_dir = "temp", batch_key = "sampleid", n_high_var = 2000)
    # Drops only this function's local reference to the input.
    del adata
    api.build_model(n_clusters = 11)

    if not include_counts:
        api.make_inference()
        return

    api.make_inference(denoise_all = False)
    api.model_counts()
        
def _profile_once(subset, include_counts, label, frac):
    """Time one ``run_cardec`` call on ``subset`` and sample its memory use.

    Returns the tuple ``(seconds, memory, label, percent)`` where ``memory`` is
    the difference between the largest and smallest sample reported by
    ``memory_usage`` and ``percent`` is ``frac`` expressed as a whole percent.
    """
    sc.pp.filter_genes(subset, min_cells = 1)
    try:
        start = time()
        run = memory_usage((run_cardec, (), {'adata': subset, 'include_counts': include_counts}))
        elapsed = time() - start
    except BaseException:
        # A failed run must not leave weights behind: the next run would find
        # them and skip pretraining, which would distort its timings.
        rmtree("temp", ignore_errors = True)
        raise
    rmtree("temp")

    peak_memory = max(run) - min(run)
    # round() rather than plain truncation: 100 * 0.29 is 28.999999999999996.
    return elapsed, peak_memory, label, int(round(100 * frac))


def profile(frac):
    """Profile CarDEC on a random ``frac`` of the cells in the global ``adata``.

    The same seeded random subset of cells is used for two runs: one without
    count modeling ("CarDEC Zscore") and one with it ("CarDEC Count"). The
    ``temp`` weights directory is removed after each run so the second run
    starts from scratch as well.

    Parameters
    ----------
    frac : float
        Fraction of cells to sample (without replacement) from the
        module-level ``adata``.

    Returns
    -------
    tuple
        ``(stats_zscore, stats_count)``; each element is
        ``(seconds, memory, method_label, percent)``.
    """
    # Fixed seed: every call draws from the same random stream, and both runs
    # below share one set of indices.
    np.random.seed(11111)
    n_cells = adata.shape[0]
    indices = np.random.choice(n_cells, size = round(frac * n_cells), replace = False)

    # Subset first, then copy, so the full dataset is never duplicated. Each
    # run gets its own fresh copy of the subset.
    stats_zscore = _profile_once(adata[indices].copy(), False, "CarDEC Zscore", frac)
    stats_count = _profile_once(adata[indices].copy(), True, "CarDEC Count", frac)

    return stats_zscore , stats_count

## Figure Data

In [3]:
# Make sure the output directory for the liver figures exists.
build_dir("../Figures/liver")

# Empty results table; one row is appended per profiled run.
profile_stats = pd.DataFrame(
    {"Time (Seconds)": [] , "Memory (MiB)": [], "Method": [], 'Percent': []}
)

## Read in Data

In [4]:
# Load and filter the liver dataset; `profile` subsamples cells from this
# module-level `adata`.
adata = read_liver_data("../Data/liver", cache = True)

Transforming to str index.
Trying to set attribute `.var` of view, copying.


## Profile Memory and Speed

In [5]:
# Fractions of the full dataset to profile.
fracs = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]

# Each fraction contributes two consecutive rows: the z-score-only run
# followed by the run that includes count modeling.
n = 0
for frac in fracs:
    zscore_row, count_row = profile(frac)
    profile_stats.loc[n] = zscore_row
    profile_stats.loc[n + 1] = count_row
    n += 2

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.939, Validation Loss: 0.868, Time: 1.7 s
Epoch 001: Training Loss: 0.847, Validation Loss: 0.824, Time: 1.6 s
Epoch 002: Training Loss: 0.791, Validation Loss: 0.754, Time: 1.6 s
Epoch 003: Training Loss: 0.757, Validation Loss: 0.744, Time: 1.6 s
Epoch 004: Training Loss: 0.736, Validation Loss: 0.732, Time: 1.6 s
Epoch 005: Training Loss: 0.718, Validation Loss: 0.708, Time: 1.6 s
Epoch 006: Training Loss: 0.707, Validation Loss: 0.705, Time: 1.6 s
Epoch 007: Training Loss: 0.699, Validation Loss: 0.691, Time: 1.6 s
Epoch 008: Training Loss: 0.691, Validation Loss: 0.689, Time: 1.5 s
Epoch 009: Training Loss: 0.685, Validation Loss: 0.677, Time: 1.5 s
Epoch 010: Training Loss: 0.682, Validation Loss: 0.675, Time: 1.5 s
Epoch 011: Training Loss: 0.679, Validation Loss: 0.681, Time: 1.5 s
Epoch 012: Training Loss: 0.675, Validation Loss: 0.673, Time: 1.5 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/rp_tree.py", line 135:
@numba.njit(fastmath=True, nogil=True, parallel=True)
def euclidean_random_projection_split(data, indices, rng_state):
^

  state.func_ir.loc))
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/utils.py", line 409:
@numba.njit(parallel=True)
def build_candidates(current_graph, n_vertices, n_neighbors, max_candidates, rng_state):
^

  current_graph, 


 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2499744   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2523304   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,545,880
Trainable params: 5,545,880
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.939, Validation Loss: 0.868, Time: 1.6 s
Epoch 001: Training Loss: 0.847, Validation Loss: 0.824, Time: 1.6 s
Epoch 002: Training Loss: 0.791, Validation Loss: 0.754, Time: 1.5 s
Epoch 003: Training Loss: 0.757, Validation Loss: 0.744, Time: 1.5 s
Epoch 004: Training Loss: 0.736, Validation Loss: 0.732, Time: 1.5 s
Epoch 005: Training Loss: 0.718, Validation Loss: 0.708, Time: 1.5 s
Epoch 006: Training Loss: 0.707, Validation Loss: 0.705, Time: 1.4 s
Epoch 007: Training Loss: 0.699, Validation Loss: 0.691, Time: 1.4 s
Epoch 008: Training Loss: 0.691, Validation Loss: 0.689, Time: 1.4 s
Epoch 009: Training Loss: 0.685, Validation Loss: 0.677, Time: 1.4 s
Epoch 010: Training Loss: 0.682, Validation Loss: 0.675, Time: 1.4 s
Epoch 011: Training Loss: 0.679, Validation Loss: 0.681, Time: 1.4 s
Epoch 012: Training Loss: 0.675, Validation Loss: 0.673, Time: 1.4 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2499744   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2523304   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,545,880
Trainable params: 5,545,880
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.903, Validation Loss: 0.819, Time: 3.3 s
Epoch 001: Training Loss: 0.789, Validation Loss: 0.747, Time: 3.0 s
Epoch 002: Training Loss: 0.743, Validation Loss: 0.717, Time: 3.0 s
Epoch 003: Training Loss: 0.720, Validation Loss: 0.700, Time: 3.0 s
Epoch 004: Training Loss: 0.706, Validation Loss: 0.688, Time: 2.9 s
Epoch 005: Training Loss: 0.695, Validation Loss: 0.680, Time: 2.9 s
Epoch 006: Training Loss: 0.688, Validation Loss: 0.677, Time: 2.9 s
Epoch 007: Training Loss: 0.682, Validation Loss: 0.673, Time: 2.9 s
Epoch 008: Training Loss: 0.679, Validation Loss: 0.670, Time: 2.9 s
Epoch 009: Training Loss: 0.675, Validation Loss: 0.666, Time: 2.8 s
Epoch 010: Training Loss: 0.672, Validation Loss: 0.666, Time: 2.9 s
Epoch 011: Training Loss: 0.669, Validation Loss: 0.663, Time: 2.8 s
Epoch 012: Training Loss: 0.667, Validation Loss: 0.662, Time: 2.8 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502816   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526400   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,048
Trainable params: 5,552,048
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.903, Validation Loss: 0.819, Time: 3.2 s
Epoch 001: Training Loss: 0.789, Validation Loss: 0.747, Time: 2.9 s
Epoch 002: Training Loss: 0.743, Validation Loss: 0.717, Time: 3.0 s
Epoch 003: Training Loss: 0.720, Validation Loss: 0.700, Time: 2.9 s
Epoch 004: Training Loss: 0.706, Validation Loss: 0.688, Time: 2.8 s
Epoch 005: Training Loss: 0.695, Validation Loss: 0.680, Time: 2.8 s
Epoch 006: Training Loss: 0.688, Validation Loss: 0.677, Time: 2.8 s
Epoch 007: Training Loss: 0.682, Validation Loss: 0.673, Time: 2.9 s
Epoch 008: Training Loss: 0.679, Validation Loss: 0.670, Time: 2.8 s
Epoch 009: Training Loss: 0.675, Validation Loss: 0.666, Time: 2.9 s
Epoch 010: Training Loss: 0.672, Validation Loss: 0.666, Time: 2.9 s
Epoch 011: Training Loss: 0.669, Validation Loss: 0.663, Time: 2.8 s
Epoch 012: Training Loss: 0.667, Validation Loss: 0.662, Time: 2.8 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502816   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526400   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,048
Trainable params: 5,552,048
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.850, Validation Loss: 0.802, Time: 6.3 s
Epoch 001: Training Loss: 0.741, Validation Loss: 0.754, Time: 6.0 s
Epoch 002: Training Loss: 0.710, Validation Loss: 0.732, Time: 5.8 s
Epoch 003: Training Loss: 0.693, Validation Loss: 0.718, Time: 5.8 s
Epoch 004: Training Loss: 0.685, Validation Loss: 0.714, Time: 5.8 s
Epoch 005: Training Loss: 0.679, Validation Loss: 0.708, Time: 6.0 s
Epoch 006: Training Loss: 0.674, Validation Loss: 0.709, Time: 6.0 s
Epoch 007: Training Loss: 0.670, Validation Loss: 0.700, Time: 5.8 s
Epoch 008: Training Loss: 0.667, Validation Loss: 0.701, Time: 5.9 s
Epoch 009: Training Loss: 0.664, Validation Loss: 0.695, Time: 5.8 s
Epoch 010: Training Loss: 0.662, Validation Loss: 0.695, Time: 5.8 s
Epoch 011: Training Loss: 0.660, Validation Loss: 0.692, Time: 5.9 s
Epoch 012: Training Loss: 0.658, Validation Loss: 0.690, Time: 6.0 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 10 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  320       
Total params: 5,552,273
Trainable params: 5,552,273
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.850, Validation Loss: 0.802, Time: 6.7 s
Epoch 001: Training Loss: 0.741, Validation Loss: 0.754, Time: 6.1 s
Epoch 002: Training Loss: 0.710, Validation Loss: 0.732, Time: 6.1 s
Epoch 003: Training Loss: 0.693, Validation Loss: 0.718, Time: 6.0 s
Epoch 004: Training Loss: 0.685, Validation Loss: 0.714, Time: 5.8 s
Epoch 005: Training Loss: 0.679, Validation Loss: 0.708, Time: 5.8 s
Epoch 006: Training Loss: 0.674, Validation Loss: 0.709, Time: 5.8 s
Epoch 007: Training Loss: 0.670, Validation Loss: 0.700, Time: 5.9 s
Epoch 008: Training Loss: 0.667, Validation Loss: 0.701, Time: 5.8 s
Epoch 009: Training Loss: 0.664, Validation Loss: 0.695, Time: 5.8 s
Epoch 010: Training Loss: 0.662, Validation Loss: 0.695, Time: 5.8 s
Epoch 011: Training Loss: 0.660, Validation Loss: 0.692, Time: 5.8 s
Epoch 012: Training Loss: 0.658, Validation Loss: 0.690, Time: 5.8 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.824, Validation Loss: 0.752, Time: 9.7 s
Epoch 001: Training Loss: 0.721, Validation Loss: 0.721, Time: 8.9 s
Epoch 002: Training Loss: 0.701, Validation Loss: 0.706, Time: 8.8 s
Epoch 003: Training Loss: 0.691, Validation Loss: 0.707, Time: 8.9 s
Epoch 004: Training Loss: 0.684, Validation Loss: 0.692, Time: 9.1 s
Epoch 005: Training Loss: 0.679, Validation Loss: 0.686, Time: 8.8 s
Epoch 006: Training Loss: 0.675, Validation Loss: 0.686, Time: 8.8 s
Epoch 007: Training Loss: 0.672, Validation Loss: 0.685, Time: 8.8 s
Epoch 008: Training Loss: 0.668, Validation Loss: 0.678, Time: 8.9 s
Epoch 009: Training Loss: 0.666, Validation Loss: 0.676, Time: 8.8 s
Epoch 010: Training Loss: 0.664, Validation Loss: 0.674, Time: 8.8 s
Epoch 011: Training Loss: 0.662, Validation Loss: 0.677, Time: 8.8 s
Epoch 012: Training Loss: 0.660, Validation Loss: 0.673, Time: 8.7 s
Epoch 013: Training Loss: 0.

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.824, Validation Loss: 0.752, Time: 11.4 s
Epoch 001: Training Loss: 0.721, Validation Loss: 0.721, Time: 8.8 s
Epoch 002: Training Loss: 0.701, Validation Loss: 0.706, Time: 8.6 s
Epoch 003: Training Loss: 0.691, Validation Loss: 0.707, Time: 8.6 s
Epoch 004: Training Loss: 0.684, Validation Loss: 0.692, Time: 8.7 s
Epoch 005: Training Loss: 0.679, Validation Loss: 0.686, Time: 8.6 s
Epoch 006: Training Loss: 0.675, Validation Loss: 0.686, Time: 8.6 s
Epoch 007: Training Loss: 0.672, Validation Loss: 0.685, Time: 8.6 s
Epoch 008: Training Loss: 0.668, Validation Loss: 0.678, Time: 8.6 s
Epoch 009: Training Loss: 0.666, Validation Loss: 0.676, Time: 8.6 s
Epoch 010: Training Loss: 0.664, Validation Loss: 0.674, Time: 8.7 s
Epoch 011: Training Loss: 0.662, Validation Loss: 0.677, Time: 8.8 s
Epoch 012: Training Loss: 0.660, Validation Loss: 0.673, Time: 8.7 s
Epoch 013: Training Loss: 0

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.808, Validation Loss: 0.745, Time: 14.7 s
Epoch 001: Training Loss: 0.718, Validation Loss: 0.713, Time: 12.4 s
Epoch 002: Training Loss: 0.700, Validation Loss: 0.702, Time: 12.4 s
Epoch 003: Training Loss: 0.691, Validation Loss: 0.696, Time: 12.4 s
Epoch 004: Training Loss: 0.685, Validation Loss: 0.691, Time: 12.4 s
Epoch 005: Training Loss: 0.681, Validation Loss: 0.686, Time: 12.4 s
Epoch 006: Training Loss: 0.677, Validation Loss: 0.683, Time: 12.4 s
Epoch 007: Training Loss: 0.673, Validation Loss: 0.680, Time: 12.6 s
Epoch 008: Training Loss: 0.670, Validation Loss: 0.678, Time: 12.5 s
Epoch 009: Training Loss: 0.668, Validation Loss: 0.675, Time: 12.3 s
Epoch 010: Training Loss: 0.665, Validation Loss: 0.673, Time: 12.5 s
Epoch 011: Training Loss: 0.663, Validation Loss: 0.671, Time: 12.4 s
Epoch 012: Training Loss: 0.661, Validation Loss: 0.669, Time: 12.5 s
Epoch 013: Trai

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.808, Validation Loss: 0.745, Time: 18.8 s
Epoch 001: Training Loss: 0.718, Validation Loss: 0.713, Time: 12.5 s
Epoch 002: Training Loss: 0.700, Validation Loss: 0.702, Time: 12.2 s
Epoch 003: Training Loss: 0.691, Validation Loss: 0.696, Time: 12.2 s
Epoch 004: Training Loss: 0.685, Validation Loss: 0.691, Time: 12.3 s
Epoch 005: Training Loss: 0.681, Validation Loss: 0.686, Time: 12.3 s
Epoch 006: Training Loss: 0.677, Validation Loss: 0.683, Time: 12.3 s
Epoch 007: Training Loss: 0.673, Validation Loss: 0.680, Time: 12.3 s
Epoch 008: Training Loss: 0.670, Validation Loss: 0.678, Time: 12.2 s
Epoch 009: Training Loss: 0.668, Validation Loss: 0.675, Time: 12.1 s
Epoch 010: Training Loss: 0.665, Validation Loss: 0.673, Time: 12.1 s
Epoch 011: Training Loss: 0.663, Validation Loss: 0.671, Time: 12.3 s
Epoch 012: Training Loss: 0.661, Validation Loss: 0.669, Time: 12.2 s
Epoch 013: Trai

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.797, Validation Loss: 0.715, Time: 18.9 s
Epoch 001: Training Loss: 0.711, Validation Loss: 0.691, Time: 15.8 s
Epoch 002: Training Loss: 0.696, Validation Loss: 0.682, Time: 15.3 s
Epoch 003: Training Loss: 0.688, Validation Loss: 0.676, Time: 15.3 s
Epoch 004: Training Loss: 0.682, Validation Loss: 0.672, Time: 15.2 s
Epoch 005: Training Loss: 0.677, Validation Loss: 0.667, Time: 15.7 s
Epoch 006: Training Loss: 0.673, Validation Loss: 0.665, Time: 15.4 s
Epoch 007: Training Loss: 0.670, Validation Loss: 0.661, Time: 15.4 s
Epoch 008: Training Loss: 0.666, Validation Loss: 0.659, Time: 15.4 s
Epoch 009: Training Loss: 0.664, Validation Loss: 0.657, Time: 15.2 s
Epoch 010: Training Loss: 0.661, Validation Loss: 0.653, Time: 15.3 s
Epoch 011: Training Loss: 0.659, Validation Loss: 0.651, Time: 15.3 s
Epoch 012: Training Loss: 0.657, Validation Loss: 0.650, Time: 15.2 s
Epoch 013: Trai

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

Trying to set attribute `.var` of view, copying.
... storing 'orig.ident' as categorical
... storing 'sampleid' as categorical
... storing 'tissue' as categorical
... storing 'sorting' as categorical
... storing 'lineid' as categorical
... storing 'cell.labels' as categorical
... storing 'barcode' as categorical
... storing 'Time' as categorical
... storing 'Disease' as categorical
... storing 'Fact.sorting' as categorical
... storing 'Sex' as categorical
... storing 'Time2' as categorical
  view_to_actual(adata)


Pretrain weight index file not detected, pretraining autoencoder weights.

Epoch 000: Training Loss: 0.797, Validation Loss: 0.715, Time: 31.5 s
Epoch 001: Training Loss: 0.711, Validation Loss: 0.691, Time: 15.4 s
Epoch 002: Training Loss: 0.696, Validation Loss: 0.682, Time: 15.2 s
Epoch 003: Training Loss: 0.688, Validation Loss: 0.676, Time: 15.2 s
Epoch 004: Training Loss: 0.682, Validation Loss: 0.672, Time: 15.2 s
Epoch 005: Training Loss: 0.677, Validation Loss: 0.667, Time: 15.3 s
Epoch 006: Training Loss: 0.673, Validation Loss: 0.665, Time: 15.6 s
Epoch 007: Training Loss: 0.670, Validation Loss: 0.661, Time: 15.4 s
Epoch 008: Training Loss: 0.666, Validation Loss: 0.659, Time: 15.3 s
Epoch 009: Training Loss: 0.664, Validation Loss: 0.657, Time: 15.4 s
Epoch 010: Training Loss: 0.661, Validation Loss: 0.653, Time: 15.2 s
Epoch 011: Training Loss: 0.659, Validation Loss: 0.651, Time: 15.2 s
Epoch 012: Training Loss: 0.657, Validation Loss: 0.650, Time: 15.2 s
Epoch 013: Trai

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../../../../../../../anaconda3/envs/DESCImpute/lib/python3.7/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^

  state.func_ir.loc))



 11 clusters detected. 


-----------------------CarDEC Architecture-----------------------

Model: "car_dec__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Sequential)         multiple                  260256    
_________________________________________________________________
decoder (Sequential)         multiple                  262224    
_________________________________________________________________
encoderLVG (Sequential)      multiple                  2502944   
_________________________________________________________________
decoderLVG (Sequential)      multiple                  2526529   
_________________________________________________________________
clustering (ClusteringLayer) multiple                  352       
Total params: 5,552,305
Trainable params: 5,552,305
Non-trainable params: 0
_________________________________________________________________

-------------

In [6]:
# Save the profiling table as CSV; pandas' default also writes the row index as the first column.
profile_stats.to_csv(path_or_buf="../Figures/liver/CarDEC_profile.csv")

# Bare trailing expression so the notebook renders the table as this cell's output.
profile_stats

Unnamed: 0,Time (Seconds),Memory (MiB),Method,Percent
0,291.202066,8000.339844,CarDEC Zscore,10.0
1,1046.707194,7759.339844,CarDEC Count,10.0
2,502.920225,7993.949219,CarDEC Zscore,20.0
3,1492.845753,10248.738281,CarDEC Count,20.0
4,1341.616484,15760.113281,CarDEC Zscore,40.0
5,3264.819926,18542.65625,CarDEC Count,40.0
6,2809.305696,20478.75,CarDEC Zscore,60.0
7,6043.807879,23409.832031,CarDEC Count,60.0
8,5034.924791,25158.582031,CarDEC Zscore,80.0
9,8807.557579,25087.699219,CarDEC Count,80.0
